2023-12-19 22:33:39 +03:00
|
|
|
/* $NetBSD: hash.c,v 1.74 2023/12/19 19:33:39 rillig Exp $ */
|
1995-06-14 19:18:37 +04:00
|
|
|
|
1993-03-21 12:45:37 +03:00
|
|
|
/*
|
|
|
|
* Copyright (c) 1988, 1989, 1990 The Regents of the University of California.
|
2003-08-07 15:13:06 +04:00
|
|
|
* All rights reserved.
|
|
|
|
*
|
|
|
|
* This code is derived from software contributed to Berkeley by
|
|
|
|
* Adam de Boor.
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
* 3. Neither the name of the University nor the names of its contributors
|
|
|
|
* may be used to endorse or promote products derived from this software
|
|
|
|
* without specific prior written permission.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
|
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
|
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
|
|
* SUCH DAMAGE.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
1993-03-21 12:45:37 +03:00
|
|
|
* Copyright (c) 1988, 1989 by Adam de Boor
|
|
|
|
* Copyright (c) 1989 by Berkeley Softworks
|
|
|
|
* All rights reserved.
|
|
|
|
*
|
|
|
|
* This code is derived from software contributed to Berkeley by
|
|
|
|
* Adam de Boor.
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
* 3. All advertising materials mentioning features or use of this software
|
|
|
|
* must display the following acknowledgement:
|
|
|
|
* This product includes software developed by the University of
|
|
|
|
* California, Berkeley and its contributors.
|
|
|
|
* 4. Neither the name of the University nor the names of its contributors
|
|
|
|
* may be used to endorse or promote products derived from this software
|
|
|
|
* without specific prior written permission.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
|
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
|
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
|
|
* SUCH DAMAGE.
|
|
|
|
*/
|
|
|
|
|
2023-12-19 22:33:39 +03:00
|
|
|
/* Hash tables with string keys and pointer values. */
|
2020-09-28 00:35:16 +03:00
|
|
|
|
1994-03-05 03:34:29 +03:00
|
|
|
#include "make.h"
|
1993-03-21 12:45:37 +03:00
|
|
|
|
2020-09-13 18:15:51 +03:00
|
|
|
/* "@(#)hash.c 8.1 (Berkeley) 6/6/93" */
|
2023-12-19 22:33:39 +03:00
|
|
|
MAKE_RCSID("$NetBSD: hash.c,v 1.74 2023/12/19 19:33:39 rillig Exp $");
|
2020-09-13 18:15:51 +03:00
|
|
|
|
1993-03-21 12:45:37 +03:00
|
|
|
/*
 * Load-factor threshold: as soon as the number of entries reaches this
 * many times the number of buckets, the table is rebuilt with more buckets
 * before the next entry is added.
 */
#define rebuildLimit 3
|
1993-03-21 12:45:37 +03:00
|
|
|
|
2021-02-01 20:32:10 +03:00
|
|
|
/*
 * Compute the hash of the NUL-terminated string 'key', using the
 * multiply-by-31 scheme of Gosling's Emacs and java.lang.String.
 *
 * The address of the terminating '\0' is stored in *out_keyEnd, which
 * gives the caller the end of the key without a second scan.
 */
static unsigned int
Hash_String(const char *key, const char **out_keyEnd)
{
	const char *cursor = key;
	unsigned int hash = 0;

	while (*cursor != '\0') {
		/* Bytes are taken as unsigned, whatever the signedness of char. */
		hash = hash * 31 + (unsigned char)*cursor;
		cursor++;
	}

	*out_keyEnd = cursor;
	return hash;
}
|
2020-07-19 00:37:38 +03:00
|
|
|
|
2021-04-11 15:46:54 +03:00
|
|
|
/* This hash function matches Gosling's Emacs and java.lang.String. */
|
2020-10-25 20:01:05 +03:00
|
|
|
unsigned int
|
2021-04-11 15:46:54 +03:00
|
|
|
Hash_Substring(Substring key)
|
2020-10-25 20:01:05 +03:00
|
|
|
{
|
2021-04-11 15:46:54 +03:00
|
|
|
unsigned int h;
|
|
|
|
const char *p;
|
|
|
|
|
|
|
|
h = 0;
|
|
|
|
for (p = key.start; p != key.end; p++)
|
|
|
|
h = 31 * h + (unsigned char)*p;
|
|
|
|
return h;
|
2020-10-25 20:01:05 +03:00
|
|
|
}
|
|
|
|
|
2020-10-18 15:36:43 +03:00
|
|
|
static HashEntry *
|
2022-01-27 14:00:07 +03:00
|
|
|
HashTable_Find(HashTable *t, Substring key, unsigned int h)
|
2020-10-05 22:27:47 +03:00
|
|
|
{
|
2023-12-17 11:53:54 +03:00
|
|
|
HashEntry *he;
|
2020-10-05 23:21:30 +03:00
|
|
|
unsigned int chainlen = 0;
|
2022-01-27 14:00:07 +03:00
|
|
|
size_t keyLen = Substring_Length(key);
|
2020-10-04 21:16:09 +03:00
|
|
|
|
2020-10-05 22:27:47 +03:00
|
|
|
#ifdef DEBUG_HASH_LOOKUP
|
2022-01-27 14:00:07 +03:00
|
|
|
DEBUG4(HASH, "HashTable_Find: %p h=%08x key=%.*s\n",
|
|
|
|
t, h, (int)keyLen, key.start);
|
2021-04-11 15:46:54 +03:00
|
|
|
#endif
|
|
|
|
|
2023-12-17 11:53:54 +03:00
|
|
|
for (he = t->buckets[h & t->bucketsMask]; he != NULL; he = he->next) {
|
2021-04-11 15:46:54 +03:00
|
|
|
chainlen++;
|
2023-12-17 11:53:54 +03:00
|
|
|
if (he->hash == h &&
|
|
|
|
strncmp(he->key, key.start, keyLen) == 0 &&
|
|
|
|
he->key[keyLen] == '\0')
|
2021-04-11 15:46:54 +03:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (chainlen > t->maxchain)
|
|
|
|
t->maxchain = chainlen;
|
|
|
|
|
2023-12-17 11:53:54 +03:00
|
|
|
return he;
|
2021-04-11 15:46:54 +03:00
|
|
|
}
|
|
|
|
|
2020-10-25 22:19:07 +03:00
|
|
|
/* Set up the hash table. */
|
1993-03-21 12:45:37 +03:00
|
|
|
void
|
2020-10-25 22:19:07 +03:00
|
|
|
HashTable_Init(HashTable *t)
|
1993-03-21 12:45:37 +03:00
|
|
|
{
|
2020-10-05 23:21:30 +03:00
|
|
|
unsigned int n = 16, i;
|
2020-11-05 20:27:16 +03:00
|
|
|
HashEntry **buckets = bmake_malloc(sizeof *buckets * n);
|
2020-10-25 21:03:59 +03:00
|
|
|
for (i = 0; i < n; i++)
|
|
|
|
buckets[i] = NULL;
|
1993-03-21 12:45:37 +03:00
|
|
|
|
2020-10-25 21:03:59 +03:00
|
|
|
t->buckets = buckets;
|
2020-09-05 16:55:08 +03:00
|
|
|
t->bucketsSize = n;
|
2020-10-25 21:03:59 +03:00
|
|
|
t->numEntries = 0;
|
2020-09-05 16:55:08 +03:00
|
|
|
t->bucketsMask = n - 1;
|
2020-10-25 21:03:59 +03:00
|
|
|
t->maxchain = 0;
|
1993-03-21 12:45:37 +03:00
|
|
|
}
|
|
|
|
|
2021-02-01 20:32:10 +03:00
|
|
|
/*
|
|
|
|
* Remove everything from the hash table and free up the memory for the keys
|
|
|
|
* of the hash table, but not for the values associated to these keys.
|
|
|
|
*/
|
1993-03-21 12:45:37 +03:00
|
|
|
void
|
2020-10-25 22:19:07 +03:00
|
|
|
HashTable_Done(HashTable *t)
|
1993-03-21 12:45:37 +03:00
|
|
|
{
|
2020-10-25 21:12:35 +03:00
|
|
|
HashEntry **buckets = t->buckets;
|
|
|
|
size_t i, n = t->bucketsSize;
|
|
|
|
|
|
|
|
for (i = 0; i < n; i++) {
|
|
|
|
HashEntry *he = buckets[i];
|
|
|
|
while (he != NULL) {
|
|
|
|
HashEntry *next = he->next;
|
|
|
|
free(he);
|
|
|
|
he = next;
|
1993-03-21 12:45:37 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-02-01 20:32:10 +03:00
|
|
|
free(t->buckets);
|
2020-10-25 22:28:44 +03:00
|
|
|
#ifdef CLEANUP
|
2020-09-02 00:11:31 +03:00
|
|
|
t->buckets = NULL;
|
2020-10-25 22:28:44 +03:00
|
|
|
#endif
|
1993-03-21 12:45:37 +03:00
|
|
|
}
|
|
|
|
|
2020-10-25 22:19:07 +03:00
|
|
|
/* Find the entry corresponding to the key, or return NULL. */
|
2020-10-18 15:36:43 +03:00
|
|
|
HashEntry *
|
2020-10-25 22:19:07 +03:00
|
|
|
HashTable_FindEntry(HashTable *t, const char *key)
|
1993-03-21 12:45:37 +03:00
|
|
|
{
|
2022-01-27 14:00:07 +03:00
|
|
|
const char *keyEnd;
|
|
|
|
unsigned int h = Hash_String(key, &keyEnd);
|
|
|
|
return HashTable_Find(t, Substring_Init(key, keyEnd), h);
|
1993-03-21 12:45:37 +03:00
|
|
|
}
|
|
|
|
|
2020-10-25 22:19:07 +03:00
|
|
|
/* Find the value corresponding to the key, or return NULL. */
|
2020-09-26 17:48:31 +03:00
|
|
|
void *
|
2020-10-25 22:19:07 +03:00
|
|
|
HashTable_FindValue(HashTable *t, const char *key)
|
2020-09-26 17:48:31 +03:00
|
|
|
{
|
2020-10-25 22:19:07 +03:00
|
|
|
HashEntry *he = HashTable_FindEntry(t, key);
|
2020-10-05 22:27:47 +03:00
|
|
|
return he != NULL ? he->value : NULL;
|
|
|
|
}
|
|
|
|
|
2020-12-30 13:03:16 +03:00
|
|
|
/*
|
|
|
|
* Find the value corresponding to the key and the precomputed hash,
|
|
|
|
* or return NULL.
|
|
|
|
*/
|
2020-10-25 20:01:05 +03:00
|
|
|
void *
|
2021-04-11 15:46:54 +03:00
|
|
|
HashTable_FindValueBySubstringHash(HashTable *t, Substring key, unsigned int h)
|
2020-10-25 20:01:05 +03:00
|
|
|
{
|
2022-01-27 14:00:07 +03:00
|
|
|
HashEntry *he = HashTable_Find(t, key, h);
|
2020-10-25 20:01:05 +03:00
|
|
|
return he != NULL ? he->value : NULL;
|
|
|
|
}
|
|
|
|
|
2020-12-30 13:03:16 +03:00
|
|
|
/*
|
|
|
|
* Make the hash table larger. Any bucket numbers from the old table become
|
2023-12-19 22:33:39 +03:00
|
|
|
* invalid; the hash values stay valid though.
|
2020-12-30 13:03:16 +03:00
|
|
|
*/
|
2020-10-05 22:27:47 +03:00
|
|
|
static void
|
2020-10-25 22:19:07 +03:00
|
|
|
HashTable_Enlarge(HashTable *t)
|
2020-10-05 22:27:47 +03:00
|
|
|
{
|
2020-10-25 20:58:53 +03:00
|
|
|
unsigned int oldSize = t->bucketsSize;
|
|
|
|
HashEntry **oldBuckets = t->buckets;
|
|
|
|
unsigned int newSize = 2 * oldSize;
|
|
|
|
unsigned int newMask = newSize - 1;
|
2020-11-05 20:27:16 +03:00
|
|
|
HashEntry **newBuckets = bmake_malloc(sizeof *newBuckets * newSize);
|
2020-10-25 20:58:53 +03:00
|
|
|
size_t i;
|
|
|
|
|
|
|
|
for (i = 0; i < newSize; i++)
|
|
|
|
newBuckets[i] = NULL;
|
|
|
|
|
|
|
|
for (i = 0; i < oldSize; i++) {
|
|
|
|
HashEntry *he = oldBuckets[i];
|
|
|
|
while (he != NULL) {
|
|
|
|
HashEntry *next = he->next;
|
2023-12-17 11:53:54 +03:00
|
|
|
he->next = newBuckets[he->hash & newMask];
|
|
|
|
newBuckets[he->hash & newMask] = he;
|
2020-10-25 20:58:53 +03:00
|
|
|
he = next;
|
2020-10-05 22:27:47 +03:00
|
|
|
}
|
|
|
|
}
|
2020-10-25 20:58:53 +03:00
|
|
|
|
|
|
|
free(oldBuckets);
|
|
|
|
|
|
|
|
t->bucketsSize = newSize;
|
|
|
|
t->bucketsMask = newMask;
|
|
|
|
t->buckets = newBuckets;
|
2021-12-27 22:06:07 +03:00
|
|
|
DEBUG4(HASH, "HashTable_Enlarge: %p size=%d entries=%d maxchain=%d\n",
|
|
|
|
(void *)t, t->bucketsSize, t->numEntries, t->maxchain);
|
2020-10-05 22:27:47 +03:00
|
|
|
t->maxchain = 0;
|
2020-09-26 17:48:31 +03:00
|
|
|
}
|
|
|
|
|
2020-12-30 13:03:16 +03:00
|
|
|
/*
|
|
|
|
* Find or create an entry corresponding to the key.
|
|
|
|
* Return in out_isNew whether a new entry has been created.
|
|
|
|
*/
|
2020-10-18 15:36:43 +03:00
|
|
|
HashEntry *
|
2021-04-03 14:08:40 +03:00
|
|
|
HashTable_CreateEntry(HashTable *t, const char *key, bool *out_isNew)
|
1993-03-21 12:45:37 +03:00
|
|
|
{
|
2022-01-27 14:00:07 +03:00
|
|
|
const char *keyEnd;
|
|
|
|
unsigned int h = Hash_String(key, &keyEnd);
|
|
|
|
HashEntry *he = HashTable_Find(t, Substring_Init(key, keyEnd), h);
|
2020-10-25 21:37:08 +03:00
|
|
|
|
|
|
|
if (he != NULL) {
|
|
|
|
if (out_isNew != NULL)
|
2021-04-03 14:08:40 +03:00
|
|
|
*out_isNew = false;
|
2020-10-25 21:37:08 +03:00
|
|
|
return he;
|
2020-10-05 22:27:47 +03:00
|
|
|
}
|
1993-03-21 12:45:37 +03:00
|
|
|
|
2020-09-02 00:11:31 +03:00
|
|
|
if (t->numEntries >= rebuildLimit * t->bucketsSize)
|
2020-10-25 22:19:07 +03:00
|
|
|
HashTable_Enlarge(t);
|
2020-10-05 22:27:47 +03:00
|
|
|
|
2022-01-27 14:00:07 +03:00
|
|
|
he = bmake_malloc(sizeof *he + (size_t)(keyEnd - key));
|
2020-10-25 21:37:08 +03:00
|
|
|
he->value = NULL;
|
2023-12-17 11:53:54 +03:00
|
|
|
he->hash = h;
|
2022-01-27 14:00:07 +03:00
|
|
|
memcpy(he->key, key, (size_t)(keyEnd - key) + 1);
|
2020-10-25 21:37:08 +03:00
|
|
|
|
|
|
|
he->next = t->buckets[h & t->bucketsMask];
|
|
|
|
t->buckets[h & t->bucketsMask] = he;
|
1993-03-21 12:45:37 +03:00
|
|
|
t->numEntries++;
|
|
|
|
|
2020-10-25 21:37:08 +03:00
|
|
|
if (out_isNew != NULL)
|
2021-04-03 14:08:40 +03:00
|
|
|
*out_isNew = true;
|
2020-10-25 21:37:08 +03:00
|
|
|
return he;
|
1993-03-21 12:45:37 +03:00
|
|
|
}
|
|
|
|
|
2021-12-15 13:07:53 +03:00
|
|
|
void
|
2020-11-15 00:29:44 +03:00
|
|
|
HashTable_Set(HashTable *t, const char *key, void *value)
|
|
|
|
{
|
|
|
|
HashEntry *he = HashTable_CreateEntry(t, key, NULL);
|
|
|
|
HashEntry_Set(he, value);
|
|
|
|
}
|
|
|
|
|
2022-02-10 00:09:24 +03:00
|
|
|
/* Delete the entry from the table, don't free the value of the entry. */
|
1993-03-21 12:45:37 +03:00
|
|
|
void
|
2020-10-25 22:19:07 +03:00
|
|
|
HashTable_DeleteEntry(HashTable *t, HashEntry *he)
|
1993-03-21 12:45:37 +03:00
|
|
|
{
|
2023-12-17 11:53:54 +03:00
|
|
|
HashEntry **ref = &t->buckets[he->hash & t->bucketsMask];
|
2020-10-25 21:37:08 +03:00
|
|
|
HashEntry *p;
|
1993-03-21 12:45:37 +03:00
|
|
|
|
2020-10-25 21:37:08 +03:00
|
|
|
for (; (p = *ref) != NULL; ref = &p->next) {
|
|
|
|
if (p == he) {
|
|
|
|
*ref = p->next;
|
2005-08-04 04:20:12 +04:00
|
|
|
free(p);
|
1993-03-21 12:45:37 +03:00
|
|
|
t->numEntries--;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
abort();
|
|
|
|
}
|
|
|
|
|
2020-12-30 13:03:16 +03:00
|
|
|
/*
|
|
|
|
* Return the next entry in the hash table, or NULL if the end of the table
|
|
|
|
* is reached.
|
|
|
|
*/
|
2020-10-18 15:36:43 +03:00
|
|
|
HashEntry *
|
2020-10-18 13:44:25 +03:00
|
|
|
HashIter_Next(HashIter *hi)
|
1993-03-21 12:45:37 +03:00
|
|
|
{
|
2020-10-18 15:36:43 +03:00
|
|
|
HashTable *t = hi->table;
|
2020-10-25 21:37:08 +03:00
|
|
|
HashEntry *he = hi->entry;
|
|
|
|
HashEntry **buckets = t->buckets;
|
|
|
|
unsigned int bucketsSize = t->bucketsSize;
|
1993-03-21 12:45:37 +03:00
|
|
|
|
2020-10-25 21:37:08 +03:00
|
|
|
if (he != NULL)
|
|
|
|
he = he->next; /* skip the most recently returned entry */
|
|
|
|
|
|
|
|
while (he == NULL) { /* find the next nonempty chain */
|
|
|
|
if (hi->nextBucket >= bucketsSize)
|
2008-12-13 18:19:29 +03:00
|
|
|
return NULL;
|
2020-10-25 21:37:08 +03:00
|
|
|
he = buckets[hi->nextBucket++];
|
1993-03-21 12:45:37 +03:00
|
|
|
}
|
2020-10-25 21:37:08 +03:00
|
|
|
hi->entry = he;
|
|
|
|
return he;
|
1993-03-21 12:45:37 +03:00
|
|
|
}
|
|
|
|
|
2020-07-20 21:12:48 +03:00
|
|
|
void
|
2020-10-25 22:19:07 +03:00
|
|
|
HashTable_DebugStats(HashTable *t, const char *name)
|
2020-07-20 21:12:48 +03:00
|
|
|
{
|
2020-10-18 15:36:43 +03:00
|
|
|
DEBUG4(HASH, "HashTable %s: size=%u numEntries=%u maxchain=%u\n",
|
2021-12-15 15:24:13 +03:00
|
|
|
name, t->bucketsSize, t->numEntries, t->maxchain);
|
2020-07-20 21:12:48 +03:00
|
|
|
}
|