Implement DT_GNU_HASH

DT_GNU_HASH serves the same purpose as DT_HASH, however it is a distinct
and faster apprach implemented and designed in the GNU toolchain in 2006.

DT_GNU_HASH is preferred whenever available.

Original GNU benchmarks claim 50% faster dynamic linking time.
https://www.sourceware.org/ml/binutils/2006-06/msg00418.html

Code based on FreeBSD and OpenBSD, both were based on DragonFlyBSD.
This commit is contained in:
kamil 2020-02-29 04:24:33 +00:00
parent 4e9bea3d5d
commit e8e477017a
4 changed files with 198 additions and 25 deletions

View File

@ -1,4 +1,4 @@
/* $NetBSD: headers.c,v 1.65 2018/12/30 11:55:15 martin Exp $ */
/* $NetBSD: headers.c,v 1.66 2020/02/29 04:24:33 kamil Exp $ */
/*
* Copyright 1996 John D. Polstra.
@ -40,7 +40,7 @@
#include <sys/cdefs.h>
#ifndef lint
__RCSID("$NetBSD: headers.c,v 1.65 2018/12/30 11:55:15 martin Exp $");
__RCSID("$NetBSD: headers.c,v 1.66 2020/02/29 04:24:33 kamil Exp $");
#endif /* not lint */
#include <err.h>
@ -166,26 +166,87 @@ _rtld_digest_dynamic(const char *execname, Obj_Entry *obj)
case DT_HASH:
{
uint32_t nbuckets, nchains;
const Elf_Symindx *hashtab = (const Elf_Symindx *)
(obj->relocbase + dynp->d_un.d_ptr);
if (hashtab[0] > UINT32_MAX)
obj->nbuckets = UINT32_MAX;
nbuckets = UINT32_MAX;
else
obj->nbuckets = hashtab[0];
obj->nchains = hashtab[1];
nbuckets = hashtab[0];
obj->nbuckets = nbuckets;
obj->nchains = (nchains = hashtab[1]);
obj->buckets = hashtab + 2;
obj->chains = obj->buckets + obj->nbuckets;
/* Validity check */
if (!obj->buckets || !nbuckets || !nchains)
continue;
obj->sysv_hash = true;
/*
* Should really be in _rtld_relocate_objects,
* but _rtld_symlook_obj might be used before.
*/
if (obj->nbuckets) {
fast_divide32_prepare(obj->nbuckets,
&obj->nbuckets_m,
&obj->nbuckets_s1,
&obj->nbuckets_s2);
}
fast_divide32_prepare(obj->nbuckets,
&obj->nbuckets_m,
&obj->nbuckets_s1,
&obj->nbuckets_s2);
}
break;
case DT_GNU_HASH:
{
uint32_t nmaskwords;
uint32_t nbuckets, symndx;
int bloom_size32;
bool nmw_power2;
const Elf_Symindx *hashtab = (const Elf_Symindx *)
(obj->relocbase + dynp->d_un.d_ptr);
if (hashtab[0] > UINT32_MAX)
nbuckets = UINT32_MAX;
else
nbuckets = hashtab[0];
obj->nbuckets_gnu = nbuckets;
nmaskwords = hashtab[2];
bloom_size32 = nmaskwords * (ELFSIZE / 32);
obj->buckets_gnu = hashtab + 4 + bloom_size32;
nmw_power2 = powerof2(nmaskwords);
/* Validity check */
if (!nmw_power2 || !nbuckets || !obj->buckets_gnu)
continue;
obj->gnu_hash = true;
obj->mask_bm_gnu = nmaskwords - 1;
obj->symndx_gnu = (symndx = hashtab[1]);
obj->shift2_gnu = hashtab[3];
obj->bloom_gnu = (const Elf_Addr *)(hashtab + 4);
obj->chains_gnu = obj->buckets_gnu + nbuckets - symndx;
/*
* Should really be in _rtld_relocate_objects,
* but _rtld_symlook_obj might be used before.
*/
fast_divide32_prepare(nbuckets,
&obj->nbuckets_m_gnu,
&obj->nbuckets_s1_gnu,
&obj->nbuckets_s2_gnu);
dbg(("found GNU Hash: buckets=%p "
"nbuckets=%lu chains=%p nchains=%u "
"bloom=%p mask_bm=%u shift2=%u "
"symndx=%u",
obj->buckets_gnu, obj->nbuckets_gnu,
obj->chains_gnu, obj->nchains_gnu,
obj->bloom_gnu, obj->mask_bm_gnu,
obj->shift2_gnu, obj->symndx_gnu));
}
break;
@ -352,6 +413,26 @@ _rtld_digest_dynamic(const char *execname, Obj_Entry *obj)
obj->relalim = obj->pltrela;
}
/* If the ELF Hash is present, "nchains" is the same in both hashes. */
if (!obj->sysv_hash && obj->gnu_hash) {
uint_fast32_t i, nbucket, symndx;
/* Otherwise, count the entries from the GNU Hash chain. */
nbucket = obj->nbuckets_gnu;
symndx = obj->symndx_gnu;
for (i = 0; i < nbucket; i++) {
Elf_Word bkt = obj->buckets_gnu[i];
if (bkt == 0)
continue;
const uint32_t *hashval = &obj->chains_gnu[bkt];
do {
symndx++;
} while ((*hashval++ & 1U) == 0);
}
obj->nchains_gnu = (uint32_t)symndx;
}
#ifdef RTLD_LOADER
if (init != 0)
obj->init = (Elf_Addr) obj->relocbase + init;

View File

@ -1,4 +1,4 @@
/* $NetBSD: reloc.c,v 1.115 2020/02/29 04:23:05 kamil Exp $ */
/* $NetBSD: reloc.c,v 1.116 2020/02/29 04:24:33 kamil Exp $ */
/*
* Copyright 1996 John D. Polstra.
@ -39,7 +39,7 @@
#include <sys/cdefs.h>
#ifndef lint
__RCSID("$NetBSD: reloc.c,v 1.115 2020/02/29 04:23:05 kamil Exp $");
__RCSID("$NetBSD: reloc.c,v 1.116 2020/02/29 04:24:33 kamil Exp $");
#endif /* not lint */
#include <err.h>
@ -174,9 +174,8 @@ _rtld_relocate_objects(Obj_Entry *first, bool bind_now)
int ok = 1;
for (obj = first; obj != NULL; obj = obj->next) {
if (obj->nbuckets == 0 || obj->nchains == 0 ||
obj->buckets == NULL || obj->symtab == NULL ||
obj->strtab == NULL) {
if ((!obj->sysv_hash && !obj->gnu_hash) ||
obj->symtab == NULL || obj->strtab == NULL) {
_rtld_error("%s: Shared object has no run-time"
" symbol table", obj->path);
return -1;

View File

@ -1,4 +1,4 @@
/* $NetBSD: rtld.h,v 1.137 2020/02/29 04:23:05 kamil Exp $ */
/* $NetBSD: rtld.h,v 1.138 2020/02/29 04:24:33 kamil Exp $ */
/*
* Copyright 1996 John D. Polstra.
@ -181,6 +181,7 @@ typedef struct Struct_Obj_Entry {
Elf_Word gotsym; /* First dynamic symbol in GOT */
#endif
/* SysV Hash fields */
const Elf_Symindx *buckets; /* Hash table buckets array */
unsigned long unused1; /* Used to be nbuckets */
const Elf_Symindx *chains; /* Hash table chain array */
@ -221,7 +222,9 @@ typedef struct Struct_Obj_Entry {
tls_done:1, /* True if static TLS offset
* has been allocated */
#endif
ref_nodel:1; /* Refcount increased to prevent dlclose */
ref_nodel:1, /* Refcount increased to prevent dlclose */
sysv_hash:1, /* SysV Hash available */
gnu_hash:1; /* GNU Hash available */
struct link_map linkmap; /* for GDB */
@ -234,10 +237,25 @@ typedef struct Struct_Obj_Entry {
void *ehdr;
/* SysV Hash fields */
uint32_t nbuckets; /* Number of buckets */
uint32_t nbuckets_m; /* Precomputed for fast remainder */
uint8_t nbuckets_s1;
uint8_t nbuckets_s2;
/* GNU Hash fields */
const uint32_t *buckets_gnu; /* Hash table buckets array */
uint32_t nbuckets_gnu; /* Number of GNU hash buckets */
uint32_t nbuckets_m_gnu; /* Precomputed for fast remainder */
uint8_t nbuckets_s1_gnu;
uint8_t nbuckets_s2_gnu;
const uint32_t *chains_gnu; /* Hash table chain array */
#define nchains_gnu nchains /* Number of symbols, shared with SysV Hash */
const Elf_Addr *bloom_gnu;
uint32_t symndx_gnu; /* First accessible symbol on dynsym table */
uint32_t mask_bm_gnu; /* Bloom filter words - 1 (bitmask) */
uint32_t shift2_gnu; /* Bloom filter shift count */
size_t pathlen; /* Pathname length */
SIMPLEQ_HEAD(, Struct_Name_Entry) names; /* List of names for this
* object we know about. */

View File

@ -1,4 +1,4 @@
/* $NetBSD: symbol.c,v 1.71 2020/02/29 04:23:05 kamil Exp $ */
/* $NetBSD: symbol.c,v 1.72 2020/02/29 04:24:33 kamil Exp $ */
/*
* Copyright 1996 John D. Polstra.
@ -40,7 +40,7 @@
#include <sys/cdefs.h>
#ifndef lint
__RCSID("$NetBSD: symbol.c,v 1.71 2020/02/29 04:23:05 kamil Exp $");
__RCSID("$NetBSD: symbol.c,v 1.72 2020/02/29 04:24:33 kamil Exp $");
#endif /* not lint */
#include <err.h>
@ -327,18 +327,17 @@ _rtld_symlook_obj_matched_symbol(const char *name,
* the given name. Returns a pointer to the symbol, or NULL if no
* definition was found.
*
* The symbol's hash value is passed in for efficiency reasons; that
* eliminates many recomputations of the hash value.
* SysV Hash version.
*/
const Elf_Sym *
_rtld_symlook_obj(const char *name, Elf_Hash *hash,
static const Elf_Sym *
_rtld_symlook_obj_sysv(const char *name, unsigned long hash,
const Obj_Entry *obj, u_int flags, const Ver_Entry *ventry)
{
unsigned long symnum;
const Elf_Sym *vsymp = NULL;
int vcount = 0;
for (symnum = obj->buckets[fast_remainder32(hash->sysv, obj->nbuckets,
for (symnum = obj->buckets[fast_remainder32(hash, obj->nbuckets,
obj->nbuckets_m, obj->nbuckets_s1, obj->nbuckets_s2)];
symnum != ELF_SYM_UNDEFINED;
symnum = obj->chains[symnum]) {
@ -354,6 +353,82 @@ _rtld_symlook_obj(const char *name, Elf_Hash *hash,
return NULL;
}
/*
* Search the symbol table of a single shared object for a symbol of
* the given name. Returns a pointer to the symbol, or NULL if no
* definition was found.
*
* GNU Hash version.
*/
static const Elf_Sym *
_rtld_symlook_obj_gnu(const char *name, unsigned long hash,
const Obj_Entry *obj, u_int flags, const Ver_Entry *ventry)
{
unsigned long symnum;
const Elf_Sym *vsymp = NULL;
const Elf32_Word *hashval;
Elf_Addr bloom_word;
Elf32_Word bucket;
int vcount = 0;
unsigned int h1, h2;
/* Pick right bitmask word from Bloom filter array */
bloom_word = obj->bloom_gnu[(hash / ELFSIZE) & obj->mask_bm_gnu];
/* Calculate modulus word size of gnu hash and its derivative */
h1 = hash & (ELFSIZE - 1);
h2 = ((hash >> obj->shift2_gnu) & (ELFSIZE - 1));
/* Filter out the "definitely not in set" queries */
if (((bloom_word >> h1) & (bloom_word >> h2) & 1) == 0)
return NULL;
/* Locate hash chain and corresponding value element*/
bucket = obj->buckets_gnu[fast_remainder32(hash, obj->nbuckets_gnu,
obj->nbuckets_m_gnu, obj->nbuckets_s1_gnu, obj->nbuckets_s2_gnu)];
if (bucket == 0)
return NULL;
hashval = &obj->chains_gnu[bucket];
do {
if (((*hashval ^ hash) >> 1) == 0) {
symnum = hashval - obj->chains_gnu;
if (_rtld_symlook_obj_matched_symbol(name, obj, flags,
ventry, symnum, &vsymp, &vcount)) {
return vsymp;
}
}
} while ((*hashval++ & 1) == 0);
if (vcount == 1)
return vsymp;
return NULL;
}
/*
* Search the symbol table of a single shared object for a symbol of
* the given name. Returns a pointer to the symbol, or NULL if no
* definition was found.
*
* The symbol's hash value is passed in for efficiency reasons; that
* eliminates many recomputations of the hash value.
*
* Redirect to either GNU Hash (whenever available) or ELF Hash.
*/
const Elf_Sym *
_rtld_symlook_obj(const char *name, Elf_Hash *hash,
const Obj_Entry *obj, u_int flags, const Ver_Entry *ventry)
{
assert(obj->sysv_hash || obj->gnu_hash);
/* Always prefer the GNU Hash as it is faster. */
if (obj->gnu_hash)
return _rtld_symlook_obj_gnu(name, hash->gnu, obj, flags, ventry);
else
return _rtld_symlook_obj_sysv(name, hash->sysv, obj, flags, ventry);
}
/*
* Given a symbol number in a referencing object, find the corresponding
* definition of the symbol. Returns a pointer to the symbol, or NULL if