Add __cacheline_aligned and __read_mostly annotations.
These annotations help to mitigate false sharing on multiprocessor systems. Variables annotated with __cacheline_aligned are placed into the .data.cacheline_aligned section in the kernel. Each item in this section is aligned on a cacheline boundary - this avoids false sharing. Highly contended global locks are good candidates for __cacheline_aligned annotation. Variables annotated with __read_mostly are packed together tightly into a .data.read_mostly section in the kernel. The idea here is that we can pack infrequently modified data items into a cacheline and avoid having to purge the cache, which would happen if read-mostly data and write-mostly data shared a cacheline. Initialisation variables are prime candidates for __read_mostly annotations.
This commit is contained in:
parent
9d6b8287eb
commit
e0e10b0607
|
@ -1,4 +1,4 @@
|
|||
/* $NetBSD: kern.ldscript,v 1.4 2007/10/18 15:28:33 yamt Exp $ */
|
||||
/* $NetBSD: kern.ldscript,v 1.5 2010/06/01 22:13:30 mjf Exp $ */
|
||||
|
||||
OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64",
|
||||
"elf64-x86-64")
|
||||
|
@ -32,8 +32,22 @@ SECTIONS
|
|||
AT (LOADADDR(.text) + (ADDR(.data) - ADDR(.text)))
|
||||
{
|
||||
*(.data)
|
||||
*(.data.*)
|
||||
}
|
||||
|
||||
. = ALIGN(64); /* COHERENCY_UNIT */
|
||||
.data.cacheline_aligned :
|
||||
AT (LOADADDR(.text) + (ADDR(.data.cacheline_aligned) - ADDR(.text)))
|
||||
{
|
||||
*(.data.cacheline_aligned)
|
||||
}
|
||||
. = ALIGN(64); /* COHERENCY_UNIT */
|
||||
.data.read_mostly :
|
||||
AT (LOADADDR(.text) + (ADDR(.data.read_mostly) - ADDR(.text)))
|
||||
{
|
||||
*(.data.read_mostly)
|
||||
}
|
||||
. = ALIGN(64); /* COHERENCY_UNIT */
|
||||
|
||||
_edata = . ;
|
||||
PROVIDE (edata = .) ;
|
||||
__bss_start = . ;
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* $NetBSD: kern.ldscript.2MB,v 1.2 2007/10/18 15:28:33 yamt Exp $ */
|
||||
/* $NetBSD: kern.ldscript.2MB,v 1.3 2010/06/01 22:13:30 mjf Exp $ */
|
||||
|
||||
OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64",
|
||||
"elf64-x86-64")
|
||||
|
@ -33,8 +33,20 @@ SECTIONS
|
|||
AT (LOADADDR(.text) + (ADDR(.data) - ADDR(.text)))
|
||||
{
|
||||
*(.data)
|
||||
*(.data.*)
|
||||
}
|
||||
. = ALIGN(64); /* COHERENCY_UNIT */
|
||||
.data.cacheline_aligned :
|
||||
AT (LOADADDR(.text) + (ADDR(.data.cacheline_aligned) - ADDR(.text)))
|
||||
{
|
||||
*(.data.cacheline_aligned)
|
||||
}
|
||||
. = ALIGN(64); /* COHERENCY_UNIT */
|
||||
.data.read_mostly :
|
||||
AT (LOADADDR(.text) + (ADDR(.data.read_mostly) - ADDR(.text)))
|
||||
{
|
||||
*(.data.read_mostly)
|
||||
}
|
||||
. = ALIGN(64); /* COHERENCY_UNIT */
|
||||
_edata = . ;
|
||||
PROVIDE (edata = .) ;
|
||||
__bss_start = . ;
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* $NetBSD: kern.ldscript.Xen,v 1.2 2007/11/22 16:16:44 bouyer Exp $ */
|
||||
/* $NetBSD: kern.ldscript.Xen,v 1.3 2010/06/01 22:13:30 mjf Exp $ */
|
||||
|
||||
OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64",
|
||||
"elf64-x86-64")
|
||||
|
@ -26,8 +26,20 @@ SECTIONS
|
|||
AT (LOADADDR(.text) + (ADDR(.data) - ADDR(.text)))
|
||||
{
|
||||
*(.data)
|
||||
*(.data.*)
|
||||
}
|
||||
. = ALIGN(64); /* COHERENCY_UNIT */
|
||||
.data.cacheline_aligned :
|
||||
AT (LOADADDR(.text) + (ADDR(.data.cacheline_aligned) - ADDR(.text)))
|
||||
{
|
||||
*(.data.cacheline_aligned)
|
||||
}
|
||||
. = ALIGN(64); /* COHERENCY_UNIT */
|
||||
.data.read_mostly :
|
||||
AT (LOADADDR(.text) + (ADDR(.data.read_mostly) - ADDR(.text)))
|
||||
{
|
||||
*(.data.read_mostly)
|
||||
}
|
||||
. = ALIGN(64); /* COHERENCY_UNIT */
|
||||
_edata = . ;
|
||||
PROVIDE (edata = .) ;
|
||||
__bss_start = . ;
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* $NetBSD: kern.ldscript,v 1.7 2007/10/18 15:28:34 yamt Exp $ */
|
||||
/* $NetBSD: kern.ldscript,v 1.8 2010/06/01 22:13:30 mjf Exp $ */
|
||||
|
||||
OUTPUT_FORMAT("elf32-i386", "elf32-i386",
|
||||
"elf32-i386")
|
||||
|
@ -32,8 +32,21 @@ SECTIONS
|
|||
AT (LOADADDR(.text) + (ADDR(.data) - ADDR(.text)))
|
||||
{
|
||||
*(.data)
|
||||
*(.data.*)
|
||||
}
|
||||
. = ALIGN(64); /* COHERENCY_UNIT */
|
||||
.data.cacheline_aligned :
|
||||
AT (LOADADDR(.text) + (ADDR(.data.cacheline_aligned) - ADDR(.text)))
|
||||
{
|
||||
*(.data.cacheline_aligned)
|
||||
}
|
||||
|
||||
. = ALIGN(64); /* COHERENCY_UNIT */
|
||||
.data.read_mostly :
|
||||
AT (LOADADDR(.text) + (ADDR(.data.read_mostly) - ADDR(.text)))
|
||||
{
|
||||
*(.data.read_mostly)
|
||||
}
|
||||
. = ALIGN(64); /* COHERENCY_UNIT */
|
||||
_edata = . ;
|
||||
PROVIDE (edata = .) ;
|
||||
__bss_start = . ;
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* $NetBSD: kern.ldscript,v 1.10 2007/10/17 19:57:28 garbled Exp $ */
|
||||
/* $NetBSD: kern.ldscript,v 1.11 2010/06/01 22:13:30 mjf Exp $ */
|
||||
|
||||
/*
|
||||
* Kernel linker script for NetBSD/sparc64. This script is based on
|
||||
|
@ -74,6 +74,11 @@ SECTIONS
|
|||
CONSTRUCTORS
|
||||
}
|
||||
.data1 : { *(.data1) }
|
||||
. = ALIGN(64); /* COHERENCY_UNIT */
|
||||
.data.cacheline_aligned : { *(.data.cacheline_aligned) }
|
||||
. = ALIGN(64); /* COHERENCY_UNIT */
|
||||
.data.read_mostly : { *(.data.read_mostly) }
|
||||
. = ALIGN(64); /* COHERENCY_UNIT */
|
||||
.ctors :
|
||||
{
|
||||
*(.ctors)
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* $NetBSD: kern32.ldscript,v 1.9 2007/10/17 19:57:28 garbled Exp $ */
|
||||
/* $NetBSD: kern32.ldscript,v 1.10 2010/06/01 22:13:30 mjf Exp $ */
|
||||
|
||||
/*
|
||||
* Kernel linker script for NetBSD/sparc. This script is based on
|
||||
|
@ -75,6 +75,11 @@ SECTIONS
|
|||
CONSTRUCTORS
|
||||
}
|
||||
.data1 : { *(.data1) }
|
||||
. = ALIGN(64); /* COHERENCY_UNIT */
|
||||
/* Input section name must match the one emitted by __cacheline_aligned. */
.data.cacheline_aligned : { *(.data.cacheline_aligned) }
|
||||
. = ALIGN(64); /* COHERENCY_UNIT */
|
||||
.data.read_mostly : { *(.data.read_mostly) }
|
||||
. = ALIGN(64); /* COHERENCY_UNIT */
|
||||
.ctors :
|
||||
{
|
||||
*(.ctors)
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* $NetBSD: kmodule,v 1.1 2009/11/10 14:47:52 skrll Exp $ */
|
||||
/* $NetBSD: kmodule,v 1.2 2010/06/01 22:13:30 mjf Exp $ */
|
||||
|
||||
SECTIONS
|
||||
{
|
||||
|
@ -6,11 +6,15 @@ SECTIONS
|
|||
.data 0 :
|
||||
{
|
||||
*(.data)
|
||||
*(.data.*)
|
||||
*(.bss)
|
||||
*(.bss.*)
|
||||
*(COMMON)
|
||||
}
|
||||
. = ALIGN(64); /* COHERENCY_UNIT */
|
||||
.data.cacheline_aligned : { *(.data.cacheline_aligned) }
|
||||
. = ALIGN(64); /* COHERENCY_UNIT */
|
||||
.data.read_mostly : { *(.data.read_mostly) }
|
||||
. = ALIGN(64); /* COHERENCY_UNIT */
|
||||
/* Pre-loaded modules do not need the following. */
|
||||
/DISCARD/ :
|
||||
{
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* $NetBSD: if_gre.h,v 1.39 2008/09/08 23:36:55 gmcgarry Exp $ */
|
||||
/* $NetBSD: if_gre.h,v 1.40 2010/06/01 22:13:30 mjf Exp $ */
|
||||
|
||||
/*
|
||||
* Copyright (c) 1998, 2008 The NetBSD Foundation, Inc.
|
||||
|
@ -63,12 +63,10 @@ enum gre_state {
|
|||
, GRE_S_DIE
|
||||
};
|
||||
|
||||
#define __cacheline_aligned __aligned(CACHE_LINE_SIZE)
|
||||
|
||||
struct gre_bufq {
|
||||
volatile int bq_prodidx;
|
||||
volatile int bq_considx;
|
||||
size_t bq_len __cacheline_aligned;
|
||||
size_t bq_len __aligned(CACHE_LINE_SIZE);
|
||||
size_t bq_lenmask;
|
||||
volatile int bq_drops;
|
||||
struct mbuf **bq_buf;
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* $NetBSD: cdefs_elf.h,v 1.30 2008/07/21 15:22:19 lukem Exp $ */
|
||||
/* $NetBSD: cdefs_elf.h,v 1.31 2010/06/01 22:13:30 mjf Exp $ */
|
||||
|
||||
/*
|
||||
* Copyright (c) 1995, 1996 Carnegie-Mellon University.
|
||||
|
@ -30,6 +30,10 @@
|
|||
#ifndef _SYS_CDEFS_ELF_H_
|
||||
#define _SYS_CDEFS_ELF_H_
|
||||
|
||||
#ifdef _KERNEL_OPT
|
||||
#include "opt_multiprocessor.h"
|
||||
#endif
|
||||
|
||||
#ifdef __LEADING_UNDERSCORE
|
||||
#define _C_LABEL(x) __CONCAT(_,x)
|
||||
#define _C_LABEL_STRING(x) "_"x
|
||||
|
@ -156,4 +160,38 @@
|
|||
#define __link_set_count(set) \
|
||||
(__link_set_end(set) - __link_set_start(set))
|
||||
|
||||
/*
|
||||
* On multiprocessor systems we can gain an improvement in performance
|
||||
* by being mindful of which cachelines data is placed in.
|
||||
*
|
||||
* __read_mostly:
|
||||
*
|
||||
* It makes sense to ensure that rarely modified data is not
|
||||
* placed in the same cacheline as frequently modified data.
|
||||
* To mitigate the phenomenon known as "false-sharing" we
|
||||
* can annotate rarely modified variables with __read_mostly.
|
||||
* All such variables are placed into the .data.read_mostly
|
||||
* section in the kernel ELF.
|
||||
*
|
||||
* Prime candidates for __read_mostly annotation are variables
|
||||
* which are hardly ever modified and which are used in code
|
||||
* hot-paths, e.g. pmap_initialized.
|
||||
*
|
||||
* __cacheline_aligned:
|
||||
*
|
||||
* Some data structures (mainly locks) benefit from being aligned
|
||||
* on a cacheline boundary, and having a cacheline to themselves.
|
||||
* This way, the modification of other data items cannot adversely
|
||||
* affect the lock and vice versa.
|
||||
*
|
||||
* Any variables annotated with __cacheline_aligned will be
|
||||
* placed into the .data.cacheline_aligned ELF section.
|
||||
*/
|
||||
#define __read_mostly \
|
||||
__attribute__((__section__(".data.read_mostly")))
|
||||
|
||||
/*
 * Align the annotated variable to COHERENCY_UNIT and place it in the
 * .data.cacheline_aligned ELF section.  The attributes in the list are
 * separated by a comma, as the GNU attribute syntax requires.
 */
#define __cacheline_aligned \
	__attribute__((__aligned__(COHERENCY_UNIT), \
	    __section__(".data.cacheline_aligned")))
|
||||
|
||||
#endif /* !_SYS_CDEFS_ELF_H_ */
|
||||
|
|
Loading…
Reference in New Issue