From e0e10b0607b6b59d8d4315ce6267af651d277586 Mon Sep 17 00:00:00 2001 From: mjf Date: Tue, 1 Jun 2010 22:13:30 +0000 Subject: [PATCH] Add __cacheline_aligned and __read_mostly annotations. These annotations help to mitigate false sharing on multiprocessor systems. Variables annotated with __cacheline_aligned are placed into the .data.cacheline_aligned section in the kernel. Each item in this section is aligned on a cacheline boundary - this avoids false sharing. Highly contended global locks are a good candidate for __cacheline_aligned annotation. Variables annotated with __read_mostly are packed together tightly into a .data.read_mostly section in the kernel. The idea here is that we can pack infrequently modified data items into a cacheline and avoid having to purge the cache, which would happen if read mostly data and write mostly data shared a cacheline. Initialisation variables are a prime candidate for __read_mostly annotations. --- sys/arch/amd64/conf/kern.ldscript | 18 ++++++++++-- sys/arch/amd64/conf/kern.ldscript.2MB | 16 +++++++++-- sys/arch/amd64/conf/kern.ldscript.Xen | 16 +++++++++-- sys/arch/i386/conf/kern.ldscript | 17 ++++++++++-- sys/arch/sparc64/conf/kern.ldscript | 7 ++++- sys/arch/sparc64/conf/kern32.ldscript | 7 ++++- sys/modules/xldscripts/kmodule | 8 ++++-- sys/net/if_gre.h | 6 ++-- sys/sys/cdefs_elf.h | 40 ++++++++++++++++++++++++++- 9 files changed, 118 insertions(+), 17 deletions(-) diff --git a/sys/arch/amd64/conf/kern.ldscript b/sys/arch/amd64/conf/kern.ldscript index 2a77faea33bd..4e513e2a240f 100644 --- a/sys/arch/amd64/conf/kern.ldscript +++ b/sys/arch/amd64/conf/kern.ldscript @@ -1,4 +1,4 @@ -/* $NetBSD: kern.ldscript,v 1.4 2007/10/18 15:28:33 yamt Exp $ */ +/* $NetBSD: kern.ldscript,v 1.5 2010/06/01 22:13:30 mjf Exp $ */ OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64") @@ -32,8 +32,22 @@ SECTIONS AT (LOADADDR(.text) + (ADDR(.data) - ADDR(.text))) { *(.data) - *(.data.*) } + + . 
= ALIGN(64); /* COHERENCY_UNIT */ + .data.cacheline_aligned : + AT (LOADADDR(.text) + (ADDR(.data.cacheline_aligned) - ADDR(.text))) + { + *(.data.cacheline_aligned) + } + . = ALIGN(64); /* COHERENCY_UNIT */ + .data.read_mostly : + AT (LOADADDR(.text) + (ADDR(.data.read_mostly) - ADDR(.text))) + { + *(.data.read_mostly) + } + . = ALIGN(64); /* COHERENCY_UNIT */ + _edata = . ; PROVIDE (edata = .) ; __bss_start = . ; diff --git a/sys/arch/amd64/conf/kern.ldscript.2MB b/sys/arch/amd64/conf/kern.ldscript.2MB index 1a3ced110ffe..f11c5df71729 100644 --- a/sys/arch/amd64/conf/kern.ldscript.2MB +++ b/sys/arch/amd64/conf/kern.ldscript.2MB @@ -1,4 +1,4 @@ -/* $NetBSD: kern.ldscript.2MB,v 1.2 2007/10/18 15:28:33 yamt Exp $ */ +/* $NetBSD: kern.ldscript.2MB,v 1.3 2010/06/01 22:13:30 mjf Exp $ */ OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64") @@ -33,8 +33,20 @@ SECTIONS AT (LOADADDR(.text) + (ADDR(.data) - ADDR(.text))) { *(.data) - *(.data.*) } + . = ALIGN(64); /* COHERENCY_UNIT */ + .data.cacheline_aligned : + AT (LOADADDR(.text) + (ADDR(.data.cacheline_aligned) - ADDR(.text))) + { + *(.data.cacheline_aligned) + } + . = ALIGN(64); /* COHERENCY_UNIT */ + .data.read_mostly : + AT (LOADADDR(.text) + (ADDR(.data.read_mostly) - ADDR(.text))) + { + *(.data.read_mostly) + } + . = ALIGN(64); /* COHERENCY_UNIT */ _edata = . ; PROVIDE (edata = .) ; __bss_start = . ; diff --git a/sys/arch/amd64/conf/kern.ldscript.Xen b/sys/arch/amd64/conf/kern.ldscript.Xen index 952c6938fc4e..8f09d8574f31 100644 --- a/sys/arch/amd64/conf/kern.ldscript.Xen +++ b/sys/arch/amd64/conf/kern.ldscript.Xen @@ -1,4 +1,4 @@ -/* $NetBSD: kern.ldscript.Xen,v 1.2 2007/11/22 16:16:44 bouyer Exp $ */ +/* $NetBSD: kern.ldscript.Xen,v 1.3 2010/06/01 22:13:30 mjf Exp $ */ OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64") @@ -26,8 +26,20 @@ SECTIONS AT (LOADADDR(.text) + (ADDR(.data) - ADDR(.text))) { *(.data) - *(.data.*) } + . 
= ALIGN(64); /* COHERENCY_UNIT */ + .data.cacheline_aligned : + AT (LOADADDR(.text) + (ADDR(.data.cacheline_aligned) - ADDR(.text))) + { + *(.data.cacheline_aligned) + } + . = ALIGN(64); /* COHERENCY_UNIT */ + .data.read_mostly : + AT (LOADADDR(.text) + (ADDR(.data.read_mostly) - ADDR(.text))) + { + *(.data.read_mostly) + } + . = ALIGN(64); /* COHERENCY_UNIT */ _edata = . ; PROVIDE (edata = .) ; __bss_start = . ; diff --git a/sys/arch/i386/conf/kern.ldscript b/sys/arch/i386/conf/kern.ldscript index 1827f14ebc00..000576a66d2e 100644 --- a/sys/arch/i386/conf/kern.ldscript +++ b/sys/arch/i386/conf/kern.ldscript @@ -1,4 +1,4 @@ -/* $NetBSD: kern.ldscript,v 1.7 2007/10/18 15:28:34 yamt Exp $ */ +/* $NetBSD: kern.ldscript,v 1.8 2010/06/01 22:13:30 mjf Exp $ */ OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") @@ -32,8 +32,21 @@ SECTIONS AT (LOADADDR(.text) + (ADDR(.data) - ADDR(.text))) { *(.data) - *(.data.*) } + . = ALIGN(64); /* COHERENCY_UNIT */ + .data.cacheline_aligned : + AT (LOADADDR(.text) + (ADDR(.data.cacheline_aligned) - ADDR(.text))) + { + *(.data.cacheline_aligned) + } + + . = ALIGN(64); /* COHERENCY_UNIT */ + .data.read_mostly : + AT (LOADADDR(.text) + (ADDR(.data.read_mostly) - ADDR(.text))) + { + *(.data.read_mostly) + } + . = ALIGN(64); /* COHERENCY_UNIT */ _edata = . ; PROVIDE (edata = .) ; __bss_start = . ; diff --git a/sys/arch/sparc64/conf/kern.ldscript b/sys/arch/sparc64/conf/kern.ldscript index cef558edd4e2..c0cfdf143c53 100644 --- a/sys/arch/sparc64/conf/kern.ldscript +++ b/sys/arch/sparc64/conf/kern.ldscript @@ -1,4 +1,4 @@ -/* $NetBSD: kern.ldscript,v 1.10 2007/10/17 19:57:28 garbled Exp $ */ +/* $NetBSD: kern.ldscript,v 1.11 2010/06/01 22:13:30 mjf Exp $ */ /* * Kernel linker script for NetBSD/sparc64. This script is based on @@ -74,6 +74,11 @@ SECTIONS CONSTRUCTORS } .data1 : { *(.data1) } + . = ALIGN(64); /* COHERENCY_UNIT */ + .data.cacheline_aligned : { *(.data.cacheline_aligned) } + . 
= ALIGN(64); /* COHERENCY_UNIT */ + .data.read_mostly : { *(.data.read_mostly) } + . = ALIGN(64); /* COHERENCY_UNIT */ .ctors : { *(.ctors) diff --git a/sys/arch/sparc64/conf/kern32.ldscript b/sys/arch/sparc64/conf/kern32.ldscript index e90d9b1fcb82..f6894cdbc4cf 100644 --- a/sys/arch/sparc64/conf/kern32.ldscript +++ b/sys/arch/sparc64/conf/kern32.ldscript @@ -1,4 +1,4 @@ -/* $NetBSD: kern32.ldscript,v 1.9 2007/10/17 19:57:28 garbled Exp $ */ +/* $NetBSD: kern32.ldscript,v 1.10 2010/06/01 22:13:30 mjf Exp $ */ /* * Kernel linker script for NetBSD/sparc. This script is based on @@ -75,6 +75,11 @@ SECTIONS CONSTRUCTORS } .data1 : { *(.data1) } + . = ALIGN(64); /* COHERENCY_UNIT */ + .data.cacheline_aligned : { *(.data.cacheline_aligned) } + . = ALIGN(64); /* COHERENCY_UNIT */ + .data.read_mostly : { *(.data.read_mostly) } + . = ALIGN(64); /* COHERENCY_UNIT */ .ctors : { *(.ctors) diff --git a/sys/modules/xldscripts/kmodule b/sys/modules/xldscripts/kmodule index 3b270c223d6d..d73b581427ac 100644 --- a/sys/modules/xldscripts/kmodule +++ b/sys/modules/xldscripts/kmodule @@ -1,4 +1,4 @@ -/* $NetBSD: kmodule,v 1.1 2009/11/10 14:47:52 skrll Exp $ */ +/* $NetBSD: kmodule,v 1.2 2010/06/01 22:13:30 mjf Exp $ */ SECTIONS { @@ -6,11 +6,15 @@ SECTIONS .data 0 : { *(.data) - *(.data.*) *(.bss) *(.bss.*) *(COMMON) } + . = ALIGN(64); /* COHERENCY_UNIT */ + .data.cacheline_aligned : { *(.data.cacheline_aligned) } + . = ALIGN(64); /* COHERENCY_UNIT */ + .data.read_mostly : { *(.data.read_mostly) } + . = ALIGN(64); /* COHERENCY_UNIT */ /* Pre-loaded modules do not need the following. */ /DISCARD/ : { diff --git a/sys/net/if_gre.h b/sys/net/if_gre.h index 335c04e66267..599748ffb6c5 100644 --- a/sys/net/if_gre.h +++ b/sys/net/if_gre.h @@ -1,4 +1,4 @@ -/* $NetBSD: if_gre.h,v 1.39 2008/09/08 23:36:55 gmcgarry Exp $ */ +/* $NetBSD: if_gre.h,v 1.40 2010/06/01 22:13:30 mjf Exp $ */ /* * Copyright (c) 1998, 2008 The NetBSD Foundation, Inc. 
@@ -63,12 +63,10 @@ enum gre_state { , GRE_S_DIE }; -#define __cacheline_aligned __aligned(CACHE_LINE_SIZE) - struct gre_bufq { volatile int bq_prodidx; volatile int bq_considx; - size_t bq_len __cacheline_aligned; + size_t bq_len __aligned(CACHE_LINE_SIZE); size_t bq_lenmask; volatile int bq_drops; struct mbuf **bq_buf; diff --git a/sys/sys/cdefs_elf.h b/sys/sys/cdefs_elf.h index 5c223a7f8988..0c30f9aed638 100644 --- a/sys/sys/cdefs_elf.h +++ b/sys/sys/cdefs_elf.h @@ -1,4 +1,4 @@ -/* $NetBSD: cdefs_elf.h,v 1.30 2008/07/21 15:22:19 lukem Exp $ */ +/* $NetBSD: cdefs_elf.h,v 1.31 2010/06/01 22:13:30 mjf Exp $ */ /* * Copyright (c) 1995, 1996 Carnegie-Mellon University. @@ -30,6 +30,10 @@ #ifndef _SYS_CDEFS_ELF_H_ #define _SYS_CDEFS_ELF_H_ +#ifdef _KERNEL_OPT +#include "opt_multiprocessor.h" +#endif + #ifdef __LEADING_UNDERSCORE #define _C_LABEL(x) __CONCAT(_,x) #define _C_LABEL_STRING(x) "_"x @@ -156,4 +160,38 @@ #define __link_set_count(set) \ (__link_set_end(set) - __link_set_start(set)) +/* + * On multiprocessor systems we can gain an improvement in performance + * by being mindful of which cachelines data is placed in. + * + * __read_mostly: + * + * It makes sense to ensure that rarely modified data is not + * placed in the same cacheline as frequently modified data. + * To mitigate the phenomenon known as "false-sharing" we + * can annotate rarely modified variables with __read_mostly. + * All such variables are placed into the .data.read_mostly + * section in the kernel ELF. + * + * Prime candidates for __read_mostly annotation are variables + * which are hardly ever modified and which are used in code + * hot-paths, e.g. pmap_initialized. + * + * __cacheline_aligned: + * + * Some data structures (mainly locks) benefit from being aligned + * on a cacheline boundary, and having a cacheline to themselves. + * This way, the modification of other data items cannot adversely + * affect the lock and vice versa. 
+ * + * Any variables annotated with __cacheline_aligned will be + * placed into the .data.cacheline_aligned ELF section. + */ +#define __read_mostly \ + __attribute__((__section__(".data.read_mostly"))) + +#define __cacheline_aligned \ + __attribute__((__aligned__(COHERENCY_UNIT), \ + __section__(".data.cacheline_aligned"))) + #endif /* !_SYS_CDEFS_ELF_H_ */