From 4691bf4bd7e3cb468c125a730ed521d91489cab9 Mon Sep 17 00:00:00 2001
From: riastradh
Date: Fri, 20 Oct 2017 14:48:43 +0000
Subject: [PATCH] Carve out KVA for execargs on boot from an exec_map like
 we used to.

Candidate fix for PR kern/45718: `processes sometimes get stuck and
spin in vm_map', a problem that has been plaguing all our 32-bit ports
for years.

Since we currently use large (256k) buffers for execargs, and since
nobody has stepped up to tackle breaking them into bite-sized (or at
least page-sized) chunks, after KVA gets sufficiently fragmented we
can't allocate new execargs buffers from kernel_map.

Until 2008, we always carved out KVA for execargs on boot with a uvm
submap exec_map of kernel_map.  Then ad@ found that the uvm_km_free
call, to discard them when done, cost about 100us, which a pool
avoided:

https://mail-index.NetBSD.org/tech-kern/2008/06/25/msg001854.html
https://mail-index.NetBSD.org/tech-kern/2008/06/26/msg001859.html

ad@ _simultaneously_ introduced a pool _and_ eliminated the reserved
KVA in the exec_map submap.  This change preserves the pool, but
restores exec_map (with less code, by putting it in MI code instead of
copying it in every MD initialization routine).

Patch proposed on tech-kern:

https://mail-index.NetBSD.org/tech-kern/2017/10/19/msg022461.html

Patch tested by bouyer@:

https://mail-index.NetBSD.org/tech-kern/2017/10/20/msg022465.html

I previously discussed the issue on tech-kern before I knew of the
history around exec_map:

https://mail-index.NetBSD.org/tech-kern/2012/12/09/msg014695.html

The candidate workaround I proposed of using pool_setlowat to force
preallocation of KVA would also force preallocation of physical RAM,
which is a waste not incurred by using exec_map, and which is part of
why I never committed it.

There may remain a general problem that if thread A calls pool_get and
tries to service that request by a uvm_km_alloc call that hangs
because KVA is scarce, and thread B does pool_put, the pool_put in
thread B will not notify the pool_get in thread A that it doesn't need
to wait for KVA, and so thread A may continue to hang in uvm_km_alloc.
However,

(a) That won't apply here, because there is exactly as much KVA
available in exec_map as exec_pool will ever try to use.

(b) It is possible that this may not even matter in other cases, as
long as the page daemon eventually tries to shrink the pool, which
will cause a uvm_km_free that can unhang the hung uvm_km_alloc.

XXX pullup-8
XXX pullup-7
XXX pullup-6
XXX pullup-5, perhaps...
---
 sys/kern/kern_exec.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
index 975a2edfc871..df1c342392c1 100644
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -1,4 +1,4 @@
-/* $NetBSD: kern_exec.c,v 1.447 2017/10/20 12:11:34 martin Exp $ */
+/* $NetBSD: kern_exec.c,v 1.448 2017/10/20 14:48:43 riastradh Exp $ */
 
 /*-
  * Copyright (c) 2008 The NetBSD Foundation, Inc.
@@ -59,7 +59,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_exec.c,v 1.447 2017/10/20 12:11:34 martin Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_exec.c,v 1.448 2017/10/20 14:48:43 riastradh Exp $");
 
 #include "opt_exec.h"
 #include "opt_execfmt.h"
@@ -280,11 +280,14 @@ struct spawn_exec_data {
 	volatile uint32_t sed_refcnt;
 };
 
+static struct vm_map *exec_map;
+static struct pool exec_pool;
+
 static void *
 exec_pool_alloc(struct pool *pp, int flags)
 {
 
-	return (void *)uvm_km_alloc(kernel_map, NCARGS, 0,
+	return (void *)uvm_km_alloc(exec_map, NCARGS, 0,
 	    UVM_KMF_PAGEABLE | UVM_KMF_WAITVA);
 }
 
@@ -292,11 +295,9 @@ static void
 exec_pool_free(struct pool *pp, void *addr)
 {
 
-	uvm_km_free(kernel_map, (vaddr_t)addr, NCARGS, UVM_KMF_PAGEABLE);
+	uvm_km_free(exec_map, (vaddr_t)addr, NCARGS, UVM_KMF_PAGEABLE);
 }
 
-static struct pool exec_pool;
-
 static struct pool_allocator exec_palloc = {
 	.pa_alloc = exec_pool_alloc,
 	.pa_free = exec_pool_free,
@@ -1820,8 +1821,12 @@ exec_init(int init_boot)
 
 	if (init_boot) {
 		/* do one-time initializations */
+		vaddr_t vmin, vmax;
+
 		rw_init(&exec_lock);
 		mutex_init(&sigobject_lock, MUTEX_DEFAULT, IPL_NONE);
+		exec_map = uvm_km_suballoc(kernel_map, &vmin, &vmax,
+		    maxexec*NCARGS, VM_MAP_PAGEABLE, false, NULL);
 		pool_init(&exec_pool, NCARGS, 0, 0, PR_NOALIGN|PR_NOTOUCH,
 		    "execargs", &exec_palloc, IPL_NONE);
 		pool_sethardlimit(&exec_pool, maxexec, "should not happen", 0);
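
The trick the patch restores generalizes beyond UVM: reserve a
dedicated virtual address range once at startup, and later fixed-size
allocations from it can never fail because some other allocator
fragmented the general-purpose map in the meantime.  The following is
a minimal userland sketch of that idea, assuming only POSIX mmap(2)
and madvise(2); reserve_range, chunk_alloc, chunk_free, CHUNK_SIZE,
and NCHUNKS are hypothetical stand-ins for exec_map, exec_pool_alloc,
exec_pool_free, NCARGS, and maxexec, not code from the patch.

/*
 * Userland sketch of the exec_map idea: reserve a fixed virtual
 * address range once at startup, then serve fixed-size chunks out of
 * it, so a chunk allocation can never fail due to fragmentation of
 * the general-purpose address space.
 */
#include <sys/mman.h>

#include <stdio.h>
#include <stdlib.h>

#define	CHUNK_SIZE	(256 * 1024)	/* stands in for NCARGS */
#define	NCHUNKS		16		/* stands in for maxexec */

static char *reserved;		/* base of the carved-out range */
static int in_use[NCHUNKS];	/* trivial allocator state */

/* One-time reservation, loosely analogous to uvm_km_suballoc(). */
static int
reserve_range(void)
{

	reserved = mmap(NULL, (size_t)CHUNK_SIZE * NCHUNKS,
	    PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);
	return (reserved == MAP_FAILED) ? -1 : 0;
}

/*
 * Hand out a free chunk.  VA is guaranteed as long as a slot is
 * free; physical pages are only faulted in when the chunk is
 * actually touched, loosely the analogue of UVM_KMF_PAGEABLE.
 */
static void *
chunk_alloc(void)
{
	int i;

	for (i = 0; i < NCHUNKS; i++) {
		if (!in_use[i]) {
			in_use[i] = 1;
			return reserved + (size_t)i * CHUNK_SIZE;
		}
	}
	return NULL;	/* all slots busy: the hard-limit case */
}

/* Return a chunk; keep the VA, let the OS reclaim the pages. */
static void
chunk_free(void *p)
{
	size_t i = ((char *)p - reserved) / CHUNK_SIZE;

	in_use[i] = 0;
	(void)madvise(p, CHUNK_SIZE, MADV_DONTNEED);
}

int
main(void)
{
	void *a, *b, *c;

	if (reserve_range() == -1)
		return EXIT_FAILURE;
	a = chunk_alloc();
	b = chunk_alloc();
	chunk_free(a);
	c = chunk_alloc();	/* reuses a's slot, no fresh VA needed */
	printf("a=%p b=%p c=%p\n", a, b, c);
	return EXIT_SUCCESS;
}

The design point mirrors the commit message: the reservation pins down
only virtual addresses, not physical RAM, so unlike the pool_setlowat
workaround nothing is committed until a chunk is actually touched, and
chunk_alloc can fail only when every slot is busy (the hard limit that
pool_sethardlimit enforces in the kernel version), never because of
fragmentation elsewhere.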