NetBSD/sys/kern/init_main.c

857 lines
21 KiB
C
Raw Normal View History

/* $NetBSD: init_main.c,v 1.318 2007/10/01 22:05:15 martin Exp $ */
1994-07-03 15:45:41 +04:00
/*
* Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993
* The Regents of the University of California. All rights reserved.
* (c) UNIX System Laboratories, Inc.
* All or some portions of this file are derived from material licensed
* to the University of California by American Telephone and Telegraph
* Co. or Unix System Laboratories, Inc. and are reproduced herein with
* the permission of UNIX System Laboratories, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)init_main.c 8.16 (Berkeley) 5/14/95
*/
/*
* Copyright (c) 1995 Christopher G. Demetriou. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
1994-07-03 15:45:41 +04:00
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
1998-03-01 05:20:01 +03:00
* @(#)init_main.c 8.16 (Berkeley) 5/14/95
1994-07-03 15:45:41 +04:00
*/
2001-11-12 18:25:01 +03:00
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: init_main.c,v 1.318 2007/10/01 22:05:15 martin Exp $");
2001-11-12 18:25:01 +03:00
#include "opt_ipsec.h"
#include "opt_multiprocessor.h"
#include "opt_ntp.h"
#include "opt_pipe.h"
#include "opt_posix.h"
#include "opt_syscall_debug.h"
#include "opt_sysv.h"
2007-02-10 00:55:00 +03:00
#include "opt_systrace.h"
#include "opt_fileassoc.h"
#include "opt_ktrace.h"
#include "opt_pax.h"
#include "rnd.h"
#include "sysmon_envsys.h"
#include "sysmon_power.h"
#include "sysmon_taskq.h"
#include "sysmon_wdog.h"
#include "veriexec.h"
1994-07-03 15:45:41 +04:00
#include <sys/param.h>
#include <sys/acct.h>
1994-07-03 15:45:41 +04:00
#include <sys/filedesc.h>
#include <sys/file.h>
1994-07-03 15:45:41 +04:00
#include <sys/errno.h>
#include <sys/callout.h>
#include <sys/cpu.h>
1994-07-03 15:45:41 +04:00
#include <sys/kernel.h>
#include <sys/kmem.h>
1994-07-03 15:45:41 +04:00
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/kthread.h>
1994-07-03 15:45:41 +04:00
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/fstrans.h>
#include <sys/tty.h>
1994-07-03 15:45:41 +04:00
#include <sys/conf.h>
#include <sys/disklabel.h>
1994-07-03 15:45:41 +04:00
#include <sys/buf.h>
#include <sys/device.h>
#include <sys/exec.h>
1998-08-02 08:53:11 +04:00
#include <sys/socketvar.h>
1994-07-03 15:45:41 +04:00
#include <sys/protosw.h>
#include <sys/reboot.h>
#include <sys/user.h>
#include <sys/sysctl.h>
#include <sys/event.h>
#include <sys/mbuf.h>
#include <sys/sched.h>
2007-02-10 00:55:00 +03:00
#include <sys/sleepq.h>
#include <sys/iostat.h>
2007-06-17 17:34:42 +04:00
#include <sys/vmem.h>
#include <sys/uuid.h>
#include <sys/extent.h>
2007-07-21 23:51:47 +04:00
#include <sys/disk.h>
#include <sys/mqueue.h>
2004-03-09 05:35:45 +03:00
#ifdef FAST_IPSEC
#include <netipsec/ipsec.h>
#endif
1996-02-09 21:59:18 +03:00
#ifdef SYSVSHM
#include <sys/shm.h>
#endif
2004-03-09 05:35:45 +03:00
#ifdef SYSVSEM
1996-02-09 21:59:18 +03:00
#include <sys/sem.h>
#endif
#ifdef SYSVMSG
#include <sys/msg.h>
#endif
2007-02-10 00:55:00 +03:00
#ifdef SYSTRACE
#include <sys/systrace.h>
#endif
#ifdef P1003_1B_SEMAPHORE
#include <sys/ksem.h>
#endif
1996-02-09 21:59:18 +03:00
#include <sys/domain.h>
1997-01-31 03:50:38 +03:00
#include <sys/namei.h>
#if NRND > 0
#include <sys/rnd.h>
#endif
#include <sys/pipe.h>
#ifdef LKM
#include <sys/lkm.h>
#endif
#if NVERIEXEC > 0
#include <sys/verified_exec.h>
#endif /* NVERIEXEC > 0 */
2007-02-10 00:55:00 +03:00
#ifdef KTRACE
#include <sys/ktrace.h>
#endif
#include <sys/debug.h>
2006-05-15 01:15:11 +04:00
#include <sys/kauth.h>
Resolve conflicts and adapt to NetBSD. Thanks to dyoung@, scw@, and perry@ for help testing. 2005-08-30 15:27 avatar Properly set ic_curchan before calling back to device driver to do channel switching(ifconfig devX channel Y). This fix should make channel changing works again in monitor mode. Submitted by: sam X-MFC-With: other ic_curchan changes 2005-08-13 18:50 sam revert 1.64: we cannot use the channel characteristics to decide when to do 11g erp sta accounting because b/g channels show up as false positives when operating in 11b. Noticed by: Michal Mertl 2005-08-13 18:31 sam Extend acl support to pass ioctl requests through and use this to add support for getting the current policy setting and collecting the list of mac addresses in the acl table. Submitted by: Michal Mertl (original version) MFC after: 2 weeks 2005-08-10 18:42 sam Don't use ic_curmode to decide when to do 11g station accounting, use the station channel properties. Fixes assert failure/bogus operation when an ap is operating in 11a and has associated stations then switches to 11g. 
Noticed by: Michal Mertl Reviewed by: avatar MFC after: 2 weeks 2005-08-10 17:22 sam Clarify/fix handling of the current channel: o add ic_curchan and use it uniformly for specifying the current channel instead of overloading ic->ic_bss->ni_chan (or in some drivers ic_ibss_chan) o add ieee80211_scanparams structure to encapsulate scanning-related state captured for rx frames o move rx beacon+probe response frame handling into separate routines o change beacon+probe response handling to treat the scan table more like a scan cache--look for an existing entry before adding a new one; this combined with ic_curchan use corrects handling of stations that were previously found at a different channel o move adhoc neighbor discovery by beacon+probe response frames to a new ieee80211_add_neighbor routine Reviewed by: avatar Tested by: avatar, Michal Mertl MFC after: 2 weeks 2005-08-09 11:19 rwatson Propagate rename of IFF_OACTIVE and IFF_RUNNING to IFF_DRV_OACTIVE and IFF_DRV_RUNNING, as well as the move from ifnet.if_flags to ifnet.if_drv_flags. Device drivers are now responsible for synchronizing access to these flags, as they are in if_drv_flags. This helps prevent races between the network stack and device driver in maintaining the interface flags field. Many __FreeBSD__ and __FreeBSD_version checks maintained and continued; some less so. 
Reviewed by: pjd, bz MFC after: 7 days 2005-08-08 19:46 sam Split crypto tx+rx key indices and add a key index -> node mapping table: Crypto changes: o change driver/net80211 key_alloc api to return tx+rx key indices; a driver can leave the rx key index set to IEEE80211_KEYIX_NONE or set it to be the same as the tx key index (the former disables use of the key index in building the keyix->node mapping table and is the default setup for naive drivers by null_key_alloc) o add cs_max_keyid to crypto state to specify the max h/w key index a driver will return; this is used to allocate the key index mapping table and to bounds check table loookups o while here introduce ieee80211_keyix (finally) for the type of a h/w key index o change crypto notifiers for rx failures to pass the rx key index up as appropriate (michael failure, replay, etc.) Node table changes: o optionally allocate a h/w key index to node mapping table for the station table using the max key index setting supplied by drivers (note the scan table does not get a map) o defer node table allocation to lateattach so the driver has a chance to set the max key id to size the key index map o while here also defer the aid bitmap allocation o add new ieee80211_find_rxnode_withkey api to find a sta/node entry on frame receive with an optional h/w key index to use in checking mapping table; also updates the map if it does a hash lookup and the found node has a rx key index set in the unicast key; note this work is separated from the old ieee80211_find_rxnode call so drivers do not need to be aware of the new mechanism o move some node table manipulation under the node table lock to close a race on node delete o add ieee80211_node_delucastkey to do the dirty work of deleting unicast key state for a node (deletes any key and handles key map references) Ath driver: o nuke private sc_keyixmap mechansim in favor of net80211 support o update key alloc api These changes close several race conditions for the ath driver 
operating in ap mode. Other drivers should see no change. Station mode operation for ath no longer uses the key index map but performance tests show no noticeable change and this will be fixed when the scan table is eliminated with the new scanning support. Tested by: Michal Mertl, avatar, others Reviewed by: avatar, others MFC after: 2 weeks 2005-08-08 06:49 sam use ieee80211_iterate_nodes to retrieve station data; the previous code walked the list w/o locking MFC after: 1 week 2005-08-08 04:30 sam Cleanup beacon/listen interval handling: o separate configured beacon interval from listen interval; this avoids potential use of one value for the other (e.g. setting powersavesleep to 0 clobbers the beacon interval used in hostap or ibss mode) o bounds check the beacon interval received in probe response and beacon frames and drop frames with bogus settings; not clear if we should instead clamp the value as any alteration would result in mismatched sta+ap configuration and probably be more confusing (don't want to log to the console but perhaps ok with rate limiting) o while here up max beacon interval to reflect WiFi standard Noticed by: Martin <nakal@nurfuerspam.de> MFC after: 1 week 2005-08-06 05:57 sam fix debug msg typo MFC after: 3 days 2005-08-06 05:56 sam Fix handling of frames sent prior to a station being authorized when operating in ap mode. Previously we allocated a node from the station table, sent the frame (using the node), then released the reference that "held the frame in the table". But while the frame was in flight the node might be reclaimed which could lead to problems. The solution is to add an ieee80211_tmp_node routine that crafts a node that does exist in a table and so isn't ever reclaimed; it exists only so long as the associated frame is in flight. 
MFC after: 5 days 2005-07-31 07:12 sam close a race between reclaiming a node when a station is inactive and sending the null data frame used to probe inactive stations MFC after: 5 days 2005-07-27 05:41 sam when bridging internally bypass the bss node as traffic to it must follow the normal input path Submitted by: Michal Mertl MFC after: 5 days 2005-07-27 03:53 sam bandaid ni_fails handling so ap's with association failures are reconsidered after a bit; a proper fix involves more changes to the scanning infrastructure Reviewed by: avatar, David Young MFC after: 5 days 2005-07-23 01:16 sam the AREF flag is only meaningful in ap mode; adhoc neighbors now are timed out of the sta/neighbor table 2005-07-23 00:25 sam o move inactivity-related debug msgs under IEEE80211_MSG_INACT o probe inactive neighbors in adhoc mode (they don't have an association id so previously were being timed out) MFC after: 3 days 2005-07-22 22:11 sam split xmit of probe request frame out into a separate routine that takes explicit parameters; this will be needed when scanning is decoupled from the state machine to do bg scanning MFC after: 3 days 2005-07-22 21:48 sam split 802.11 frame xmit setup code into ieee80211_send_setup MFC after: 3 days 2005-07-22 18:57 sam simplify ic_newassoc callback MFC after: 3 days 2005-07-22 18:54 sam simplify ieee80211_ibss_merge api MFC after: 3 days 2005-07-22 18:50 sam add stats we know we'll need soon and some spare fields for future expansion MFC after: 3 days 2005-07-22 18:45 sam simplify tim callback api MFC after: 3 days 2005-07-22 18:42 sam don't include 802.3 header in min frame length calculation as it may not be present for a frag; fixes problem with small (fragmented) frames being dropped Obtained from: Atheros MFC after: 3 days 2005-07-22 18:36 sam simplify ieee80211_node_authorize and ieee80211_node_unauthorize api's MFC after: 3 days 2005-07-22 18:31 sam simplifiy ieee80211_send_nulldata api MFC after: 3 days 2005-07-22 18:29 sam simplify rate 
set api's by removing ic parameter (implicit in node reference) MFC after: 3 days 2005-07-22 18:21 sam reject association requests with a wpa/rsn ie when wpa/rsn is not configured on the ap; previously we either ignored the ie or (possibly) failed an assertion Obtained from: Atheros MFC after: 3 days 2005-07-22 18:16 sam missed one in last commit; add device name to discard msgs 2005-07-22 18:13 sam include device name in discard msgs 2005-07-22 18:12 sam add diag msgs for frames discarded because the direction field is wrong 2005-07-22 18:08 sam split data frame delivery out to a new function ieee80211_deliver_data 2005-07-22 18:00 sam o add IEEE80211_IOC_FRAGTHRESHOLD for getting+setting the tx fragmentation threshold o fix bounds checking on IEEE80211_IOC_RTSTHRESHOLD MFC after: 3 days 2005-07-22 17:55 sam o add IEEE80211_FRAG_DEFAULT o move default settings for RTS and frag thresholds to ieee80211_var.h 2005-07-22 17:50 sam diff reduction against p4: define IEEE80211_FIXED_RATE_NONE and use it instead of -1 2005-07-22 17:37 sam add flags missed in last merge 2005-07-22 17:36 sam Diff reduction against p4: o add ic_flags_ext for eventual extention of ic_flags o define/reserve flag+capabilities bits for superg, bg scan, and roaming support o refactor debug msg macros MFC after: 3 days 2005-07-22 06:17 sam send a response when an auth request is denied due to an acl; might be better to silently ignore the frame but this way we give stations a chance of figuring out what's wrong 2005-07-22 06:15 sam remove excess whitespace 2005-07-22 05:55 sam use IF_HANDOFF when bridging frames internally so if_start gets called; fixes communication between associated sta's MFC after: 3 days 2005-07-11 04:06 sam Handle encrypt of arbitarily fragmented mbuf chains: previously we bailed if we couldn't collect the 16-bytes of data required for an aes block cipher in 2 mbufs; now we deal with it. 
While here make space accounting signed so a sanity check does the right thing for malformed mbuf chains. Approved by: re (scottl) 2005-07-11 04:00 sam nuke assert that duplicates real check Reviewed by: avatar Approved by: re (scottl)
2005-11-18 19:40:08 +03:00
#include <net80211/ieee80211_netbsd.h>
1994-07-03 15:45:41 +04:00
#include <sys/syscall.h>
#include <sys/syscallargs.h>
#if defined(PAX_MPROTECT) || defined(PAX_SEGVGUARD)
#include <sys/pax.h>
#endif /* PAX_MPROTECT || PAX_SEGVGUARD */
1994-07-03 15:45:41 +04:00
#include <ufs/ufs/quota.h>
#include <miscfs/genfs/genfs.h>
#include <miscfs/syncfs/syncfs.h>
1994-07-03 15:45:41 +04:00
#include <machine/cpu.h>
#include <uvm/uvm.h>
#if NSYSMON_TASKQ > 0
#include <dev/sysmon/sysmon_taskq.h>
#endif
#include <dev/cons.h>
#if NSYSMON_ENVSYS > 0 || NSYSMON_POWER > 0 || NSYSMON_WDOG > 0
#include <dev/sysmon/sysmonvar.h>
#endif
1996-02-04 05:15:01 +03:00
#include <net/if.h>
#include <net/raw_cb.h>
1994-07-03 15:45:41 +04:00
First take at security model abstraction. - Add a few scopes to the kernel: system, network, and machdep. - Add a few more actions/sub-actions (requests), and start using them as opposed to the KAUTH_GENERIC_ISSUSER place-holders. - Introduce a basic set of listeners that implement our "traditional" security model, called "bsd44". This is the default (and only) model we have at the moment. - Update all relevant documentation. - Add some code and docs to help folks who want to actually use this stuff: * There's a sample overlay model, sitting on top of "bsd44", for fast experimenting with tweaking just a subset of an existing model. This is pretty cool because it's *really* straightforward to do stuff you had to use ugly hacks for until now... * And of course, documentation describing how to do the above for quick reference, including code samples. All of these changes were tested for regressions using a Python-based testsuite that will be (I hope) available soon via pkgsrc. Information about the tests, and how to write new ones, can be found on: http://kauth.linbsd.org/kauthwiki NOTE FOR DEVELOPERS: *PLEASE* don't add any code that does any of the following: - Uses a KAUTH_GENERIC_ISSUSER kauth(9) request, - Checks 'securelevel' directly, - Checks a uid/gid directly. (or if you feel you have to, contact me first) This is still work in progress; It's far from being done, but now it'll be a lot easier. Relevant mailing list threads: http://mail-index.netbsd.org/tech-security/2006/01/25/0011.html http://mail-index.netbsd.org/tech-security/2006/03/24/0001.html http://mail-index.netbsd.org/tech-security/2006/04/18/0000.html http://mail-index.netbsd.org/tech-security/2006/05/15/0000.html http://mail-index.netbsd.org/tech-security/2006/08/01/0000.html http://mail-index.netbsd.org/tech-security/2006/08/25/0000.html Many thanks to YAMAMOTO Takashi, Matt Thomas, and Christos Zoulas for help stabilizing kauth(9). 
Full credit for the regression tests, making sure these changes didn't break anything, goes to Matt Fleming and Jaime Fournier. Happy birthday Randi! :)
2006-09-09 00:58:56 +04:00
#include <secmodel/secmodel.h>
/*
* Process 0, its LWP and its cwd info. They are defined outside this
* file; main() below wires lwp0 to proc0 and later points
* cwdi0.cwdi_cdir at the root vnode.
*/
extern struct proc proc0;
extern struct lwp lwp0;
extern struct cwdinfo cwdi0;
/* Handed to inittodr() by main() after the root file system is mounted. */
extern time_t rootfstime;
2003-01-18 13:06:22 +03:00
/* When curlwp is not provided as a macro, define it here, starting at lwp0. */
#ifndef curlwp
struct lwp *curlwp = &lwp0;
#endif
1997-10-10 12:19:41 +04:00
/* main() passes &initproc to fork1() when it creates process 1. */
struct proc *initproc;
1994-07-03 15:45:41 +04:00
struct vnode *rootvp, *swapdev_vp;
/* Boot flags; main() ORs in RB_ASKNAME when mounting root fails. */
int boothowto;
int cold = 1; /* still working on startup */
struct timeval boottime; /* time at system startup - will only follow settime deltas */
/* NOTE(review): the comment says "assume 1" but the initializer is 0 - confirm intent. */
int ncpu = 0; /* number of CPUs configured, assume 1 */
1994-07-03 15:45:41 +04:00
/*
* start_init() sleeps on this until main() sets it to 1 and calls
* wakeup() on its address.
*/
volatile int start_init_exec; /* semaphore for start_init() */
2005-12-11 15:16:03 +03:00
/* Forward declarations for the functions defined later in this file. */
static void check_console(struct lwp *l);
static void start_init(void *);
void main(void);
#if defined(__SSP__) || defined(__SSP_ALL__)
/*
 * Stack-protector support.
 *
 * The guard words start out as zero; main() overwrites them with bytes
 * obtained from arc4randbytes() after configure() has run.
 */
long __stack_chk_guard[8] = { 0 };

/*
 * Panic with a "stack overflow detected" message.  No caller is visible
 * in this file; presumably the compiler's stack-protector instrumentation
 * emits the calls -- confirm against the toolchain documentation.
 */
void __stack_chk_fail(void);

void
__stack_chk_fail(void)
{

	panic("stack overflow detected; terminated");
}
#endif
/*
 * Do-nothing security model start routine.
 *
 * secmodel_start is declared as a weak alias of this function, so the
 * secmodel_start() call in main() resolves here when nothing else
 * supplies that symbol.
 */
void __secmodel_none(void);
__weak_alias(secmodel_start,__secmodel_none);

void
__secmodel_none(void)
{

	/* Intentionally empty: there is no security model to start. */
}
1994-07-03 15:45:41 +04:00
/*
* System startup; initialize the world, create process 0, mount root
* filesystem, and fork to create init and pagedaemon. Most of the
* hard work is done in the lower-level initialization routines including
* startup(), which does memory initialization and autoconfiguration.
*
* Takes no arguments. The last call is uvm_scheduler(), which is an
* infinite loop, so the end of the function is not reached.
*/
1996-02-04 05:15:01 +03:00
void
main(void)
1994-07-03 15:45:41 +04:00
{
#ifdef __HAVE_TIMECOUNTER
struct timeval time;
#endif
2003-01-18 13:06:22 +03:00
struct lwp *l;
struct proc *p;
struct pdevinit *pdev;
int s, error;
1994-07-03 15:45:41 +04:00
extern struct pdevinit pdevinit[];
#ifdef NVNODE_IMPLICIT
int usevnodes;
#endif
CPU_INFO_ITERATOR cii;
struct cpu_info *ci;
1994-07-03 15:45:41 +04:00
2003-01-18 13:06:22 +03:00
/* Wire lwp0 to the current CPU and to proc0, and give it LWP id 1. */
l = &lwp0;
l->l_cpu = curcpu();
l->l_proc = &proc0;
l->l_lid = 1;
2004-03-09 05:35:45 +03:00
/*
* XXX This is a temporary check to be removed before
* NetBSD 5.0 is released.
*/
#if !defined(__i386__ ) && !defined(__x86_64__)
if (curlwp != l) {
printf("NOTICE: curlwp should be set before main()\n");
DELAY(250000);
curlwp = l;
}
#endif
1994-07-03 15:45:41 +04:00
/*
* Attempt to find console and initialize
* in case of early panic or other messages.
*/
consinit();
KERNEL_LOCK_INIT();
uvm_init();
2007-02-10 00:55:00 +03:00
#ifdef DEBUG
debug_init();
#endif
kmem_init();
/* Initialize the extent manager. */
extent_init();
/* Do machine-dependent initialization. */
cpu_startup();
/* Initialize callouts, part 1. */
callout_startup();
/*
* Initialize the kernel authorization subsystem and start the
* default security model, if any. We need to do this early
* enough so that subsystems relying on any of the aforementioned
* can work properly. Since the security model may dictate the
* credential inheritance policy, it is needed at least before
* any process is created, specifically proc0.
*/
kauth_init();
secmodel_start();
/* Initialize the buffer cache */
bufinit();
/*
* Initialize mbuf's. Do this now because we might attempt to
* allocate mbufs or mbuf clusters during autoconfiguration.
*/
mbinit();
1998-08-02 08:53:11 +04:00
/* Initialize sockets. */
soinit();
/*
* The following things must be done before autoconfiguration.
*/
evcnt_init(); /* initialize event counters */
#if NRND > 0
rnd_init(); /* initialize RNG */
#endif
/* Initialize process and pgrp structures. */
procinit();
lwpinit();
/* Initialize signal-related data structures. */
signal_init();
1994-07-03 15:45:41 +04:00
/* Create process 0 (the swapper). */
proc0_init();
1994-07-03 15:45:41 +04:00
/* Initialize the UID hash table. */
uid_init();
/* Charge root for one process. */
1994-07-03 15:45:41 +04:00
(void)chgproccnt(0, 1);
2007-02-10 00:55:00 +03:00
/* Initialize the run queues, turnstiles and sleep queues. */
mutex_init(&cpu_lock, MUTEX_DEFAULT, IPL_NONE);
sched_rqinit();
2007-02-10 00:55:00 +03:00
turnstile_init();
sleeptab_init(&sleeptab);
/* MI initialization of the boot cpu */
error = mi_cpu_attach(curcpu());
KASSERT(error == 0);
2007-02-10 00:55:00 +03:00
/* Initialize the sysctl subsystem. */
sysctl_init();
1994-07-03 15:45:41 +04:00
/* Initialize I/O statistics. */
iostat_init();
1994-07-03 15:45:41 +04:00
/* Initialize the file systems. */
#ifdef NVNODE_IMPLICIT
/*
* If maximum number of vnodes in namei vnode cache is not explicitly
* defined in kernel config, adjust the number such as we use roughly
* 1.0% of memory for vnode cache (but not less than NVNODE vnodes).
*/
usevnodes = (ptoa((unsigned)physmem) / 100) / sizeof(struct vnode);
if (usevnodes > desiredvnodes)
desiredvnodes = usevnodes;
#endif
1994-07-03 15:45:41 +04:00
vfsinit();
/* Initialize fstrans. */
fstrans_init();
/* Initialize the select()/poll() system calls. */
selsysinit();
/* Initialize asynchronous I/O. */
aio_sysinit();
/* Initialize message queues. */
mqueue_sysinit();
/* Initialize the system monitor subsystems. */
#if NSYSMON_TASKQ > 0
sysmon_task_queue_preinit();
#endif
#if NSYSMON_ENVSYS > 0
sysmon_envsys_init();
#endif
#if NSYSMON_POWER > 0
sysmon_power_init();
#endif
#if NSYSMON_WDOG > 0
sysmon_wdog_init();
#endif
#ifdef __HAVE_TIMECOUNTER
inittimecounter();
ntp_init();
#endif /* __HAVE_TIMECOUNTER */
/* Initialize the device switch tables. */
devsw_init();
2007-07-21 23:51:47 +04:00
/* Initialize the disk wedge subsystem. */
dkwedge_init();
/* Configure the system hardware. This will enable interrupts. */
configure();
1994-07-03 15:45:41 +04:00
#if defined(__SSP__) || defined(__SSP_ALL__)
{
#ifdef DIAGNOSTIC
printf("Initializing SSP:");
#endif
/*
* We initialize ssp here carefully:
* 1. after we got some entropy
* 2. without calling a function
*/
size_t i;
long guard[__arraycount(__stack_chk_guard)];
/*
* Fill a local buffer first, then copy it word by word into
* __stack_chk_guard with a plain loop.
*/
arc4randbytes(guard, sizeof(guard));
for (i = 0; i < __arraycount(guard); i++)
__stack_chk_guard[i] = guard[i];
#ifdef DIAGNOSTIC
for (i = 0; i < __arraycount(guard); i++)
printf("%lx ", guard[i]);
printf("\n");
#endif
}
#endif
ubc_init(); /* must be after autoconfig */
/* Lock the kernel on behalf of proc0. */
2007-02-10 00:55:00 +03:00
KERNEL_LOCK(1, l);
#ifdef SYSTRACE
systrace_init();
#endif
1994-07-03 15:45:41 +04:00
#ifdef SYSVSHM
/* Initialize System V style shared memory. */
shminit();
#endif
#ifdef SYSVSEM
/* Initialize System V style semaphores. */
seminit();
#endif
#ifdef SYSVMSG
/* Initialize System V style message queues. */
msginit();
#endif
#ifdef P1003_1B_SEMAPHORE
/* Initialize posix semaphores */
ksem_init();
#endif
#if NVERIEXEC > 0
/*
* Initialise the Veriexec subsystem.
*/
veriexec_init();
#endif /* NVERIEXEC > 0 */
#if defined(PAX_MPROTECT) || defined(PAX_SEGVGUARD)
pax_init();
#endif /* PAX_MPROTECT || PAX_SEGVGUARD */
1994-07-03 15:45:41 +04:00
/*
* Attach pseudo-devices. The pdevinit[] table ends at the first
* entry whose pdev_attach is NULL.
*/
for (pdev = pdevinit; pdev->pdev_attach != NULL; pdev++)
(*pdev->pdev_attach)(pdev->pdev_count);
#ifdef FAST_IPSEC
/* Attach network crypto subsystem */
ipsec_attach();
#endif
1994-07-03 15:45:41 +04:00
/*
* Initialize protocols. Block reception of incoming packets
* until everything is ready.
*/
s = splnet();
1994-07-03 15:45:41 +04:00
ifinit();
domaininit();
if_attachdomain();
1994-07-03 15:45:41 +04:00
splx(s);
#ifdef GPROF
/* Initialize kernel profiling. */
kmstartup();
#endif
/* Initialize system accounting. */
acct_init();
#ifndef PIPE_SOCKETPAIR
2007-03-13 00:31:03 +03:00
/* Initialize pipes. */
pipe_init();
#endif
2007-03-13 00:31:03 +03:00
/* Setup the scheduler */
sched_init();
1994-07-03 15:45:41 +04:00
2007-02-10 00:55:00 +03:00
#ifdef KTRACE
/* Initialize ktrace. */
ktrinit();
#endif
/* Initialize the UUID system calls. */
uuid_init();
/*
* Create process 1 (init(8)). We do this now, as Unix has
* historically had init be process 1, and changing this would
* probably upset a lot of people.
*
* Note that process 1 won't immediately exec init(8), but will
* wait for us to inform it that the root file system has been
* mounted.
*/
2003-01-18 13:06:22 +03:00
if (fork1(l, 0, SIGCHLD, NULL, 0, start_init, NULL, NULL, &initproc))
panic("fork init");
/*
* Now that device driver threads have been created, wait for
* them to finish any deferred autoconfiguration. Note we don't
* need to lock this semaphore, since we haven't booted any
* secondary processors, yet.
*/
while (config_pending)
(void) tsleep(&config_pending, PWAIT, "cfpend", 0);
/*
* Finalize configuration now that all real devices have been
* found. This needs to be done before the root device is
* selected, since finalization may create the root device.
*/
config_finalize();
/*
* Now that autoconfiguration has completed, we can determine
* the root and dump devices.
*/
cpu_rootconf();
cpu_dumpconf();
1994-07-03 15:45:41 +04:00
/*
* Mount the root file system. Keep retrying until vfs_mountroot()
* succeeds; after each failure set RB_ASKNAME and call setroot()
* before the next attempt.
*/
do {
domountroothook();
if ((error = vfs_mountroot())) {
printf("cannot mount root, error = %d\n", error);
boothowto |= RB_ASKNAME;
setroot(root_device,
(rootdev != NODEV) ? DISKPART(rootdev) : 0);
}
} while (error != 0);
mountroothook_destroy();
/*
* Initialise the time-of-day clock, passing the time recorded
* in the root filesystem (if any) for use by systems that
* don't have a non-volatile time-of-day device.
*/
inittodr(rootfstime);
/*
* Flag the first entry on mountlist as the root file system and
* bump the vfs_refcount of its mnt_op.
*/
CIRCLEQ_FIRST(&mountlist)->mnt_flag |= MNT_ROOTFS;
CIRCLEQ_FIRST(&mountlist)->mnt_op->vfs_refcount++;
1994-07-03 15:45:41 +04:00
/*
* Get the vnode for '/'. Set cwdi0.cwdi_cdir to
* reference it.
*/
error = VFS_ROOT(CIRCLEQ_FIRST(&mountlist), &rootvnode);
if (error)
panic("cannot find root vnode, error=%d", error);
cwdi0.cwdi_cdir = rootvnode;
VREF(cwdi0.cwdi_cdir);
1998-03-01 05:20:01 +03:00
VOP_UNLOCK(rootvnode, 0);
cwdi0.cwdi_rdir = NULL;
1994-07-03 15:45:41 +04:00
/*
* Now that root is mounted, we can fixup initproc's CWD
* info. All other processes are kthreads, which merely
* share proc0's CWD info.
1994-07-03 15:45:41 +04:00
*/
initproc->p_cwdi->cwdi_cdir = rootvnode;
VREF(initproc->p_cwdi->cwdi_cdir);
initproc->p_cwdi->cwdi_rdir = NULL;
1994-07-03 15:45:41 +04:00
/*
* Now can look at time, having had a chance to verify the time
2007-02-10 00:55:00 +03:00
* from the file system. Reset l->l_rtime as it may have been
* munched in mi_switch() after the time got set.
*/
#ifdef __HAVE_TIMECOUNTER
getmicrotime(&time);
#else
mono_time = time;
#endif
boottime = time;
/*
* Under proclist_lock, walk every process on allproc: record the
* boot time as its start time and, for each of its LWPs, set
* spc_runtime on the LWP's CPU and zero l_rtime. Note that the
* inner loop reuses `l' as its cursor, so after this point `l' no
* longer refers to lwp0.
*/
mutex_enter(&proclist_lock);
2003-11-02 19:42:22 +03:00
LIST_FOREACH(p, &allproc, p_list) {
KASSERT((p->p_flag & PK_MARKER) == 0);
2007-02-10 00:55:00 +03:00
mutex_enter(&p->p_smutex);
p->p_stats->p_start = time;
2003-11-02 19:42:22 +03:00
LIST_FOREACH(l, &p->p_lwps, l_sibling) {
2007-02-10 00:55:00 +03:00
lwp_lock(l);
l->l_cpu->ci_schedstate.spc_runtime = time;
l->l_rtime.tv_sec = l->l_rtime.tv_usec = 0;
lwp_unlock(l);
2003-11-02 19:42:22 +03:00
}
2007-02-10 00:55:00 +03:00
mutex_exit(&p->p_smutex);
}
mutex_exit(&proclist_lock);
1994-07-03 15:45:41 +04:00
/* Set spc_lastmod on every CPU to time_second. */
for (CPU_INFO_FOREACH(cii, ci)) {
ci->ci_schedstate.spc_lastmod = time_second;
}
/* Create the pageout daemon kernel thread. */
uvm_swap_init();
if (kthread_create(PVM, 0, NULL, uvm_pageout,
NULL, NULL, "pgdaemon"))
panic("fork pagedaemon");
1994-07-03 15:45:41 +04:00
/* Create the filesystem syncer kernel thread. */
if (kthread_create(PINOD, 0, NULL, sched_sync, NULL, NULL, "ioflush"))
panic("fork syncer");
/* Create the aiodone daemon kernel thread. */
if (workqueue_create(&uvm.aiodone_queue, "aiodoned",
uvm_aiodone_worker, NULL, PVM, IPL_BIO, 0))
panic("fork aiodoned");
2007-06-17 17:34:42 +04:00
vmem_rehash_start();
#if defined(MULTIPROCESSOR)
/* Boot the secondary processors. */
cpu_boot_secondary_processors();
#endif
/* Initialize exec structures */
exec_init(1);
/*
* Okay, now we can let init(8) exec! It's off to userland!
*/
start_init_exec = 1;
wakeup(&start_init_exec);
1994-07-03 15:45:41 +04:00
/* The scheduler is an infinite loop. */
uvm_scheduler();
1994-07-03 15:45:41 +04:00
/* NOTREACHED */
}
static void
2005-12-11 15:16:03 +03:00
check_console(struct lwp *l)
{
struct nameidata nd;
int error;
2005-12-11 15:16:03 +03:00
NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/dev/console", l);
error = namei(&nd);
if (error == 0)
vrele(nd.ni_vp);
else if (error == ENOENT)
printf("warning: no /dev/console\n");
else
printf("warning: lookup /dev/console: error %d\n", error);
}
1994-07-03 15:45:41 +04:00
/*
* List of paths to try when searching for "init".
*
* The table ends with a NULL entry. start_init() indexes it with ipx
* and tests the entry for NULL before printing it as the default.
*/
static const char *initpaths[] = {
1994-07-03 15:45:41 +04:00
"/sbin/init",
"/sbin/oinit",
"/sbin/init.bak",
NULL,
};
/*
* Start the initial user process; try exec'ing each pathname in "initpaths".
* The program is invoked with one argument containing the boot flags.
*/
static void
start_init(void *arg)
1994-07-03 15:45:41 +04:00
{
2003-01-18 13:06:22 +03:00
struct lwp *l = arg;
struct proc *p = l->l_proc;
vaddr_t addr;
struct sys_execve_args /* {
1997-10-19 06:00:19 +04:00
syscallarg(const char *) path;
syscallarg(char * const *) argp;
syscallarg(char * const *) envp;
} */ args;
int options, i, error;
register_t retval[2];
char flags[4], *flagsp;
const char *path, *slash;
char *ucp, **uap, *arg0, *arg1 = NULL;
char ipath[129];
int ipx, len;
1994-07-03 15:45:41 +04:00
/*
* Now in process 1.
*/
1999-04-12 04:22:08 +04:00
strncpy(p->p_comm, "init", MAXCOMLEN);
1994-07-03 15:45:41 +04:00
/*
* Wait for main() to tell us that it's safe to exec.
*/
while (start_init_exec == 0)
(void) tsleep(&start_init_exec, PWAIT, "initexec", 0);
/*
* This is not the right way to do this. We really should
* hand-craft a descriptor onto /dev/console to hand to init,
* but that's a _lot_ more work, and the benefit from this easy
* hack makes up for the "good is the enemy of the best" effect.
*/
2005-12-11 15:16:03 +03:00
check_console(l);
1994-07-03 15:45:41 +04:00
/*
* Need just enough stack to hold the faked-up "execve()" arguments.
*/
addr = (vaddr_t)STACK_ALLOC(USRSTACK, PAGE_SIZE);
if (uvm_map(&p->p_vmspace->vm_map, &addr, PAGE_SIZE,
NULL, UVM_UNKNOWN_OFFSET, 0,
UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_COPY,
UVM_ADV_NORMAL,
UVM_FLAG_FIXED|UVM_FLAG_OVERLAY|UVM_FLAG_COPYONW)) != 0)
panic("init: couldn't allocate argument space");
p->p_vmspace->vm_maxsaddr = (void *)STACK_MAX(addr, PAGE_SIZE);
1994-07-03 15:45:41 +04:00
ipx = 0;
while (1) {
if (boothowto & RB_ASKNAME) {
printf("init path");
if (initpaths[ipx])
printf(" (default %s)", initpaths[ipx]);
printf(": ");
len = cngetsn(ipath, sizeof(ipath)-1);
if (len == 0) {
if (initpaths[ipx])
path = initpaths[ipx++];
else
continue;
} else {
ipath[len] = '\0';
path = ipath;
}
} else {
if ((path = initpaths[ipx++]) == NULL)
break;
}
ucp = (char *)USRSTACK;
1994-07-03 15:45:41 +04:00
/*
* Construct the boot flag argument.
1994-07-03 15:45:41 +04:00
*/
flagsp = flags;
*flagsp++ = '-';
1994-07-03 15:45:41 +04:00
options = 0;
1994-07-03 15:45:41 +04:00
if (boothowto & RB_SINGLE) {
*flagsp++ = 's';
1994-07-03 15:45:41 +04:00
options = 1;
}
#ifdef notyet
if (boothowto & RB_FASTBOOT) {
*flagsp++ = 'f';
1994-07-03 15:45:41 +04:00
options = 1;
}
#endif
/*
* Move out the flags (arg 1), if necessary.
*/
if (options != 0) {
*flagsp++ = '\0';
i = flagsp - flags;
#ifdef DEBUG
1996-10-13 06:32:29 +04:00
printf("init: copying out flags `%s' %d\n", flags, i);
#endif
arg1 = STACK_ALLOC(ucp, i);
ucp = STACK_MAX(arg1, i);
(void)copyout((void *)flags, arg1, i);
}
1994-07-03 15:45:41 +04:00
/*
* Move out the file name (also arg 0).
*/
i = strlen(path) + 1;
#ifdef DEBUG
1996-10-13 06:32:29 +04:00
printf("init: copying out path `%s' %d\n", path, i);
#else
if (boothowto & RB_ASKNAME || path != initpaths[0])
printf("init: trying %s\n", path);
#endif
arg0 = STACK_ALLOC(ucp, i);
ucp = STACK_MAX(arg0, i);
(void)copyout(path, arg0, i);
1994-07-03 15:45:41 +04:00
/*
* Move out the arg pointers.
*/
ucp = (void *)STACK_ALIGN(ucp, ALIGNBYTES);
uap = (char **)STACK_ALLOC(ucp, sizeof(char *) * 3);
SCARG(&args, path) = arg0;
SCARG(&args, argp) = uap;
SCARG(&args, envp) = NULL;
slash = strrchr(path, '/');
if (slash)
(void)suword((void *)uap++,
(long)arg0 + (slash + 1 - path));
else
(void)suword((void *)uap++, (long)arg0);
if (options != 0)
(void)suword((void *)uap++, (long)arg1);
(void)suword((void *)uap++, 0); /* terminator */
1994-07-03 15:45:41 +04:00
/*
* Now try to exec the program. If can't for any reason
* other than it doesn't exist, complain.
*/
2005-12-11 15:16:03 +03:00
error = sys_execve(l, &args, retval);
if (error == 0 || error == EJUSTRETURN) {
2007-02-10 00:55:00 +03:00
KERNEL_UNLOCK_LAST(l);
1994-07-03 15:45:41 +04:00
return;
}
printf("exec %s: error %d\n", path, error);
1994-07-03 15:45:41 +04:00
}
1996-10-13 06:32:29 +04:00
printf("init: not found\n");
1994-07-03 15:45:41 +04:00
panic("no init");
}