diff --git a/sys/kern/uipc_socket2.c b/sys/kern/uipc_socket2.c index 44f90b0d1631..9e362cd62e69 100644 --- a/sys/kern/uipc_socket2.c +++ b/sys/kern/uipc_socket2.c @@ -1,4 +1,4 @@ -/* $NetBSD: uipc_socket2.c,v 1.115 2013/10/08 19:58:25 christos Exp $ */ +/* $NetBSD: uipc_socket2.c,v 1.116 2014/05/17 22:52:36 rmind Exp $ */ /*- * Copyright (c) 2008 The NetBSD Foundation, Inc. @@ -58,7 +58,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: uipc_socket2.c,v 1.115 2013/10/08 19:58:25 christos Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uipc_socket2.c,v 1.116 2014/05/17 22:52:36 rmind Exp $"); #include "opt_mbuftrace.h" #include "opt_sb_max.h" @@ -82,13 +82,44 @@ __KERNEL_RCSID(0, "$NetBSD: uipc_socket2.c,v 1.115 2013/10/08 19:58:25 christos /* * Primitive routines for operating on sockets and socket buffers. * + * Connection life-cycle: + * + * Normal sequence from the active (originating) side: + * + * - soisconnecting() is called during processing of connect() call, + * - resulting in an eventual call to soisconnected() if/when the + * connection is established. + * + * When the connection is torn down during processing of disconnect(): + * + * - soisdisconnecting() is called and, + * - soisdisconnected() is called when the connection to the peer + * is totally severed. + * + * The semantics of these routines are such that connectionless protocols + * can call soisconnected() and soisdisconnected() only, bypassing the + * in-progress calls when setting up a ``connection'' takes no time. + * + * From the passive side, a socket is created with two queues of sockets: + * + * - so_q0 (0) for partial connections (i.e. connections in progress) + * - so_q (1) for connections already made and awaiting user acceptance. + * + * As a protocol is preparing incoming connections, it creates a socket + * structure queued on so_q0 by calling sonewconn(). When the connection + * is established, soisconnected() is called, and transfers the + * socket structure to so_q, making it available to accept(). + * + * If a socket is closed with sockets on either so_q0 or so_q, these + * sockets are dropped. + * * Locking rules and assumptions: * * o socket::so_lock can change on the fly. The low level routines used * to lock sockets are aware of this. When so_lock is acquired, the * routine locking must check to see if so_lock still points to the * lock that was acquired. If so_lock has changed in the meantime, the - * now irellevant lock that was acquired must be dropped and the lock + * now irrelevant lock that was acquired must be dropped and the lock * operation retried. Although not proven here, this is completely safe * on a multiprocessor system, even with relaxed memory ordering, given * the next two rules: @@ -115,45 +146,14 @@ __KERNEL_RCSID(0, "$NetBSD: uipc_socket2.c,v 1.115 2013/10/08 19:58:25 christos * locking the socket must also lock the sockets attached to both queues. * Again, their lock pointers must match. * - * o Beyond the initial lock assigment in socreate(), assigning locks to + * o Beyond the initial lock assignment in socreate(), assigning locks to * sockets is the responsibility of the individual protocols / protocol * domains. */ -static pool_cache_t socket_cache; - -u_long sb_max = SB_MAX; /* maximum socket buffer size */ -static u_long sb_max_adj; /* adjusted sb_max */ - -/* - * Procedures to manipulate state flags of socket - * and do appropriate wakeups. Normal sequence from the - * active (originating) side is that soisconnecting() is - * called during processing of connect() call, - * resulting in an eventual call to soisconnected() if/when the - * connection is established. When the connection is torn down - * soisdisconnecting() is called during processing of disconnect() call, - * and soisdisconnected() is called when the connection to the peer - * is totally severed. The semantics of these routines are such that - * connectionless protocols can call soisconnected() and soisdisconnected() - * only, bypassing the in-progress calls when setting up a ``connection'' - * takes no time. - * - * From the passive side, a socket is created with - * two queues of sockets: so_q0 for connections in progress - * and so_q for connections already made and awaiting user acceptance. - * As a protocol is preparing incoming connections, it creates a socket - * structure queued on so_q0 by calling sonewconn(). When the connection - * is established, soisconnected() is called, and transfers the - * socket structure to so_q, making it available to accept(). - * - * If a socket is closed with sockets on either - * so_q0 or so_q, these sockets are dropped. - * - * If higher level protocols are implemented in - * the kernel, the wakeups done here will sometimes - * cause software-interrupt process scheduling. - */ +static pool_cache_t socket_cache; +u_long sb_max = SB_MAX;/* maximum socket buffer size */ +static u_long sb_max_adj; /* adjusted sb_max */ void soisconnecting(struct socket *so) @@ -179,6 +179,10 @@ soisconnected(struct socket *so) so->so_state |= SS_ISCONNECTED; if (head && so->so_onq == &head->so_q0) { if ((so->so_options & SO_ACCEPTFILTER) == 0) { + /* + * Re-enqueue and wake up any waiters, e.g. + * processes blocking on accept(). + */ soqremque(so, 0); soqinsque(head, so, 1); sorwakeup(head); @@ -234,33 +238,41 @@ soinit2(void) } /* - * When an attempt at a new connection is noted on a socket - * which accepts connections, sonewconn is called. If the - * connection is possible (subject to space constraints, etc.) - * then we allocate a new structure, propoerly linked into the - * data structure of the original socket, and return this. + * sonewconn: accept a new connection. + * + * When an attempt at a new connection is noted on a socket which accepts + * connections, sonewconn(9) is called. If the connection is possible + * (subject to space constraints, etc) then we allocate a new structure, + * properly linked into the data structure of the original socket. + * + * => If 'soready' is true, then socket will become ready for accept() i.e. + * inserted into the so_q queue, SS_ISCONNECTED set and waiters awoken. + * => May be called from soft-interrupt context. + * => Listening socket should be locked. + * => Returns the new socket locked. */ struct socket * -sonewconn(struct socket *head, bool conncomplete) +sonewconn(struct socket *head, bool soready) { - struct socket *so; - int soqueue, error; + struct socket *so; + int soqueue, error; KASSERT(solocked(head)); - if ((head->so_options & SO_ACCEPTFILTER) != 0) - conncomplete = false; - soqueue = conncomplete ? 1 : 0; + if (head->so_qlen + head->so_q0len > 3 * head->so_qlimit / 2) { + /* Listen queue overflow. */ + return NULL; + } + if ((head->so_options & SO_ACCEPTFILTER) != 0) { + soready = false; + } + soqueue = soready ? 1 : 0; - if (head->so_qlen + head->so_q0len > 3 * head->so_qlimit / 2) + if ((so = soget(false)) == NULL) { return NULL; - so = soget(false); - if (so == NULL) - return NULL; - mutex_obj_hold(head->so_lock); - so->so_lock = head->so_lock; + } so->so_type = head->so_type; - so->so_options = head->so_options &~ SO_ACCEPTCONN; + so->so_options = head->so_options & ~SO_ACCEPTCONN; so->so_linger = head->so_linger; so->so_state = head->so_state | SS_NOFDREF; so->so_proto = head->so_proto; @@ -283,27 +295,39 @@ sonewconn(struct socket *head, bool conncomplete) so->so_snd.sb_timeo = head->so_snd.sb_timeo; so->so_rcv.sb_flags |= head->so_rcv.sb_flags & (SB_AUTOSIZE | SB_ASYNC); so->so_snd.sb_flags |= head->so_snd.sb_flags & (SB_AUTOSIZE | SB_ASYNC); + + /* + * Share the lock with the listening-socket, it may get unshared + * once the connection is complete. + */ + mutex_obj_hold(head->so_lock); + so->so_lock = head->so_lock; soqinsque(head, so, soqueue); + error = (*so->so_proto->pr_usrreq)(so, PRU_ATTACH, NULL, NULL, NULL, NULL); KASSERT(solocked(so)); - if (error != 0) { + if (error) { (void) soqremque(so, soqueue); out: - /* - * Remove acccept filter if one is present. - * XXX Is this really needed? - */ - if (so->so_accf != NULL) - (void)accept_filt_clear(so); + KASSERT(so->so_accf == NULL); soput(so); + + /* Note: the listening socket shall stay locked. */ + KASSERT(solocked(head)); return NULL; } - if (conncomplete) { + + /* + * Update the connection status and wake up any waiters, + * e.g. processes blocking on accept(). + */ + if (soready) { + so->so_state |= SS_ISCONNECTED; sorwakeup(head); cv_broadcast(&head->so_cv); - so->so_state |= SS_ISCONNECTED; } + KASSERT(solocked2(head, so)); return so; } @@ -344,16 +368,20 @@ soput(struct socket *so) pool_cache_put(socket_cache, so); } +/* + * soqinsque: insert socket of a new connection into the specified + * accept queue of the listening socket (head). + * + * q = 0: queue of partial connections + * q = 1: queue of incoming connections + */ void soqinsque(struct socket *head, struct socket *so, int q) { - + KASSERT(q == 0 || q == 1); KASSERT(solocked2(head, so)); - -#ifdef DIAGNOSTIC - if (so->so_onq != NULL) - panic("soqinsque"); -#endif + KASSERT(so->so_onq == NULL); + KASSERT(so->so_head == NULL); so->so_head = head; if (q == 0) { @@ -366,54 +394,62 @@ soqinsque(struct socket *head, struct socket *so, int q) TAILQ_INSERT_TAIL(so->so_onq, so, so_qe); } -int +/* + * soqremque: remove socket from the specified queue. + * + * => Returns true if socket was removed from the specified queue. + * => False if socket was not removed (because it was in other queue). + */ +bool soqremque(struct socket *so, int q) { - struct socket *head; - - head = so->so_head; + struct socket *head = so->so_head; + KASSERT(q == 0 || q == 1); KASSERT(solocked(so)); + KASSERT(so->so_onq != NULL); + KASSERT(head != NULL); + if (q == 0) { if (so->so_onq != &head->so_q0) - return (0); + return false; head->so_q0len--; } else { if (so->so_onq != &head->so_q) - return (0); + return false; head->so_qlen--; } KASSERT(solocked2(so, head)); TAILQ_REMOVE(so->so_onq, so, so_qe); so->so_onq = NULL; so->so_head = NULL; - return (1); + return true; } /* - * Socantsendmore indicates that no more data will be sent on the + * socantsendmore: indicates that no more data will be sent on the * socket; it would normally be applied to a socket when the user * informs the system that no more data is to be sent, by the protocol - * code (in case PRU_SHUTDOWN). Socantrcvmore indicates that no more data - * will be received, and will normally be applied to the socket by a - * protocol when it detects that the peer will send no more data. - * Data queued for reading in the socket may yet be read. + * code (in case PRU_SHUTDOWN). */ - void socantsendmore(struct socket *so) { - KASSERT(solocked(so)); so->so_state |= SS_CANTSENDMORE; sowwakeup(so); } +/* + * socantrcvmore(): indicates that no more data will be received and + * will normally be applied to the socket by a protocol when it detects + * that the peer will send no more data. Data queued for reading in + * the socket may yet be read. + */ void socantrcvmore(struct socket *so) { - KASSERT(solocked(so)); so->so_state |= SS_CANTRCVMORE; @@ -539,8 +575,7 @@ sb_max_set(u_long new_sbmax) int soreserve(struct socket *so, u_long sndcc, u_long rcvcc) { - - KASSERT(so->so_lock == NULL || solocked(so)); + KASSERT(so->so_pcb == NULL || solocked(so)); /* * there's at least one application (a configure script of screen) @@ -585,7 +620,7 @@ sbreserve(struct sockbuf *sb, u_long cc, struct socket *so) rlim_t maxcc; struct uidinfo *uidinfo; - KASSERT(so->so_lock == NULL || solocked(so)); + KASSERT(so->so_pcb == NULL || solocked(so)); KASSERT(sb->sb_so == so); KASSERT(sb_max_adj != 0); @@ -1374,16 +1409,14 @@ solocked2(struct socket *so1, struct socket *so2) } /* - * Assign a default lock to a new socket. For PRU_ATTACH, and done by - * protocols that do not have special locking requirements. + * sosetlock: assign a default lock to a new socket. */ void sosetlock(struct socket *so) { - kmutex_t *lock; - if (so->so_lock == NULL) { - lock = softnet_lock; + kmutex_t *lock = softnet_lock; + so->so_lock = lock; mutex_obj_hold(lock); mutex_enter(lock); diff --git a/sys/sys/socketvar.h b/sys/sys/socketvar.h index dc2b0469fb11..10679676560f 100644 --- a/sys/sys/socketvar.h +++ b/sys/sys/socketvar.h @@ -1,4 +1,4 @@ -/* $NetBSD: socketvar.h,v 1.131 2013/08/29 17:49:21 rmind Exp $ */ +/* $NetBSD: socketvar.h,v 1.132 2014/05/17 22:52:36 rmind Exp $ */ /*- * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc. @@ -308,7 +308,7 @@ int solisten(struct socket *, int, struct lwp *); struct socket * sonewconn(struct socket *, bool); void soqinsque(struct socket *, struct socket *, int); -int soqremque(struct socket *, int); +bool soqremque(struct socket *, int); int soreceive(struct socket *, struct mbuf **, struct uio *, struct mbuf **, struct mbuf **, int *); int soreserve(struct socket *, u_long, u_long);