.\" $NetBSD: mbuf.9,v 1.33 2004/12/15 15:39:03 wiz Exp $ .\" .\" Copyright (c) 1997 The NetBSD Foundation, Inc. .\" All rights reserved. .\" .\" This documentation is derived from text contributed to The NetBSD Foundation .\" by S.P.Zeidler (aka stargazer). .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 3. All advertising materials mentioning features or use of this software .\" must display the following acknowledgement: .\" This product includes software developed by the NetBSD .\" Foundation, Inc. and its contributors. .\" 4. Neither the name of The NetBSD Foundation nor the names of its .\" contributors may be used to endorse or promote products derived .\" from this software without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS .\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED .\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR .\" PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS .\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR .\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF .\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS .\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN .\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE .\" POSSIBILITY OF SUCH DAMAGE. .\" .Dd December 12, 2004 .Dt MBUF 9 .Os .Sh NAME .Nm mbuf , .Nm m_get , .Nm m_getclr , .Nm m_gethdr , .Nm m_devget , .Nm m_copym , .Nm m_copypacket , .Nm m_copydata , .Nm m_copyback , .Nm m_copyback_cow , .Nm m_cat , .Nm m_dup , .Nm m_makewritable , .Nm m_prepend , .Nm m_pulldown , .Nm m_pullup , .Nm m_split , .Nm m_adj , .Nm m_apply , .Nm m_free , .Nm m_freem , .Nm mtod , .Nm mtocl , .Nm cltom , .Nm MGET , .Nm MGETHDR , .Nm MEXTMALLOC , .Nm MEXTADD , .Nm MCLGET , .Nm M_COPY_PKTHDR , .Nm M_ALIGN , .Nm MH_ALIGN , .Nm M_LEADINGSPACE , .Nm M_TRAILINGSPACE , .Nm M_PREPEND , .Nm MCHTYPE , .Nm MEXTREMOVE , .Nm MFREE , .Nd "functions and macros for managing memory used by networking code" .Sh SYNOPSIS .In sys/mbuf.h .Ft struct mbuf * .Fn m_get "int nowait" "int type" .Ft struct mbuf * .Fn m_getclr "int nowait" "int type" .Ft struct mbuf * .Fn m_gethdr "int nowait" "int type" .Ft struct mbuf * .Fn m_devget "char *buf" "int totlen" "int off0" "struct ifnet *ifp" "void (*copy)(const void *, void *, size_t)" .Ft struct mbuf * .Fn m_copym "struct mbuf *m" "int off0" "int len" "int wait" .Ft struct mbuf * .Fn m_copypacket "struct mbuf *m" "int how" .Ft void .Fn m_copydata "struct mbuf *m" "int off" "int len" "caddr_t cp" .Ft void .Fn m_copyback "struct mbuf *m0" "int off" "int len" "caddr_t cp" .Ft struct mbuf * .Fn m_copyback_cow "struct mbuf *m0" "int off" "int len" "caddr_t cp" "int how" .Ft int .Fn m_makewritable "struct mbuf **mp" "int off" "int 
len" "int how" .Ft void .Fn m_cat "struct mbuf *m" "struct mbuf *n" .Ft struct mbuf * .Fn m_dup "struct mbuf *m" "int off0" "int len" "int wait" .Ft struct mbuf * .Fn m_prepend "struct mbuf *m" "int len" "int how" .Ft struct mbuf * .Fn m_pulldown "struct mbuf *m" "int off" "int len" "int *offp" .Ft struct mbuf * .Fn m_pullup "struct mbuf *n" "int len" .Ft struct mbuf * .Fn m_split "struct mbuf *m0" "int len0" "int wait" .Ft void .Fn m_adj "struct mbuf *mp" "int req_len" .Ft int .Fn m_apply "struct mbuf *m" "int off" "int len" "int (*f)(void *, caddr_t, unsigned int)" "void *arg" .Ft struct mbuf * .Fn m_free "struct mbuf *m" .Ft void .Fn m_freem "struct mbuf *m" .Ft datatype .Fn mtod "struct mbuf *m" "datatype" .Ft u_long .Fn mtocl "void *datapointer" .Ft caddr_t .Fn cltom "u_long clusternum" .Ft void .Fn MGET "struct mbuf *m" "int how" "int type" .Ft void .Fn MGETHDR "struct mbuf *m" "int how" "int type" .Ft void .Fn MEXTMALLOC "struct mbuf *m" "int len" "int how" .Ft void .Fn MEXTADD "struct mbuf *m" "caddr_t buf" "int size" "int type" "void (*free)(struct mbuf *, caddr_t, size_t, void *)" "void *arg" .Ft void .Fn MCLGET "struct mbuf *m" "int how" .Ft void .Fn M_COPY_PKTHDR "struct mbuf *to" "struct mbuf *from" .Ft void .Fn M_ALIGN "struct mbuf *m" "int len" .Ft void .Fn MH_ALIGN "struct mbuf *m" "int len" .Ft int .Fn M_LEADINGSPACE "struct mbuf *m" .Ft int .Fn M_TRAILINGSPACE "struct mbuf *m" .Ft void .Fn M_PREPEND "struct mbuf *m" "int plen" "int how" .Ft void .Fn MCHTYPE "struct mbuf *m" "int type" .Ft void .Fn MEXTREMOVE "struct mbuf *m" .Ft void .Fn MFREE "struct mbuf *m" "struct mbuf *n" .Sh DESCRIPTION The .Nm functions and macros provide an easy and consistent way to handle a networking stack's memory management needs. An .Nm consists of a header and a data area. It is of a fixed size, .Dv MSIZE .Pq defined in Aq Pa machine/param.h , which includes overhead. 
The header contains a pointer to the next .Nm in the .Sq "mbuf chain" , a pointer to the next .Sq "mbuf chain" , a pointer to the data area, the amount of data in this mbuf, its type and a .Dv flags field. .Pp The .Dv type variable can signify: .Bl -tag -compact -offset indent -width "XXXXXXXXXXX" .It Dv MT_FREE the mbuf should be on the ``free'' list .It Dv MT_DATA data was dynamically allocated .It Dv MT_HEADER data is a packet header .It Dv MT_SONAME data is a socket name .It Dv MT_SOOPTS data is socket options .It Dv MT_FTABLE data is the fragment reassembly header .It Dv MT_CONTROL mbuf contains ancillary \&(protocol control\&) data .It Dv MT_OOBDATA mbuf contains out-of-band data. .El .Pp The .Dv flags variable contains information describing the .Nm , notably: .Bl -tag -compact -offset indent -width "XXXXXXXXXXX" .It Dv M_EXT has external storage .It Dv M_PKTHDR is start of record .It Dv M_EOR is end of record .It Dv M_CLUSTER external storage is a cluster. .El .Pp If an .Nm designates the start of a record .Pq Dv M_PKTHDR , its .Dv flags field may contain additional information describing the content of the record: .Bl -tag -compact -offset indent -width "XXXXXXXXXXX" .It Dv M_BCAST sent/received as link-level broadcast .It Dv M_MCAST sent/received as link-level multicast .It Dv M_LINK0 , .It Dv M_LINK1 , .It Dv M_LINK2 three link-level specific flags. .El .Pp An .Nm may add a single .Sq "mbuf cluster" of .Dv MCLBYTES bytes .Pq also defined in Aq Pa machine/param.h , which has no additional overhead and is used instead of the internal data area; this is done when at least .Dv MINCLSIZE bytes of data must be stored. .Pp When the .Dv M_EXT flag is raised for an mbuf, the external storage area could be shared among multiple mbufs. Be careful when you attempt to overwrite the data content of the mbuf. .Bl -tag -width compact .It Fn m_get "int nowait" "int type" Allocates an mbuf and initializes it to contain internal data. 
The .Fa nowait parameter is a choice of .Dv M_WAIT / M_DONTWAIT from caller. .Dv M_WAIT means the call cannot fail, but may take forever. The .Fa type parameter is an mbuf type. .It Fn m_getclr "int nowait" "int type" Allocates an mbuf and initializes it to contain internal data, then zeros the data area. The .Fa nowait parameter is a choice of .Dv M_WAIT / M_DONTWAIT from caller. The .Fa type parameter is an mbuf type. .It Fn m_gethdr "int nowait" "int type" Allocates an mbuf and initializes it to contain a packet header and internal data. The .Fa nowait parameter is a choice of .Dv M_WAIT / M_DONTWAIT from caller. The .Fa type parameter is an mbuf type. .It Fn m_devget "char *buf" "int totlen" "int off0" "struct ifnet *ifp" "void (*copy)(const void *, void *, size_t)" Copies .Fa totlen bytes from device local memory into mbufs using copy routine .Fa copy . If parameter .Fa off0 is non-zero, the packet is supposed to be trailer-encapsulated and .Fa off0 bytes plus the type and length fields will be skipped before copying. Returns the top of the mbuf chain it created. .It Fn m_copym "struct mbuf *m" "int off0" "int len" "int wait" Creates a copy of an mbuf chain starting .Fa off0 bytes from the beginning, continuing for .Fa len bytes. If the .Fa len requested is .Dv M_COPYALL , the complete mbuf chain will be copied. The .Fa wait parameter is a choice of .Dv M_WAIT / M_DONTWAIT from caller. .It Fn m_copypacket "struct mbuf *m" "int how" Copies an entire packet, including header (which must be present). This function is an optimization of the common case .Li m_copym(m, 0, Dv M_COPYALL, Fa how ) . .It Fn m_copydata "struct mbuf *m" "int off" "int len" "caddr_t cp" Copies .Fa len bytes data from mbuf chain .Fa m into the buffer .Fa cp , starting .Fa off bytes from the beginning. 
.It Fn m_copyback "struct mbuf *m0" "int off" "int len" "caddr_t cp" Copies .Fa len bytes data from buffer .Fa cp back into the mbuf chain .Fa m0 , starting .Fa off bytes from the beginning, extending the mbuf chain if necessary. .Fn m_copyback can only fail when extending the chain. The caller should check for this kind of failure by checking the resulting length of the chain in that case. It is an error to use .Fn m_copyback on read-only mbufs. .It Fn m_copyback_cow "struct mbuf *m0" "int off" "int len" "caddr_t cp" \ "int how" Copies .Fa len bytes data from buffer .Fa cp back into the mbuf chain .Fa m0 as .Fn m_copyback does. Unlike .Fn m_copyback , it is safe to use .Fn m_copyback_cow on read-only mbufs. If needed, .Fn m_copyback_cow automatically allocates new mbufs and adjusts the chain. It returns a pointer to the resulting mbuf chain on success. Otherwise, it returns .Dv NULL . In that case, the original mbuf .Fa m0 will be freed. The .Fa how parameter is a choice of .Dv M_WAIT / M_DONTWAIT from the caller. Unlike .Fn m_copyback , extending the mbuf chain isn't supported. It is an error to attempt to extend the mbuf chain using .Fn m_copyback_cow . .It Fn m_makewritable "struct mbuf **mp" "int off" "int len" "int how" Rearranges an mbuf chain so that .Fa len bytes from offset .Fa off are writable. When it meets read-only mbufs, it allocates new mbufs, adjusts the chain as .Fn m_copyback_cow does, and copies the original content into them. The .Fa how parameter is a choice of .Dv M_WAIT / M_DONTWAIT from the caller. .Fn m_makewritable preserves the contents of the mbuf chain even in the case of failure. It updates a pointer to the mbuf chain pointed to by .Fa mp . It returns 0 on success. Otherwise, it returns an error code, typically .Er ENOBUFS . .It Fn m_cat "struct mbuf *m" "struct mbuf *n" Concatenates mbuf chain .Fa n to .Fa m . Both chains must be of the same type; packet headers will .Em not be updated if present. 
.It Fn m_dup "struct mbuf *m" "int off0" "int len" "int wait" Similarly to .Fn m_copym , the function creates a copy of an mbuf chain starting .Fa off0 bytes from the beginning, continuing for .Fa len bytes. While .Fn m_copym tries to share external storage for mbufs with .Dv M_EXT flag, .Fn m_dup will deep-copy the whole data content into a new mbuf chain and avoid shared external storage. .It Fn m_prepend "struct mbuf *m" "int len" "int how" Lesser-used path for .Fn M_PREPEND : allocates new mbuf .Fa m of size .Fa len to prepend to the chain, copying junk along. The .Fa how parameter is a choice of .Dv M_WAIT / M_DONTWAIT from caller. .It Fn m_pulldown "struct mbuf *m" "int off" "int len" "int *offp" Rearranges an mbuf chain so that .Fa len bytes from offset .Fa off are contiguous and in the data area of an mbuf. The return value points to an mbuf in the middle of the mbuf chain .Fa m . If we call the return value .Fa n , the contiguous data region is available at .Li "mtod(n, caddr_t) + *offp" , or .Li "mtod(n, caddr_t)" if .Fa offp is .Dv NULL . The top of the mbuf chain .Fa m , and mbufs up to .Fa off , will not be modified. On successful return, it is guaranteed that the mbuf pointed to by .Fa n does not have a shared external storage, therefore it is safe to update the contiguous region. Returns .Dv NULL and frees the mbuf chain on failure. .Fa len must be less than or equal to .Dv MCLBYTES . .It Fn m_pullup "struct mbuf *n" "int len" Rearranges an mbuf chain so that .Fa len bytes are contiguous and in the data area of an mbuf (so that .Fn mtod will work for a structure of size .Fa len ) . Returns the resulting mbuf chain on success, frees it and returns .Dv NULL on failure. If there is room, it will add up to .Dv max_protohdr - .Fa len extra bytes to the contiguous region to possibly avoid being called again. .Fa len must be less than or equal to .Dv MHLEN . 
.It Fn m_split "struct mbuf *m0" "int len0" "int wait" Partitions an mbuf chain in two pieces, returning the tail, which is all but the first .Fa len0 bytes. In case of failure, it returns .Dv NULL and attempts to restore the chain to its original state. .It Fn m_adj "struct mbuf *mp" "int req_len" Shaves off .Fa req_len bytes from head or tail of the (valid) data area. If .Fa req_len is greater than zero, front bytes are being shaved off, if it's smaller, from the back (and if it is zero, the mbuf will stay bearded). This function does not move data in any way, but is used to manipulate the data area pointer and data length variable of the mbuf in a non-clobbering way. .It Fn m_apply "struct mbuf *m" "int off" "int len" "int (*f)(void *, caddr_t, unsigned int)" "void *arg" Apply function .Fa f to the data in an mbuf chain starting .Fa off bytes from the beginning, continuing for .Fa len bytes. Neither .Fa off nor .Fa len may be negative. .Fa arg will be supplied as first argument for .Fa f , the second argument will be the pointer to the data buffer of a packet (starting after .Fa off bytes in the stream), and the third argument is the amount of data in bytes in this call. If .Fa f returns something not equal to zero .Fn m_apply will bail out, returning the return code of .Fa f . Upon successful completion it will return zero. .It Fn m_free "struct mbuf *m" Frees mbuf .Fa m . .It Fn m_freem "struct mbuf *m" Frees the mbuf chain beginning with .Fa m . This function contains the elementary sanity check for a .Dv NULL pointer. .It Fn mtod "struct mbuf *m" "datatype" Returns a pointer to the data contained in the specified mbuf .Fa m , type-casted to the specified data type .Fa datatype . Implemented as a macro. .It Fn mtocl "void *datapointer" Takes a .Fa datapointer within an mbuf cluster and returns the cluster index number of the mbuf owning the data. Avoid this; it may be deprecated in the future. Implemented as a macro. 
.It Fn cltom "u_long clusternum" Takes an mbuf cluster index number .Fa clusternum and returns a pointer to the beginning of the cluster. Avoid this; it may be deprecated in the future. Implemented as a macro. .It Fn MGET "struct mbuf *m" "int how" "int type" Allocates mbuf .Fa m and initializes it to contain internal data. See .Fn m_get . Implemented as a macro. .It Fn MGETHDR "struct mbuf *m" "int how" "int type" Allocates mbuf .Fa m and initializes it to contain a packet header. See .Fn m_gethdr . Implemented as a macro. .It Fn MEXTMALLOC "struct mbuf *m" "int len" "int how" Allocates external storage of size .Fa len for mbuf .Fa m . The .Fa how parameter is a choice of .Dv M_WAIT / M_DONTWAIT from caller. The flag .Dv M_EXT is set upon success. Implemented as a macro. .It Fn MEXTADD "struct mbuf *m" "caddr_t buf" "int size" "int type" "void (*free)(struct mbuf *, caddr_t, size_t, void *)" "void *arg" Adds pre-allocated external storage .Fa buf to a normal mbuf .Fa m ; the parameters .Fa size , .Fa type , .Fa free and .Fa arg describe the external storage. .Fa size is the size of the storage, .Fa type describes its .Xr malloc 9 type, .Fa free is a free routine (if not the usual one), and .Fa arg is a possible argument to the free routine. The flag .Dv M_EXT is set upon success. Implemented as a macro. If a free routine is specified, it will be called when the mbuf is freed or when .Fn MEXTREMOVE is called for the mbuf. In the former case, the first argument for the free routine is the mbuf .Fa m and the routine is expected to free it in addition to the external storage pointed to by the second argument. In the latter case, the first argument for the routine is .Dv NULL . .It Fn MCLGET "struct mbuf *m" "int how" Allocates and adds an mbuf cluster to a normal mbuf .Fa m . The .Fa how parameter is a choice of .Dv M_WAIT / M_DONTWAIT from caller. The flag .Dv M_EXT is set upon success. Implemented as a macro. 
.It Fn M_COPY_PKTHDR "struct mbuf *to" "struct mbuf *from" Copies the mbuf pkthdr from mbuf .Fa from to mbuf .Fa to . .Fa from must have the type flag .Dv M_PKTHDR set, and .Fa to must be empty. Implemented as a macro. .It Fn M_ALIGN "struct mbuf *m" "int len" Sets the data pointer of a newly allocated mbuf .Fa m to .Fa len bytes from the end of the mbuf data area, so that .Fa len bytes of data written to the mbuf .Fa m , starting at the data pointer, will be aligned to the end of the data area. Implemented as a macro. .It Fn MH_ALIGN "struct mbuf *m" "int len" Sets the data pointer of a newly allocated packetheader mbuf .Fa m to .Fa len bytes from the end of the mbuf data area, so that .Fa len bytes of data written to the mbuf .Fa m , starting at the data pointer, will be aligned to the end of the data area. Implemented as a macro. .It Fn M_LEADINGSPACE "struct mbuf *m" Returns the amount of space available before the current start of valid data in mbuf .Fa m . Returns 0 if the mbuf data part is shared across multiple mbufs .Pq i.e. not writable . Implemented as a macro. .It Fn M_TRAILINGSPACE "struct mbuf *m" Returns the amount of space available after the current end of valid data in mbuf .Fa m . Returns 0 if the mbuf data part is shared across multiple mbufs .Pq i.e. not writable . Implemented as a macro. .It Fn M_PREPEND "struct mbuf *m" "int plen" "int how" Prepends space of size .Fa plen to mbuf .Fa m . If a new mbuf must be allocated, .Fa how specifies whether to wait. If .Fa how is .Dv M_DONTWAIT and allocation fails, the original mbuf chain is freed and .Fa m is set to .Dv NULL . Implemented as a macro. .It Fn MCHTYPE "struct mbuf *m" "int type" Change mbuf .Fa m to new type .Fa type . Implemented as a macro. .It Fn MEXTREMOVE "struct mbuf *m" Removes external storage from mbuf .Fa m . The flag .Dv M_EXT is removed. Implemented as a macro. 
.It Fn MFREE "struct mbuf *m" "struct mbuf *n" Frees a single mbuf .Fa m and places the successor, if any, in mbuf .Fa n . Implemented as a macro. .El .Sh FILES The .Nm management functions are implemented within the file .Pa sys/kern/uipc_mbuf.c . Function prototypes, and the functions implemented as macros are located in .Pa sys/sys/mbuf.h . Both pathnames are relative to the root of the .Nx source tree, .Pa /usr/src . .Sh SEE ALSO .Pa /usr/share/doc/smm/18.net , .Xr netstat 1 , .Xr malloc 9 .Rs .%A Jun-ichiro Hagino .%T "Mbuf issues in 4.4BSD IPv6/IPsec support (experiences from KAME IPv6/IPsec implementation)" .%B "Proceedings of the freenix track: 2000 USENIX annual technical conference" .%D June 2000 .Re .Sh AUTHORS The original mbuf data structures were designed by Rob Gurwitz when he did the initial TCP/IP implementation at BBN. .br Further extensions and enhancements were made by Bill Joy, Sam Leffler, and Mike Karels at CSRG. .br Current implementation of external storage by .An Matt Thomas .Aq matt@3am-software.com and .An Jason R. Thorpe .Aq thorpej@NetBSD.org .