436 lines
16 KiB
Groff
436 lines
16 KiB
Groff
.\" $NetBSD: route.4,v 1.34 2021/08/17 22:00:26 andvar Exp $
|
|
.\"
|
|
.\" Copyright (c) 1990, 1991, 1993
|
|
.\" The Regents of the University of California. All rights reserved.
|
|
.\"
|
|
.\" Redistribution and use in source and binary forms, with or without
|
|
.\" modification, are permitted provided that the following conditions
|
|
.\" are met:
|
|
.\" 1. Redistributions of source code must retain the above copyright
|
|
.\" notice, this list of conditions and the following disclaimer.
|
|
.\" 2. Redistributions in binary form must reproduce the above copyright
|
|
.\" notice, this list of conditions and the following disclaimer in the
|
|
.\" documentation and/or other materials provided with the distribution.
|
|
.\" 3. Neither the name of the University nor the names of its contributors
|
|
.\" may be used to endorse or promote products derived from this software
|
|
.\" without specific prior written permission.
|
|
.\"
|
|
.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
|
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
|
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
.\" SUCH DAMAGE.
|
|
.\"
|
|
.\" @(#)route.4 8.6 (Berkeley) 4/19/94
|
|
.\"
|
|
.Dd February 4, 2020
|
|
.Dt ROUTE 4
|
|
.Os
|
|
.Sh NAME
|
|
.Nm route
|
|
.Nd kernel packet forwarding database
|
|
.Sh SYNOPSIS
|
|
.In sys/socket.h
|
|
.In net/if.h
|
|
.In net/route.h
|
|
.Ft int
|
|
.Fn socket PF_ROUTE SOCK_RAW "int family"
|
|
.Sh DESCRIPTION
|
|
.Ux
|
|
provides some packet routing facilities.
|
|
The kernel maintains a routing information database, which
|
|
is used in selecting the appropriate network interface when
|
|
transmitting packets.
|
|
.Pp
|
|
A user process (or possibly multiple co-operating processes)
|
|
maintains this database by sending messages over a special kind
|
|
of socket.
|
|
This supplants the fixed-size
|
|
.Xr ioctl 2 Ns 's
|
|
used in earlier releases.
|
|
Routing table changes may only be carried out by the super user.
|
|
.Pp
|
|
The operating system may spontaneously emit routing messages in response
|
|
to external events, such as receipt of a redirect, or failure to
|
|
locate a suitable route for a request.
|
|
The message types are described in greater detail below.
|
|
.Pp
|
|
Routing database entries come in two flavors: for a specific
|
|
host, or for all hosts on a generic subnetwork (as specified
|
|
by a bit mask and value under the mask).
|
|
The effect of a wildcard or default route may be achieved by using
|
|
a mask of all zeros, and there may be hierarchical routes.
|
|
.Pp
|
|
When the system is booted and addresses are assigned
|
|
to the network interfaces, each protocol family
|
|
installs a routing table entry for each interface when it is ready for traffic.
|
|
Normally the protocol specifies the route
|
|
through each interface as a
|
|
.Dq direct
|
|
connection to the destination host
|
|
or network.
|
|
If the route is direct, the transport layer of a protocol family
|
|
usually requests the packet be sent to the same host specified in
|
|
the packet.
|
|
Otherwise, the interface is requested to address the packet to the
|
|
gateway listed in the routing entry (i.e. the packet is forwarded).
|
|
.Pp
|
|
When routing a packet,
|
|
the kernel will attempt to find
|
|
the most specific route matching the destination.
|
|
(If there are two different mask and value-under-the-mask pairs
|
|
that match, the more specific is the one with more bits in the mask.
|
|
A route to a host is regarded as being supplied with a mask of
|
|
as many ones as there are bits in the destination).
|
|
If no entry is found, the destination is declared to be unreachable,
|
|
and a routing\-miss message is generated if there are any
|
|
listeners on the routing control socket described below.
|
|
.Pp
|
|
A wildcard routing entry is specified with a zero
|
|
destination address value, and a mask of all zeros.
|
|
Wildcard routes will be used
|
|
when the system fails to find other routes matching the
|
|
destination.
|
|
The combination of wildcard routes and routing redirects can provide
|
|
an economical mechanism for routing traffic.
|
|
.Pp
|
|
One opens the channel for passing routing control messages
|
|
by using the socket call shown in the synopsis above.
|
|
.Pp
|
|
The
|
|
.Fa family
|
|
parameter may be
|
|
.Dv AF_UNSPEC
|
|
which will provide
|
|
routing information for all address families, or can be restricted
|
|
to a specific address family by specifying which one is desired.
|
|
There can be more than one routing socket open per system.
|
|
.Pp
|
|
Messages are formed by a header followed by a small
|
|
number of sockaddrs (now variable length particularly
|
|
in the
|
|
.Tn ISO
|
|
case), interpreted by position, and delimited
|
|
by the new length entry in the sockaddr.
|
|
An example of a message with four addresses might be an
|
|
.Tn ISO
|
|
redirect:
|
|
Destination, Netmask, Gateway, and Author of the redirect.
|
|
The interpretation of which addresses are present is given by a
|
|
bit mask within the header, and the sequence is least significant
|
|
to most significant bit within the vector.
|
|
.Pp
|
|
Any messages sent to the kernel are returned, and copies are sent
|
|
to all interested listeners.
|
|
The exception to this is a new address marked as tentative, where copies
|
|
will be sent once Duplicate Address Detection has completed and
|
|
the tentative flag cleared or the duplicated flag set.
|
|
New address messages will also be emitted when other flags on the address
|
|
change, such as deprecated and detached.
|
|
The kernel will provide the process ID for the sender, and the
|
|
sender may use an additional sequence field to distinguish between
|
|
outstanding messages.
|
|
However, message replies may be lost when kernel buffers are exhausted.
|
|
.Pp
|
|
The kernel may reject certain messages, and will indicate this
|
|
by filling in the
|
|
.Fa rtm_errno
|
|
field.
|
|
The routing code returns
|
|
.Er EEXIST
|
|
if
|
|
requested to duplicate an existing entry,
|
|
.Er ESRCH
|
|
if
|
|
requested to delete a non-existent entry,
|
|
or
|
|
.Er ENOBUFS
|
|
if insufficient resources were available
|
|
to install a new route.
|
|
In the current implementation, all routing processes run locally,
|
|
and the values for
|
|
.Fa rtm_errno
|
|
are available through the normal
|
|
.Em errno
|
|
mechanism, even if the routing reply message is lost.
|
|
.Pp
|
|
A process may avoid the expense of reading replies to
|
|
its own messages by issuing a
|
|
.Xr setsockopt 2
|
|
call indicating that the
|
|
.Dv SO_USELOOPBACK
|
|
option
|
|
at the
|
|
.Dv SOL_SOCKET
|
|
level is to be turned off.
|
|
A process may ignore all messages from the routing socket
|
|
by doing a
|
|
.Xr shutdown 2
|
|
system call for further input.
|
|
.Pp
|
|
A process can specify which route message types it's interested in by passing
|
|
an array of route message types to the
|
|
.Xr setsockopt 2
|
|
call with the
|
|
.Dv RO_MSGFILTER
|
|
option at the
|
|
.Dv PF_ROUTE
|
|
level.
|
|
For example, to only get specific messages:
|
|
.Bd -literal -offset indent
|
|
unsigned char rtfilter[] = { RTM_IFINFO, RTM_IFANNOUNCE };
|
|
|
|
if (setsockopt(routefd, PF_ROUTE, RO_MSGFILTER,
|
|
&rtfilter, (socklen_t)sizeof(rtfilter)) == -1)
|
|
err(1, "setsockopt(RO_MSGFILTER)");
|
|
.Ed
|
|
.Pp
|
|
A process can specify which RTM_MISS destination addresses it's interested in
|
|
by passing an array of struct sockaddr to the
|
|
.Xr setsockopt 2
|
|
call with the
|
|
.Dv RO_MISSFILTER
|
|
option at the
|
|
.Dv PF_ROUTE
|
|
level.
|
|
For example, to only get RTM_MISS messages for specific destinations:
|
|
.Bd -literal -offset indent
|
|
char buf[1024] = { '\\0' }, *cp = buf;
|
|
struct sockaddr_in sin = {
|
|
.sin_family = AF_INET,
|
|
.sin_len = sizeof(sin),
|
|
};
|
|
|
|
inet_aton("192.168.0.1", &sin.sin_addr);
|
|
memcpy(cp, &sin, sin.sin_len);
|
|
cp += RT_ROUNDUP(sin.sin_len);
|
|
|
|
inet_aton("192.168.0.2", &sin.sin_addr);
|
|
memcpy(cp, &sin, sin.sin_len);
|
|
cp += RT_ROUNDUP(sin.sin_len);
|
|
|
|
if (setsockopt(routefd, PF_ROUTE, RO_MISSFILTER,
|
|
buf, (socklen_t)(cp - buf)) == -1)
|
|
err(1, "setsockopt(RO_MISSFILTER)");
|
|
.Ed
|
|
.Pp
|
|
If a route is in use when it is deleted,
|
|
the routing entry will be marked down and removed from the routing table,
|
|
but the resources associated with it will not
|
|
be reclaimed until all references to it are released.
|
|
User processes can obtain information about the routing
|
|
entry to a specific destination by using a
|
|
.Dv RTM_GET
|
|
message,
|
|
or by reading the
|
|
.Pa /dev/kmem
|
|
device, or by calling
|
|
.Xr sysctl 3 .
|
|
.Pp
|
|
The messages are:
|
|
.Bd -literal
|
|
#define RTM_ADD 0x1 /* Add Route */
|
|
#define RTM_DELETE 0x2 /* Delete Route */
|
|
#define RTM_CHANGE 0x3 /* Change Metrics, Flags, or Gateway */
|
|
#define RTM_GET 0x4 /* Report Information */
|
|
#define RTM_LOSING 0x5 /* Kernel Suspects Partitioning */
|
|
#define RTM_REDIRECT 0x6 /* Told to use different route */
|
|
#define RTM_MISS 0x7 /* Lookup failed on this address */
|
|
#define RTM_LOCK 0x8 /* fix specified metrics */
|
|
#define RTM_OLDADD 0x9 /* caused by SIOCADDRT */
|
|
#define RTM_OLDDEL 0xa /* caused by SIOCDELRT */
|
|
// #define RTM_RESOLVE 0xb /* req to resolve dst to LL addr */
|
|
#define RTM_ONEWADDR 0xc /* Old (pre-8.0) RTM_NEWADDR message */
|
|
#define RTM_ODELADDR 0xd /* Old (pre-8.0) RTM_DELADDR message */
|
|
#define RTM_OOIFINFO 0xe /* Old (pre-1.5) RTM_IFINFO message */
|
|
#define RTM_OIFINFO 0xf /* Old (pre-6.0) RTM_IFINFO message */
|
|
#define RTM_IFANNOUNCE 0x10 /* iface arrival/departure */
|
|
#define RTM_IEEE80211 0x11 /* IEEE80211 wireless event */
|
|
#define RTM_SETGATE 0x12 /* set prototype gateway for clones
|
|
* (see example in arp_rtrequest).
|
|
*/
|
|
#define RTM_LLINFO_UPD 0x13 /* indication to ARP/NDP/etc. that link-layer
|
|
* address has changed
|
|
*/
|
|
#define RTM_IFINFO 0x14 /* iface/link going up/down etc. */
|
|
#define RTM_OCHGADDR 0x15 /* Old (pre-8.0) RTM_CHGADDR message */
|
|
#define RTM_NEWADDR 0x16 /* address being added to iface */
|
|
#define RTM_DELADDR 0x17 /* address being removed from iface */
|
|
#define RTM_CHGADDR 0x18 /* address properties changed */
|
|
.Ed
|
|
.Pp
|
|
A message header consists of one of the following:
|
|
.Bd -literal
|
|
struct rt_msghdr {
|
|
u_short rtm_msglen; /* to skip over non-understood messages */
|
|
u_char rtm_version; /* future binary compatibility */
|
|
u_char rtm_type; /* message type */
|
|
u_short rtm_index; /* index for associated ifp */
|
|
int rtm_flags; /* flags, incl kern & message, e.g. DONE */
|
|
int rtm_addrs; /* bitmask identifying sockaddrs in msg */
|
|
pid_t rtm_pid; /* identify sender */
|
|
int rtm_seq; /* for sender to identify action */
|
|
int rtm_errno; /* why failed */
|
|
int rtm_use; /* from rtentry */
|
|
u_long rtm_inits; /* which metrics we are initializing */
|
|
struct rt_metrics rtm_rmx; /* metrics themselves */
|
|
};
|
|
|
|
struct if_msghdr {
|
|
u_short ifm_msglen; /* to skip over non-understood messages */
|
|
u_char ifm_version; /* future binary compatibility */
|
|
u_char ifm_type; /* message type */
|
|
int ifm_addrs; /* like rtm_addrs */
|
|
int ifm_flags; /* value of if_flags */
|
|
u_short ifm_index; /* index for associated ifp */
|
|
struct if_data ifm_data; /* statistics and other data about if */
|
|
};
|
|
|
|
struct ifa_msghdr {
|
|
u_short ifam_msglen; /* to skip over non-understood messages */
|
|
u_char ifam_version; /* future binary compatibility */
|
|
u_char ifam_type; /* message type */
|
|
u_short ifam_index; /* index for associated ifp */
|
|
int ifam_flags; /* value of ifa_flags */
|
|
int ifam_addrs; /* like rtm_addrs */
|
|
pid_t ifam_pid; /* identify sender */
|
|
int ifam_addrflags; /* family specific address flags */
|
|
int ifam_metric; /* value of ifa_metric */
|
|
};
|
|
|
|
struct if_announcemsghdr {
|
|
u_short ifan_msglen; /* to skip over non-understood messages */
|
|
u_char ifan_version; /* future binary compatibility */
|
|
u_char ifan_type; /* message type */
|
|
u_short ifan_index; /* index for associated ifp */
|
|
char ifan_name[IFNAMSIZ]; /* if name, e.g. "en0" */
|
|
u_short ifan_what; /* what type of announcement */
|
|
};
|
|
.Ed
|
|
.Pp
|
|
The
|
|
.Dv RTM_IFINFO
|
|
message uses a
|
|
.Vt if_msghdr
|
|
header, the
|
|
.Dv RTM_NEWADDR ,
|
|
.Dv RTM_CHGADDR ,
|
|
and
|
|
.Dv RTM_DELADDR
|
|
messages use a
|
|
.Vt ifa_msghdr
|
|
header,
|
|
the
|
|
.Dv RTM_IFANNOUNCE
|
|
message uses a
|
|
.Vt if_announcemsghdr
|
|
header,
|
|
and all other messages use the
|
|
.Vt rt_msghdr
|
|
header.
|
|
.Pp
|
|
The metrics structure is:
|
|
.Bd -literal
|
|
struct rt_metrics {
|
|
u_long rmx_locks; /* Kernel must leave these values alone */
|
|
u_long rmx_mtu; /* MTU for this path */
|
|
u_long rmx_hopcount; /* max hops expected */
|
|
u_long rmx_expire; /* lifetime for route, e.g. redirect */
|
|
u_long rmx_recvpipe; /* inbound delay-bandwidth product */
|
|
u_long rmx_sendpipe; /* outbound delay-bandwidth product */
|
|
u_long rmx_ssthresh; /* outbound gateway buffer limit */
|
|
u_long rmx_rtt; /* estimated round trip time */
|
|
u_long rmx_rttvar; /* estimated rtt variance */
|
|
u_long rmx_pksent; /* packets sent using this route */
|
|
};
|
|
.Ed
|
|
.Pp
|
|
Flags include the values:
|
|
.Bd -literal
|
|
#define RTF_UP 0x1 /* route usable */
|
|
#define RTF_GATEWAY 0x2 /* destination is a gateway */
|
|
#define RTF_HOST 0x4 /* host entry (net otherwise) */
|
|
#define RTF_REJECT 0x8 /* host or net unreachable */
|
|
#define RTF_DYNAMIC 0x10 /* created dynamically (by redirect) */
|
|
#define RTF_MODIFIED 0x20 /* modified dynamically (by redirect) */
|
|
#define RTF_DONE 0x40 /* message confirmed */
|
|
#define RTF_MASK 0x80 /* subnet mask present */
|
|
#define RTF_CONNECTED 0x100 /* hosts on this route are neighbours */
|
|
#define RTF_LLDATA 0x400 /* used by apps to add/del L2 entries */
|
|
#define RTF_STATIC 0x800 /* manually added */
|
|
#define RTF_BLACKHOLE 0x1000 /* just discard pkts (during updates) */
|
|
#define RTF_PROTO2 0x4000 /* protocol specific routing flag */
|
|
#define RTF_PROTO1 0x8000 /* protocol specific routing flag */
|
|
#define RTF_SRC 0x10000 /* route has fixed source address */
|
|
#define RTF_ANNOUNCE 0x20000 /* announce new ARP or NDP entry */
|
|
#define RTF_LOCAL 0x40000 /* route represents a local address */
|
|
#define RTF_BROADCAST 0x80000 /* route represents a bcast address */
|
|
.Ed
|
|
.Pp
|
|
Specifiers for metric values in rmx_locks and rtm_inits are:
|
|
.Bd -literal
|
|
#define RTV_MTU 0x1 /* init or lock _mtu */
|
|
#define RTV_HOPCOUNT 0x2 /* init or lock _hopcount */
|
|
#define RTV_EXPIRE 0x4 /* init or lock _expire */
|
|
#define RTV_RPIPE 0x8 /* init or lock _recvpipe */
|
|
#define RTV_SPIPE 0x10 /* init or lock _sendpipe */
|
|
#define RTV_SSTHRESH 0x20 /* init or lock _ssthresh */
|
|
#define RTV_RTT 0x40 /* init or lock _rtt */
|
|
#define RTV_RTTVAR 0x80 /* init or lock _rttvar */
|
|
.Ed
|
|
.Pp
|
|
Specifiers for which addresses are present in the messages are:
|
|
.Bd -literal
|
|
#define RTA_DST 0x1 /* destination sockaddr present */
|
|
#define RTA_GATEWAY 0x2 /* gateway sockaddr present */
|
|
#define RTA_NETMASK 0x4 /* netmask sockaddr present */
|
|
#define RTA_GENMASK 0x8 /* cloning mask sockaddr present */
|
|
#define RTA_IFP 0x10 /* interface name sockaddr present */
|
|
#define RTA_IFA 0x20 /* interface addr sockaddr present */
|
|
#define RTA_AUTHOR 0x40 /* sockaddr for author of redirect */
|
|
#define RTA_BRD 0x80 /* for NEWADDR, broadcast or p-p dest addr */
|
|
#define RTA_TAG 0x100 /* route tag */
|
|
.Ed
|
|
.Pp
|
|
Flags for IPv6 addresses:
|
|
.Bd -literal
|
|
#define IN6_IFF_ANYCAST 0x01 /* anycast address */
|
|
#define IN6_IFF_TENTATIVE 0x02 /* tentative address */
|
|
#define IN6_IFF_DUPLICATED 0x04 /* DAD detected duplicate */
|
|
#define IN6_IFF_DETACHED 0x08 /* may be detached from the link */
|
|
#define IN6_IFF_DEPRECATED 0x10 /* deprecated address */
|
|
#define IN6_IFF_NODAD 0x20 /* don't perform DAD on this address
|
|
* (used only at first SIOC* call)
|
|
*/
|
|
#define IN6_IFF_AUTOCONF 0x40 /* autoconfigurable address. */
|
|
#define IN6_IFF_TEMPORARY 0x80 /* temporary (anonymous) address. */
|
|
.Ed
|
|
.Sh SEE ALSO
|
|
.Xr socket 2 ,
|
|
.Xr sysctl 3
|
|
.Sh HISTORY
|
|
Since
|
|
.Nx 8.0 ,
|
|
.Cm RTF_CLONED ,
|
|
.Cm RTF_CLONING ,
|
|
.Cm RTF_LLINFO ,
|
|
.Cm RTF_XRESOLVE
|
|
and
|
|
.Cm RTM_RESOLVE
|
|
have been obsolete.
|
|
.Cm RTF_CONNECTED
|
|
and
|
|
.Cm RTF_LLDATA
|
|
appeared in
|
|
.Nx 8.0 .
|
|
.Pp
|
|
.Vt ifa_msghdr
|
|
gained the fields ifam_pid and ifam_addrflags in
|
|
.Nx 8.0 .
|