/* $NetBSD: frag6.c,v 1.78 2024/04/19 05:04:06 ozaki-r Exp $ */
/* $KAME: frag6.c,v 1.40 2002/05/27 21:40:31 itojun Exp $ */
/*
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the project nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: frag6.c,v 1.78 2024/04/19 05:04:06 ozaki-r Exp $");
#ifdef _KERNEL_OPT
#include "opt_net_mpsafe.h"
#endif
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/errno.h>
#include <sys/time.h>
#include <sys/kmem.h>
#include <sys/kernel.h>
#include <sys/syslog.h>
#include <net/if.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet6/ip6_private.h>
#include <netinet/icmp6.h>
/*
* IPv6 reassembly queue structure. Each fragment being reassembled is
* attached to one of these structures.
*
* XXX: Would be better to use TAILQ.
*/
struct ip6q {
u_int32_t ip6q_head;
u_int16_t ip6q_len;
u_int8_t ip6q_nxt; /* ip6f_nxt in first fragment */
u_int8_t ip6q_hlim;
struct ip6asfrag *ip6q_down;
struct ip6asfrag *ip6q_up;
u_int32_t ip6q_ident;
u_int8_t ip6q_ttl;
struct in6_addr ip6q_src, ip6q_dst;
struct ip6q *ip6q_next;
struct ip6q *ip6q_prev;
int ip6q_unfrglen; /* len of unfragmentable part */
int ip6q_nfrag; /* # of fragments */
int ip6q_ipsec; /* IPsec flags */
};
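/*
 * Note: the first six members of ip6asfrag below mirror the first six
 * members of ip6q, so that an ip6q cast to (struct ip6asfrag *) can
 * serve as the sentinel of its own circular fragment list.
 */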
struct ip6asfrag {
u_int32_t ip6af_head;
u_int16_t ip6af_len;
u_int8_t ip6af_nxt;
u_int8_t ip6af_hlim;
/* the above members must not be overwritten during reassembly */
struct ip6asfrag *ip6af_down;
struct ip6asfrag *ip6af_up;
struct mbuf *ip6af_m;
int ip6af_offset; /* offset in ip6af_m to next header */
int ip6af_frglen; /* fragmentable part length */
int ip6af_off; /* fragment offset */
bool ip6af_mff; /* more fragment bit in frag off */
};
static void frag6_enq(struct ip6asfrag *, struct ip6asfrag *);
static void frag6_deq(struct ip6asfrag *);
static void frag6_insque(struct ip6q *, struct ip6q *);
static void frag6_remque(struct ip6q *);
static void frag6_freef(struct ip6q *);
static int frag6_drainwanted;	/* drain request, serviced by frag6_fasttimo() */
static u_int frag6_nfragpackets;	/* # of packets currently under reassembly */
static u_int frag6_nfrags;	/* total # of fragments currently queued */
static struct ip6q ip6q;	/* ip6 reassembly queue: circular list head/sentinel */
/* Protects ip6q */
static kmutex_t frag6_lock __cacheline_aligned;
/*
* Initialise reassembly queue and fragment identifier.
*/
void
frag6_init(void)
{
ip6q.ip6q_next = ip6q.ip6q_prev = &ip6q;
mutex_init(&frag6_lock, MUTEX_DEFAULT, IPL_NONE);
}
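/*
 * Unlink a reassembly queue entry, free it, and update the accounting.
 * The caller must hold frag6_lock.
 */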
static void
frag6_dropfrag(struct ip6q *q6)
{
frag6_remque(q6);
frag6_nfrags -= q6->ip6q_nfrag;
kmem_intr_free(q6, sizeof(*q6));
frag6_nfragpackets--;
}
/*
* IPv6 fragment input.
*
* In RFC2460, the fragment and reassembly rules do not agree with each
* other in terms of next header field handling in the fragment header.
* While the sender will use the same value for all of the fragments of a
* packet, the receiver is advised not to check for consistency.
*
* fragment rule (p20):
* (2) A Fragment header containing:
* The Next Header value that identifies the first header of
* the Fragmentable Part of the original packet.
* -> next header field is same for all fragments
*
* reassembly rule (p21):
* The Next Header field of the last header of the Unfragmentable
* Part is obtained from the Next Header field of the first
* fragment's Fragment header.
* -> should grab it from the first fragment only
*
* The following note also contradicts the fragment rule - no one is going
* to send fragments of the same packet with different next header fields.
*
* additional note (p22):
* The Next Header values in the Fragment headers of different
* fragments of the same original packet may differ. Only the value
* from the Offset zero fragment packet is used for reassembly.
* -> should grab it from the first fragment only
*
* There is no explicit reason given in the RFC. Historical reason maybe?
*
* XXX: It would be better to use a pool, rather than kmem.
*/
int
frag6_input(struct mbuf **mp, int *offp, int proto)
{
struct rtentry *rt;
struct mbuf *m = *mp, *t;
struct ip6_hdr *ip6;
struct ip6_frag *ip6f;
struct ip6q *q6;
struct ip6asfrag *af6, *ip6af, *af6dwn;
int offset = *offp, nxt, i, next;
int ipsecflags = m->m_flags & (M_DECRYPTED|M_AUTHIPHDR);
int first_frag = 0;
int fragoff, frgpartlen; /* must be larger than u_int16_t */
struct ifnet *dstifp;
static struct route ro;
union {
struct sockaddr dst;
struct sockaddr_in6 dst6;
} u;
ip6 = mtod(m, struct ip6_hdr *);
IP6_EXTHDR_GET(ip6f, struct ip6_frag *, m, offset, sizeof(*ip6f));
if (ip6f == NULL)
return IPPROTO_DONE;
dstifp = NULL;
/* find the destination interface of the packet. */
sockaddr_in6_init(&u.dst6, &ip6->ip6_dst, 0, 0, 0);
if ((rt = rtcache_lookup(&ro, &u.dst)) != NULL)
dstifp = ((struct in6_ifaddr *)rt->rt_ifa)->ia_ifp;
/* jumbo payload can't contain a fragment header */
if (ip6->ip6_plen == 0) {
icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, offset);
in6_ifstat_inc(dstifp, ifs6_reass_fail);
goto done;
}
/*
* Check whether the fragment's fragmentable part length is non-zero and
* a multiple of 8 octets.
* sizeof(struct ip6_frag) == 8
* sizeof(struct ip6_hdr) == 40
*/
frgpartlen = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen) - offset
- sizeof(struct ip6_frag);
if ((frgpartlen == 0) ||
((ip6f->ip6f_offlg & IP6F_MORE_FRAG) && (frgpartlen & 0x7) != 0)) {
icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
offsetof(struct ip6_hdr, ip6_plen));
in6_ifstat_inc(dstifp, ifs6_reass_fail);
goto done;
}
IP6_STATINC(IP6_STAT_FRAGMENTS);
in6_ifstat_inc(dstifp, ifs6_reass_reqd);
/* offset now points to data portion */
offset += sizeof(struct ip6_frag);
/*
* RFC6946: A host that receives an IPv6 packet which includes
* a Fragment Header with the "Fragment Offset" equal to 0 and
* the "M" bit equal to 0 MUST process such packet in isolation
* from any other packets/fragments.
*
* XXX: Would be better to remove this fragment header entirely,
* for us not to get confused later when looking back at the
* previous headers in the chain.
*/
fragoff = ntohs(ip6f->ip6f_offlg & IP6F_OFF_MASK);
if (fragoff == 0 && !(ip6f->ip6f_offlg & IP6F_MORE_FRAG)) {
IP6_STATINC(IP6_STAT_REASSEMBLED);
in6_ifstat_inc(dstifp, ifs6_reass_ok);
*offp = offset;
rtcache_unref(rt, &ro);
return ip6f->ip6f_nxt;
}
mutex_enter(&frag6_lock);
/*
* Enforce upper bound on number of fragments.
* If maxfrag is 0, never accept fragments.
* If maxfrag is -1, accept all fragments without limitation.
*/
if (ip6_maxfrags < 0)
;
else if (frag6_nfrags >= (u_int)ip6_maxfrags)
goto dropfrag;
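/*
 * Look for an existing reassembly queue that matches this fragment's
 * (identification, source, destination) tuple.
 */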
for (q6 = ip6q.ip6q_next; q6 != &ip6q; q6 = q6->ip6q_next)
if (ip6f->ip6f_ident == q6->ip6q_ident &&
IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, &q6->ip6q_src) &&
IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &q6->ip6q_dst))
break;
if (q6 != &ip6q) {
/* All fragments must have the same IPsec flags. */
if (q6->ip6q_ipsec != ipsecflags) {
goto dropfrag;
}
}
if (q6 == &ip6q) {
/*
* This is the first fragment to arrive; create a reassembly queue.
*/
first_frag = 1;
/*
* Enforce upper bound on number of fragmented packets
* for which we attempt reassembly;
* If maxfragpackets is 0, never accept fragments.
* If maxfragpackets is -1, accept all fragments without
* limitation.
*/
if (ip6_maxfragpackets < 0)
;
else if (frag6_nfragpackets >= (u_int)ip6_maxfragpackets)
goto dropfrag;
frag6_nfragpackets++;
q6 = kmem_intr_zalloc(sizeof(struct ip6q), KM_NOSLEEP);
if (q6 == NULL) {
goto dropfrag;
}
frag6_insque(q6, &ip6q);
/* ip6q_nxt will be filled afterwards, from 1st fragment */
q6->ip6q_down = q6->ip6q_up = (struct ip6asfrag *)q6;
q6->ip6q_ident = ip6f->ip6f_ident;
q6->ip6q_ttl = IPV6_FRAGTTL;	/* lifetime, decremented by frag6_slowtimo() */
q6->ip6q_src = ip6->ip6_src;
q6->ip6q_dst = ip6->ip6_dst;
q6->ip6q_unfrglen = -1; /* The 1st fragment has not arrived. */
q6->ip6q_nfrag = 0;
q6->ip6q_ipsec = ipsecflags;
}
/*
* If it's the 1st fragment, record the length of the
* unfragmentable part and the next header of the fragment header.
*/
if (fragoff == 0) {
q6->ip6q_unfrglen = offset - sizeof(struct ip6_hdr) -
sizeof(struct ip6_frag);
q6->ip6q_nxt = ip6f->ip6f_nxt;
}
/*
* Check that the reassembled packet would not exceed 65535 bytes
* in size. If it would exceed, discard the fragment and return an
* ICMP error.
*/
if (q6->ip6q_unfrglen >= 0) {
/* The 1st fragment has already arrived. */
if (q6->ip6q_unfrglen + fragoff + frgpartlen > IPV6_MAXPACKET) {
mutex_exit(&frag6_lock);
icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
offset - sizeof(struct ip6_frag) +
offsetof(struct ip6_frag, ip6f_offlg));
goto done;
}
} else if (fragoff + frgpartlen > IPV6_MAXPACKET) {
mutex_exit(&frag6_lock);
icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
offset - sizeof(struct ip6_frag) +
offsetof(struct ip6_frag, ip6f_offlg));
goto done;
}
/*
* If it's the first fragment, do the above check for each
* fragment already stored in the reassembly queue.
*/
if (fragoff == 0) {
for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
af6 = af6dwn) {
af6dwn = af6->ip6af_down;
if (q6->ip6q_unfrglen + af6->ip6af_off + af6->ip6af_frglen >
IPV6_MAXPACKET) {
struct mbuf *merr = af6->ip6af_m;
struct ip6_hdr *ip6err;
int erroff = af6->ip6af_offset;
/* dequeue the fragment. */
frag6_deq(af6);
kmem_intr_free(af6, sizeof(struct ip6asfrag));
/* adjust pointer. */
ip6err = mtod(merr, struct ip6_hdr *);
/*
* Restore source and destination addresses
* in the erroneous IPv6 header.
*/
ip6err->ip6_src = q6->ip6q_src;
ip6err->ip6_dst = q6->ip6q_dst;
icmp6_error(merr, ICMP6_PARAM_PROB,
ICMP6_PARAMPROB_HEADER,
erroff - sizeof(struct ip6_frag) +
offsetof(struct ip6_frag, ip6f_offlg));
}
}
}
ip6af = kmem_intr_zalloc(sizeof(struct ip6asfrag), KM_NOSLEEP);
if (ip6af == NULL) {
goto dropfrag;
}
ip6af->ip6af_head = ip6->ip6_flow;
ip6af->ip6af_len = ip6->ip6_plen;
ip6af->ip6af_nxt = ip6->ip6_nxt;
ip6af->ip6af_hlim = ip6->ip6_hlim;
ip6af->ip6af_mff = (ip6f->ip6f_offlg & IP6F_MORE_FRAG) != 0;
ip6af->ip6af_off = fragoff;
ip6af->ip6af_frglen = frgpartlen;
ip6af->ip6af_offset = offset;
ip6af->ip6af_m = m;
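/*
 * A newly created queue is empty, so the fragment goes directly
 * after the sentinel; skip the ordering and overlap checks below.
 */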
if (first_frag) {
af6 = (struct ip6asfrag *)q6;
goto insert;
}
/*
* Find a segment which begins after this one does.
*/
for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
af6 = af6->ip6af_down)
if (af6->ip6af_off > ip6af->ip6af_off)
break;
/*
* If the incoming fragment overlaps any existing fragment in the
* reassembly queue, drop it, as required by RFC 5722.
*/
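/* Does the preceding fragment extend past our starting offset? */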
if (af6->ip6af_up != (struct ip6asfrag *)q6) {
i = af6->ip6af_up->ip6af_off + af6->ip6af_up->ip6af_frglen
- ip6af->ip6af_off;
if (i > 0) {
kmem_intr_free(ip6af, sizeof(struct ip6asfrag));
goto dropfrag;
}
}
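/* Does this fragment extend into the one that follows it? */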
if (af6 != (struct ip6asfrag *)q6) {
i = (ip6af->ip6af_off + ip6af->ip6af_frglen) - af6->ip6af_off;
if (i > 0) {
kmem_intr_free(ip6af, sizeof(struct ip6asfrag));
goto dropfrag;
}
}
insert:
/*
* Stick new segment in its place.
*/
frag6_enq(ip6af, af6->ip6af_up);
frag6_nfrags++;
q6->ip6q_nfrag++;
/*
* Check for complete reassembly.
*/
next = 0;
for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
af6 = af6->ip6af_down) {
if (af6->ip6af_off != next) {
mutex_exit(&frag6_lock);
goto done;
}
next += af6->ip6af_frglen;
}
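/*
 * The loop ran off the end, so af6 is back at the sentinel and
 * af6->ip6af_up is the last fragment; if its M bit is still set,
 * more fragments are yet to arrive.
 */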
if (af6->ip6af_up->ip6af_mff) {
mutex_exit(&frag6_lock);
goto done;
}
/*
* Reassembly is complete; concatenate fragments.
*/
ip6af = q6->ip6q_down;
t = m = ip6af->ip6af_m;
af6 = ip6af->ip6af_down;
frag6_deq(ip6af);
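/*
 * Append each remaining fragment to the first one, trimming the
 * per-fragment headers (m_adj) and the stale packet header
 * (m_remove_pkthdr) from everything but the first fragment.
 */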
while (af6 != (struct ip6asfrag *)q6) {
af6dwn = af6->ip6af_down;
frag6_deq(af6);
while (t->m_next)
t = t->m_next;
t->m_next = af6->ip6af_m;
m_adj(t->m_next, af6->ip6af_offset);
m_remove_pkthdr(t->m_next);
kmem_intr_free(af6, sizeof(struct ip6asfrag));
af6 = af6dwn;
}
/* adjust offset to point where the original next header starts */
offset = ip6af->ip6af_offset - sizeof(struct ip6_frag);
kmem_intr_free(ip6af, sizeof(struct ip6asfrag));
next += offset - sizeof(struct ip6_hdr);
if ((u_int)next > IPV6_MAXPACKET) {
frag6_dropfrag(q6);
goto dropfrag;
}
ip6 = mtod(m, struct ip6_hdr *);
ip6->ip6_plen = htons(next);
ip6->ip6_src = q6->ip6q_src;
ip6->ip6_dst = q6->ip6q_dst;
nxt = q6->ip6q_nxt;
/*
* Delete frag6 header.
*/
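/*
 * If the fragment header lies within the first mbuf, slide the
 * preceding headers forward over it; otherwise split the chain just
 * before the fragment header, trim the header off, and rejoin.
 */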
if (m->m_len >= offset + sizeof(struct ip6_frag)) {
memmove((char *)ip6 + sizeof(struct ip6_frag), ip6, offset);
m->m_data += sizeof(struct ip6_frag);
m->m_len -= sizeof(struct ip6_frag);
} else {
/* no data copy is needed if the split boundary falls within a cluster */
if ((t = m_split(m, offset, M_DONTWAIT)) == NULL) {
frag6_dropfrag(q6);
goto dropfrag;
}
m_adj(t, sizeof(struct ip6_frag));
m_cat(m, t);
}
frag6_dropfrag(q6);
{
KASSERT(m->m_flags & M_PKTHDR);
int plen = 0;
for (t = m; t; t = t->m_next) {
plen += t->m_len;
}
m->m_pkthdr.len = plen;
/* XXX XXX: clear csum_flags? */
}
/*
* Restore NXT to the original.
*/
{
const int prvnxt = ip6_get_prevhdr(m, offset);
uint8_t *prvnxtp;
IP6_EXTHDR_GET(prvnxtp, uint8_t *, m, prvnxt,
sizeof(*prvnxtp));
if (prvnxtp == NULL) {
goto dropfrag;
}
*prvnxtp = nxt;
}
IP6_STATINC(IP6_STAT_REASSEMBLED);
in6_ifstat_inc(dstifp, ifs6_reass_ok);
rtcache_unref(rt, &ro);
mutex_exit(&frag6_lock);
/*
* Tell launch routine the next header.
*/
*mp = m;
*offp = offset;
return nxt;
dropfrag:
mutex_exit(&frag6_lock);
in6_ifstat_inc(dstifp, ifs6_reass_fail);
IP6_STATINC(IP6_STAT_FRAGDROPPED);
m_freem(m);
done:
rtcache_unref(rt, &ro);
return IPPROTO_DONE;
}
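/*
 * Reassemble the fragment in *mp. Returns 0 with *mp pointing at the
 * packet when it can be passed on (atomic fragment or reassembly
 * complete), or EINVAL with *mp set to NULL when the fragment has been
 * consumed (queued, dropped, or freed).
 */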
int
ip6_reass_packet(struct mbuf **mp, int offset)
{
if (frag6_input(mp, &offset, IPPROTO_IPV6) == IPPROTO_DONE) {
*mp = NULL;
return EINVAL;
}
return 0;
}
/*
* Free a fragment reassembly header and all
* associated datagrams.
*/
static void
frag6_freef(struct ip6q *q6)
{
struct ip6asfrag *af6, *down6;
KASSERT(mutex_owned(&frag6_lock));
for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
af6 = down6) {
struct mbuf *m = af6->ip6af_m;
down6 = af6->ip6af_down;
frag6_deq(af6);
/*
* Return ICMP time exceeded error for the 1st fragment.
* Just free other fragments.
*/
if (af6->ip6af_off == 0) {
struct ip6_hdr *ip6;
/* adjust pointer */
ip6 = mtod(m, struct ip6_hdr *);
/* restore source and destination addresses */
ip6->ip6_src = q6->ip6q_src;
ip6->ip6_dst = q6->ip6q_dst;
icmp6_error(m, ICMP6_TIME_EXCEEDED,
ICMP6_TIME_EXCEED_REASSEMBLY, 0);
} else {
m_freem(m);
}
kmem_intr_free(af6, sizeof(struct ip6asfrag));
}
frag6_dropfrag(q6);
}
/*
* Put an IPv6 fragment on a reassembly chain.
* Like insque, but with the pointers in the middle of the structure.
*/
static void
frag6_enq(struct ip6asfrag *af6, struct ip6asfrag *up6)
{
KASSERT(mutex_owned(&frag6_lock));
af6->ip6af_up = up6;
af6->ip6af_down = up6->ip6af_down;
up6->ip6af_down->ip6af_up = af6;
up6->ip6af_down = af6;
}
/*
* To frag6_enq as remque is to insque.
*/
static void
frag6_deq(struct ip6asfrag *af6)
{
KASSERT(mutex_owned(&frag6_lock));
af6->ip6af_up->ip6af_down = af6->ip6af_down;
af6->ip6af_down->ip6af_up = af6->ip6af_up;
}
/*
* Insert newq after oldq.
*/
static void
frag6_insque(struct ip6q *newq, struct ip6q *oldq)
{
KASSERT(mutex_owned(&frag6_lock));
newq->ip6q_prev = oldq;
newq->ip6q_next = oldq->ip6q_next;
oldq->ip6q_next->ip6q_prev = newq;
oldq->ip6q_next = newq;
}
/*
* Unlink p6.
*/
static void
frag6_remque(struct ip6q *p6)
{
KASSERT(mutex_owned(&frag6_lock));
p6->ip6q_prev->ip6q_next = p6->ip6q_next;
p6->ip6q_next->ip6q_prev = p6->ip6q_prev;
}
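/*
 * Fast timeout: run a deferred drain request, if any.
 */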
void
frag6_fasttimo(void)
{
SOFTNET_KERNEL_LOCK_UNLESS_NET_MPSAFE();
if (frag6_drainwanted) {
frag6_drain();
frag6_drainwanted = 0;
}
SOFTNET_KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
}
/*
* IPv6 reassembling timer processing;
* if a timer expires on a reassembly
* queue, discard it.
*/
void
frag6_slowtimo(void)
{
struct ip6q *q6;
SOFTNET_KERNEL_LOCK_UNLESS_NET_MPSAFE();
mutex_enter(&frag6_lock);
q6 = ip6q.ip6q_next;
if (q6) {
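/*
 * Advance before testing for expiry, so that frag6_freef() can
 * unlink the expired entry (then q6->ip6q_prev) without
 * invalidating the iterator.
 */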
while (q6 != &ip6q) {
--q6->ip6q_ttl;
q6 = q6->ip6q_next;
if (q6->ip6q_prev->ip6q_ttl == 0) {
IP6_STATINC(IP6_STAT_FRAGTIMEOUT);
/* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
frag6_freef(q6->ip6q_prev);
}
}
}
/*
* If we are over the maximum number of fragments
* (due to the limit being lowered), drain off
* enough to get down to the new limit.
*/
while (frag6_nfragpackets > (u_int)ip6_maxfragpackets &&
ip6q.ip6q_prev) {
IP6_STATINC(IP6_STAT_FRAGOVERFLOW);
/* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
frag6_freef(ip6q.ip6q_prev);
}
mutex_exit(&frag6_lock);
SOFTNET_KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
#if 0
/*
* Routing changes might produce a better route than we last used;
* make sure we notice eventually, even if forwarding only for one
* destination and the cache is never replaced.
*/
rtcache_free(&ip6_forward_rt);
rtcache_free(&ipsrcchk_rt);
#endif
}
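/*
 * Note that a drain is wanted; the work itself is deferred to
 * frag6_fasttimo(), where it is safe to take frag6_lock.
 */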
void
frag6_drainstub(void)
{
frag6_drainwanted = 1;
}
/*
* Drain off all datagram fragments.
*/
void
frag6_drain(void)
{
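/* Best effort: if the lock is contended, skip the drain rather than wait. */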
if (mutex_tryenter(&frag6_lock)) {
while (ip6q.ip6q_next != &ip6q) {
IP6_STATINC(IP6_STAT_FRAGDROPPED);
/* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
frag6_freef(ip6q.ip6q_next);
}
mutex_exit(&frag6_lock);
}
}