2024-04-19 08:04:06 +03:00
|
|
|
/* $NetBSD: frag6.c,v 1.78 2024/04/19 05:04:06 ozaki-r Exp $ */
|
2002-05-28 07:04:05 +04:00
|
|
|
/* $KAME: frag6.c,v 1.40 2002/05/27 21:40:31 itojun Exp $ */
|
1999-07-04 01:24:45 +04:00
|
|
|
|
1999-06-28 10:36:47 +04:00
|
|
|
/*
|
|
|
|
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
|
|
|
|
* All rights reserved.
|
2001-02-10 07:14:26 +03:00
|
|
|
*
|
1999-06-28 10:36:47 +04:00
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
* 3. Neither the name of the project nor the names of its contributors
|
|
|
|
* may be used to endorse or promote products derived from this software
|
|
|
|
* without specific prior written permission.
|
2001-02-10 07:14:26 +03:00
|
|
|
*
|
1999-06-28 10:36:47 +04:00
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
|
|
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
|
|
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
|
|
* SUCH DAMAGE.
|
|
|
|
*/
|
|
|
|
|
2001-11-13 03:56:55 +03:00
|
|
|
#include <sys/cdefs.h>
|
2024-04-19 08:04:06 +03:00
|
|
|
__KERNEL_RCSID(0, "$NetBSD: frag6.c,v 1.78 2024/04/19 05:04:06 ozaki-r Exp $");
|
2017-01-24 10:09:24 +03:00
|
|
|
|
|
|
|
#ifdef _KERNEL_OPT
|
|
|
|
#include "opt_net_mpsafe.h"
|
|
|
|
#endif
|
2001-11-13 03:56:55 +03:00
|
|
|
|
1999-06-28 10:36:47 +04:00
|
|
|
#include <sys/param.h>
|
|
|
|
#include <sys/systm.h>
|
|
|
|
#include <sys/mbuf.h>
|
|
|
|
#include <sys/errno.h>
|
|
|
|
#include <sys/time.h>
|
2012-07-02 02:04:44 +04:00
|
|
|
#include <sys/kmem.h>
|
1999-06-28 10:36:47 +04:00
|
|
|
#include <sys/kernel.h>
|
|
|
|
#include <sys/syslog.h>
|
|
|
|
|
|
|
|
#include <net/if.h>
|
|
|
|
#include <net/route.h>
|
|
|
|
|
|
|
|
#include <netinet/in.h>
|
|
|
|
#include <netinet/in_var.h>
|
2000-02-06 15:49:37 +03:00
|
|
|
#include <netinet/ip6.h>
|
1999-06-28 10:36:47 +04:00
|
|
|
#include <netinet6/ip6_var.h>
|
2008-04-15 07:57:04 +04:00
|
|
|
#include <netinet6/ip6_private.h>
|
2000-02-06 15:49:37 +03:00
|
|
|
#include <netinet/icmp6.h>
|
1999-06-28 10:36:47 +04:00
|
|
|
|
2018-01-25 18:33:06 +03:00
|
|
|
/*
|
2018-04-13 14:32:44 +03:00
|
|
|
* IPv6 reassembly queue structure. Each fragment being reassembled is
|
|
|
|
* attached to one of these structures.
|
|
|
|
*
|
|
|
|
* XXX: Would be better to use TAILQ.
|
2018-01-25 18:33:06 +03:00
|
|
|
*/
|
|
|
|
struct ip6q {
	/*
	 * NOTE: the first four members overlay the leading members of
	 * struct ip6asfrag.  The queue head is cast to
	 * (struct ip6asfrag *) and used as the sentinel of the
	 * per-packet fragment list, so the two layouts must stay in
	 * sync (see the "must not override" comment in ip6asfrag).
	 */
	u_int32_t ip6q_head;
	u_int16_t ip6q_len;
	u_int8_t ip6q_nxt;		/* ip6f_nxt in first fragment */
	u_int8_t ip6q_hlim;
	struct ip6asfrag *ip6q_down;	/* fragment chain, doubly linked */
	struct ip6asfrag *ip6q_up;
	u_int32_t ip6q_ident;		/* fragment identification (ip6f_ident) */
	u_int8_t ip6q_ttl;		/* remaining lifetime; set to IPV6_FRAGTTL */
	struct in6_addr ip6q_src, ip6q_dst; /* addresses keying this queue */
	struct ip6q *ip6q_next;		/* global reassembly list linkage */
	struct ip6q *ip6q_prev;
	int ip6q_unfrglen;	/* len of unfragmentable part */
	int ip6q_nfrag;		/* # of fragments */
	int ip6q_ipsec;		/* IPsec flags (M_DECRYPTED|M_AUTHIPHDR);
				 * all fragments of a packet must match */
};
|
|
|
|
|
|
|
|
/*
 * One queued fragment awaiting reassembly.  The leading four members
 * hold a copy of the fragment's IPv6 header fields (flow, payload
 * length, next header, hop limit) and overlay the start of struct
 * ip6q, which serves as the list sentinel.
 */
struct ip6asfrag {
	u_int32_t ip6af_head;	/* copy of ip6_flow */
	u_int16_t ip6af_len;	/* copy of ip6_plen */
	u_int8_t ip6af_nxt;	/* copy of ip6_nxt */
	u_int8_t ip6af_hlim;	/* copy of ip6_hlim */
	/* must not override the above members during reassembling */
	struct ip6asfrag *ip6af_down;	/* fragment chain, doubly linked */
	struct ip6asfrag *ip6af_up;
	struct mbuf *ip6af_m;	/* the fragment's mbuf chain */
	int ip6af_offset;	/* offset in ip6af_m to next header */
	int ip6af_frglen;	/* fragmentable part length */
	int ip6af_off;		/* fragment offset */
	bool ip6af_mff;		/* more fragment bit in frag off */
};
|
|
|
|
|
2007-11-01 23:33:00 +03:00
|
|
|
static void frag6_enq(struct ip6asfrag *, struct ip6asfrag *);
|
|
|
|
static void frag6_deq(struct ip6asfrag *);
|
|
|
|
static void frag6_insque(struct ip6q *, struct ip6q *);
|
|
|
|
static void frag6_remque(struct ip6q *);
|
|
|
|
static void frag6_freef(struct ip6q *);
|
1999-06-28 10:36:47 +04:00
|
|
|
|
2011-05-03 21:44:30 +04:00
|
|
|
static int frag6_drainwanted;
|
|
|
|
|
2018-04-13 14:32:44 +03:00
|
|
|
static u_int frag6_nfragpackets;
|
|
|
|
static u_int frag6_nfrags;
|
|
|
|
static struct ip6q ip6q; /* ip6 reassembly queue */
|
1999-06-28 10:36:47 +04:00
|
|
|
|
2018-01-25 18:33:06 +03:00
|
|
|
/* Protects ip6q */
|
|
|
|
static kmutex_t frag6_lock __cacheline_aligned;
|
2002-03-15 13:44:07 +03:00
|
|
|
|
1999-06-28 10:36:47 +04:00
|
|
|
/*
|
|
|
|
* Initialise reassembly queue and fragment identifier.
|
|
|
|
*/
|
|
|
|
void
frag6_init(void)
{

	/* Empty circular list: the static head points at itself. */
	ip6q.ip6q_next = ip6q.ip6q_prev = &ip6q;
	/* frag6_lock protects the global reassembly queue (ip6q). */
	mutex_init(&frag6_lock, MUTEX_DEFAULT, IPL_NONE);
}
|
|
|
|
|
2023-08-29 20:01:35 +03:00
|
|
|
static void
|
|
|
|
frag6_dropfrag(struct ip6q *q6)
|
|
|
|
{
|
|
|
|
frag6_remque(q6);
|
|
|
|
frag6_nfrags -= q6->ip6q_nfrag;
|
|
|
|
kmem_intr_free(q6, sizeof(*q6));
|
|
|
|
frag6_nfragpackets--;
|
|
|
|
}
|
|
|
|
|
2000-02-03 21:13:01 +03:00
|
|
|
/*
|
2012-07-02 02:04:44 +04:00
|
|
|
* IPv6 fragment input.
|
|
|
|
*
|
2000-02-03 21:13:01 +03:00
|
|
|
* In RFC2460, fragment and reassembly rule do not agree with each other,
|
|
|
|
* in terms of next header field handling in fragment header.
|
|
|
|
* While the sender will use the same value for all of the fragmented packets,
|
|
|
|
* receiver is suggested not to check the consistency.
|
|
|
|
*
|
|
|
|
* fragment rule (p20):
|
|
|
|
* (2) A Fragment header containing:
|
|
|
|
* The Next Header value that identifies the first header of
|
|
|
|
* the Fragmentable Part of the original packet.
|
|
|
|
* -> next header field is same for all fragments
|
|
|
|
*
|
2001-02-10 07:14:26 +03:00
|
|
|
* reassembly rule (p21):
|
2000-02-03 21:13:01 +03:00
|
|
|
* The Next Header field of the last header of the Unfragmentable
|
|
|
|
* Part is obtained from the Next Header field of the first
|
|
|
|
* fragment's Fragment header.
|
|
|
|
* -> should grab it from the first fragment only
|
|
|
|
*
|
|
|
|
 * The following note also contradicts the fragment rule - no one is going to
|
|
|
|
* send different fragment with different next header field.
|
|
|
|
*
|
|
|
|
* additional note (p22):
|
|
|
|
* The Next Header values in the Fragment headers of different
|
|
|
|
* fragments of the same original packet may differ. Only the value
|
|
|
|
* from the Offset zero fragment packet is used for reassembly.
|
|
|
|
* -> should grab it from the first fragment only
|
|
|
|
*
|
|
|
|
* There is no explicit reason given in the RFC. Historical reason maybe?
|
2018-04-13 14:19:09 +03:00
|
|
|
*
|
|
|
|
* XXX: It would be better to use a pool, rather than kmem.
|
2000-02-03 21:13:01 +03:00
|
|
|
*/
|
2012-07-02 02:04:44 +04:00
|
|
|
int
|
|
|
|
frag6_input(struct mbuf **mp, int *offp, int proto)
|
1999-06-28 10:36:47 +04:00
|
|
|
{
|
2007-12-20 22:53:29 +03:00
|
|
|
struct rtentry *rt;
|
1999-06-28 10:36:47 +04:00
|
|
|
struct mbuf *m = *mp, *t;
|
|
|
|
struct ip6_hdr *ip6;
|
|
|
|
struct ip6_frag *ip6f;
|
|
|
|
struct ip6q *q6;
|
2000-02-03 21:13:01 +03:00
|
|
|
struct ip6asfrag *af6, *ip6af, *af6dwn;
|
1999-06-28 10:36:47 +04:00
|
|
|
int offset = *offp, nxt, i, next;
|
When reassembling IPv4/IPv6 packets, ensure each fragment has been subject
to the same IPsec processing. That is to say, that all fragments are ESP,
or AH, or AH+ESP, or none.
The reassembly mechanism can be used both on the wire and inside an IPsec
tunnel, so we need to make sure all fragments of a packet were received
on only one side.
Even though I haven't tried, I believe there are configurations where it
would be possible for an attacker to inject an unencrypted fragment into a
legitimate stream of already-decrypted-and-authenticated fragments.
Typically on IPsec gateways with ESP tunnels, where we can encapsulate
fragments (as opposed to the general case, where we fragment encapsulated
data).
Note, for the record: a funnier thing, under IPv4, would be to send a
zero-sized !MFF fragment at the head of the packet, and manage to trigger
an ICMP error; M_DECRYPTED gets lost by the reassembly, and ICMP will reply
with the packet in clear (not encrypted).
2018-05-15 22:16:38 +03:00
|
|
|
int ipsecflags = m->m_flags & (M_DECRYPTED|M_AUTHIPHDR);
|
1999-06-28 10:36:47 +04:00
|
|
|
int first_frag = 0;
|
2000-02-03 21:13:01 +03:00
|
|
|
int fragoff, frgpartlen; /* must be larger than u_int16_t */
|
1999-12-13 18:17:17 +03:00
|
|
|
struct ifnet *dstifp;
|
Eliminate address family-specific route caches (struct route, struct
route_in6, struct route_iso), replacing all caches with a struct
route.
The principle benefit of this change is that all of the protocol
families can benefit from route cache-invalidation, which is
necessary for correct routing. Route-cache invalidation fixes an
ancient PR, kern/3508, at long last; it fixes various other PRs,
also.
Discussions with and ideas from Joerg Sonnenberger influenced this
work tremendously. Of course, all design oversights and bugs are
mine.
DETAILS
1 I added to each address family a pool of sockaddrs. I have
introduced routines for allocating, copying, and duplicating,
and freeing sockaddrs:
struct sockaddr *sockaddr_alloc(sa_family_t af, int flags);
struct sockaddr *sockaddr_copy(struct sockaddr *dst,
const struct sockaddr *src);
struct sockaddr *sockaddr_dup(const struct sockaddr *src, int flags);
void sockaddr_free(struct sockaddr *sa);
sockaddr_alloc() returns either a sockaddr from the pool belonging
to the specified family, or NULL if the pool is exhausted. The
returned sockaddr has the right size for that family; sa_family
and sa_len fields are initialized to the family and sockaddr
length---e.g., sa_family = AF_INET and sa_len = sizeof(struct
sockaddr_in). sockaddr_free() puts the given sockaddr back into
its family's pool.
sockaddr_dup() and sockaddr_copy() work analogously to strdup()
and strcpy(), respectively. sockaddr_copy() KASSERTs that the
family of the destination and source sockaddrs are alike.
The 'flags' argument for sockaddr_alloc() and sockaddr_dup() is
passed directly to pool_get(9).
2 I added routines for initializing sockaddrs in each address
family, sockaddr_in_init(), sockaddr_in6_init(), sockaddr_iso_init(),
etc. They are fairly self-explanatory.
3 structs route_in6 and route_iso are no more. All protocol families
use struct route. I have changed the route cache, 'struct route',
so that it does not contain storage space for a sockaddr. Instead,
struct route points to a sockaddr coming from the pool the sockaddr
belongs to. I added a new method to struct route, rtcache_setdst(),
for setting the cache destination:
int rtcache_setdst(struct route *, const struct sockaddr *);
rtcache_setdst() returns 0 on success, or ENOMEM if no memory is
available to create the sockaddr storage.
It is now possible for rtcache_getdst() to return NULL if, say,
rtcache_setdst() failed. I check the return value for NULL
everywhere in the kernel.
4 Each routing domain (struct domain) has a list of live route
caches, dom_rtcache. rtflushall(sa_family_t af) looks up the
domain indicated by 'af', walks the domain's list of route caches
and invalidates each one.
2007-05-03 00:40:22 +04:00
|
|
|
static struct route ro;
|
|
|
|
union {
|
|
|
|
struct sockaddr dst;
|
|
|
|
struct sockaddr_in6 dst6;
|
|
|
|
} u;
|
1999-06-28 10:36:47 +04:00
|
|
|
|
|
|
|
ip6 = mtod(m, struct ip6_hdr *);
|
1999-12-13 18:17:17 +03:00
|
|
|
IP6_EXTHDR_GET(ip6f, struct ip6_frag *, m, offset, sizeof(*ip6f));
|
|
|
|
if (ip6f == NULL)
|
2012-07-02 02:04:44 +04:00
|
|
|
return IPPROTO_DONE;
|
1999-12-13 18:17:17 +03:00
|
|
|
|
|
|
|
dstifp = NULL;
|
|
|
|
/* find the destination interface of the packet. */
|
Eliminate address family-specific route caches (struct route, struct
route_in6, struct route_iso), replacing all caches with a struct
route.
The principle benefit of this change is that all of the protocol
families can benefit from route cache-invalidation, which is
necessary for correct routing. Route-cache invalidation fixes an
ancient PR, kern/3508, at long last; it fixes various other PRs,
also.
Discussions with and ideas from Joerg Sonnenberger influenced this
work tremendously. Of course, all design oversights and bugs are
mine.
DETAILS
1 I added to each address family a pool of sockaddrs. I have
introduced routines for allocating, copying, and duplicating,
and freeing sockaddrs:
struct sockaddr *sockaddr_alloc(sa_family_t af, int flags);
struct sockaddr *sockaddr_copy(struct sockaddr *dst,
const struct sockaddr *src);
struct sockaddr *sockaddr_dup(const struct sockaddr *src, int flags);
void sockaddr_free(struct sockaddr *sa);
sockaddr_alloc() returns either a sockaddr from the pool belonging
to the specified family, or NULL if the pool is exhausted. The
returned sockaddr has the right size for that family; sa_family
and sa_len fields are initialized to the family and sockaddr
length---e.g., sa_family = AF_INET and sa_len = sizeof(struct
sockaddr_in). sockaddr_free() puts the given sockaddr back into
its family's pool.
sockaddr_dup() and sockaddr_copy() work analogously to strdup()
and strcpy(), respectively. sockaddr_copy() KASSERTs that the
family of the destination and source sockaddrs are alike.
The 'flags' argument for sockaddr_alloc() and sockaddr_dup() is
passed directly to pool_get(9).
2 I added routines for initializing sockaddrs in each address
family, sockaddr_in_init(), sockaddr_in6_init(), sockaddr_iso_init(),
etc. They are fairly self-explanatory.
3 structs route_in6 and route_iso are no more. All protocol families
use struct route. I have changed the route cache, 'struct route',
so that it does not contain storage space for a sockaddr. Instead,
struct route points to a sockaddr coming from the pool the sockaddr
belongs to. I added a new method to struct route, rtcache_setdst(),
for setting the cache destination:
int rtcache_setdst(struct route *, const struct sockaddr *);
rtcache_setdst() returns 0 on success, or ENOMEM if no memory is
available to create the sockaddr storage.
It is now possible for rtcache_getdst() to return NULL if, say,
rtcache_setdst() failed. I check the return value for NULL
everywhere in the kernel.
4 Each routing domain (struct domain) has a list of live route
caches, dom_rtcache. rtflushall(sa_family_t af) looks up the
domain indicated by 'af', walks the domain's list of route caches
and invalidates each one.
2007-05-03 00:40:22 +04:00
|
|
|
sockaddr_in6_init(&u.dst6, &ip6->ip6_dst, 0, 0, 0);
|
2019-11-13 05:51:22 +03:00
|
|
|
if ((rt = rtcache_lookup(&ro, &u.dst)) != NULL)
|
2007-12-20 22:53:29 +03:00
|
|
|
dstifp = ((struct in6_ifaddr *)rt->rt_ifa)->ia_ifp;
|
1999-06-28 10:36:47 +04:00
|
|
|
|
|
|
|
/* jumbo payload can't contain a fragment header */
|
|
|
|
if (ip6->ip6_plen == 0) {
|
|
|
|
icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, offset);
|
1999-12-13 18:17:17 +03:00
|
|
|
in6_ifstat_inc(dstifp, ifs6_reass_fail);
|
2016-11-09 06:49:38 +03:00
|
|
|
goto done;
|
1999-06-28 10:36:47 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2018-01-25 23:55:15 +03:00
|
|
|
* Check whether fragment packet's fragment length is non-zero and
|
2001-02-10 07:14:26 +03:00
|
|
|
* multiple of 8 octets.
|
1999-06-28 10:36:47 +04:00
|
|
|
* sizeof(struct ip6_frag) == 8
|
|
|
|
* sizeof(struct ip6_hdr) = 40
|
|
|
|
*/
|
2024-04-19 08:04:06 +03:00
|
|
|
frgpartlen = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen) - offset
|
|
|
|
- sizeof(struct ip6_frag);
|
|
|
|
if ((frgpartlen == 0) ||
|
|
|
|
((ip6f->ip6f_offlg & IP6F_MORE_FRAG) && (frgpartlen & 0x7) != 0)) {
|
2002-05-28 07:04:05 +04:00
|
|
|
icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
|
|
|
|
offsetof(struct ip6_hdr, ip6_plen));
|
1999-12-13 18:17:17 +03:00
|
|
|
in6_ifstat_inc(dstifp, ifs6_reass_fail);
|
2016-11-09 06:49:38 +03:00
|
|
|
goto done;
|
1999-06-28 10:36:47 +04:00
|
|
|
}
|
|
|
|
|
2008-04-15 07:57:04 +04:00
|
|
|
IP6_STATINC(IP6_STAT_FRAGMENTS);
|
1999-12-13 18:17:17 +03:00
|
|
|
in6_ifstat_inc(dstifp, ifs6_reass_reqd);
|
2002-06-09 18:43:10 +04:00
|
|
|
|
2000-02-03 21:13:01 +03:00
|
|
|
/* offset now points to data portion */
|
1999-06-28 10:36:47 +04:00
|
|
|
offset += sizeof(struct ip6_frag);
|
|
|
|
|
2012-09-28 03:10:00 +04:00
|
|
|
/*
|
2018-01-25 18:33:06 +03:00
|
|
|
* RFC6946: A host that receives an IPv6 packet which includes
|
|
|
|
* a Fragment Header with the "Fragment Offset" equal to 0 and
|
2013-08-30 11:42:08 +04:00
|
|
|
* the "M" bit equal to 0 MUST process such packet in isolation
|
2018-01-25 18:33:06 +03:00
|
|
|
* from any other packets/fragments.
|
2018-04-13 14:32:44 +03:00
|
|
|
*
|
|
|
|
* XXX: Would be better to remove this fragment header entirely,
|
|
|
|
* for us not to get confused later when looking back at the
|
|
|
|
* previous headers in the chain.
|
2012-09-28 03:10:00 +04:00
|
|
|
*/
|
|
|
|
fragoff = ntohs(ip6f->ip6f_offlg & IP6F_OFF_MASK);
|
|
|
|
if (fragoff == 0 && !(ip6f->ip6f_offlg & IP6F_MORE_FRAG)) {
|
|
|
|
IP6_STATINC(IP6_STAT_REASSEMBLED);
|
|
|
|
in6_ifstat_inc(dstifp, ifs6_reass_ok);
|
|
|
|
*offp = offset;
|
2016-12-08 08:16:33 +03:00
|
|
|
rtcache_unref(rt, &ro);
|
2018-01-25 18:33:06 +03:00
|
|
|
return ip6f->ip6f_nxt;
|
2012-09-28 03:10:00 +04:00
|
|
|
}
|
|
|
|
|
2011-11-04 04:22:33 +04:00
|
|
|
mutex_enter(&frag6_lock);
|
2001-02-11 08:05:27 +03:00
|
|
|
|
2002-05-28 07:04:05 +04:00
|
|
|
/*
|
|
|
|
* Enforce upper bound on number of fragments.
|
|
|
|
* If maxfrag is 0, never accept fragments.
|
|
|
|
* If maxfrag is -1, accept all fragments without limitation.
|
|
|
|
*/
|
|
|
|
if (ip6_maxfrags < 0)
|
|
|
|
;
|
|
|
|
else if (frag6_nfrags >= (u_int)ip6_maxfrags)
|
|
|
|
goto dropfrag;
|
|
|
|
|
1999-06-28 10:36:47 +04:00
|
|
|
for (q6 = ip6q.ip6q_next; q6 != &ip6q; q6 = q6->ip6q_next)
|
|
|
|
if (ip6f->ip6f_ident == q6->ip6q_ident &&
|
|
|
|
IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, &q6->ip6q_src) &&
|
|
|
|
IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &q6->ip6q_dst))
|
|
|
|
break;
|
|
|
|
|
When reassembling IPv4/IPv6 packets, ensure each fragment has been subject
to the same IPsec processing. That is to say, that all fragments are ESP,
or AH, or AH+ESP, or none.
The reassembly mechanism can be used both on the wire and inside an IPsec
tunnel, so we need to make sure all fragments of a packet were received
on only one side.
Even though I haven't tried, I believe there are configurations where it
would be possible for an attacker to inject an unencrypted fragment into a
legitimate stream of already-decrypted-and-authenticated fragments.
Typically on IPsec gateways with ESP tunnels, where we can encapsulate
fragments (as opposed to the general case, where we fragment encapsulated
data).
Note, for the record: a funnier thing, under IPv4, would be to send a
zero-sized !MFF fragment at the head of the packet, and manage to trigger
an ICMP error; M_DECRYPTED gets lost by the reassembly, and ICMP will reply
with the packet in clear (not encrypted).
2018-05-15 22:16:38 +03:00
|
|
|
if (q6 != &ip6q) {
|
|
|
|
/* All fragments must have the same IPsec flags. */
|
|
|
|
if (q6->ip6q_ipsec != ipsecflags) {
|
|
|
|
goto dropfrag;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
1999-06-28 10:36:47 +04:00
|
|
|
if (q6 == &ip6q) {
|
|
|
|
/*
|
|
|
|
* the first fragment to arrive, create a reassembly queue.
|
|
|
|
*/
|
|
|
|
first_frag = 1;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Enforce upper bound on number of fragmented packets
|
2001-02-10 07:14:26 +03:00
|
|
|
* for which we attempt reassembly;
|
2002-05-28 07:04:05 +04:00
|
|
|
* If maxfragpackets is 0, never accept fragments.
|
|
|
|
* If maxfragpackets is -1, accept all fragments without
|
|
|
|
* limitation.
|
1999-06-28 10:36:47 +04:00
|
|
|
*/
|
2001-02-22 08:04:42 +03:00
|
|
|
if (ip6_maxfragpackets < 0)
|
|
|
|
;
|
|
|
|
else if (frag6_nfragpackets >= (u_int)ip6_maxfragpackets)
|
|
|
|
goto dropfrag;
|
|
|
|
frag6_nfragpackets++;
|
2012-07-02 02:04:44 +04:00
|
|
|
|
|
|
|
q6 = kmem_intr_zalloc(sizeof(struct ip6q), KM_NOSLEEP);
|
|
|
|
if (q6 == NULL) {
|
1999-06-28 10:36:47 +04:00
|
|
|
goto dropfrag;
|
2012-07-02 02:04:44 +04:00
|
|
|
}
|
1999-06-28 10:36:47 +04:00
|
|
|
frag6_insque(q6, &ip6q);
|
|
|
|
|
2000-02-03 21:13:01 +03:00
|
|
|
/* ip6q_nxt will be filled afterwards, from 1st fragment */
|
1999-06-28 10:36:47 +04:00
|
|
|
q6->ip6q_down = q6->ip6q_up = (struct ip6asfrag *)q6;
|
|
|
|
q6->ip6q_ident = ip6f->ip6f_ident;
|
|
|
|
q6->ip6q_ttl = IPV6_FRAGTTL;
|
|
|
|
q6->ip6q_src = ip6->ip6_src;
|
|
|
|
q6->ip6q_dst = ip6->ip6_dst;
|
|
|
|
q6->ip6q_unfrglen = -1; /* The 1st fragment has not arrived. */
|
2002-05-28 07:04:05 +04:00
|
|
|
q6->ip6q_nfrag = 0;
|
When reassembling IPv4/IPv6 packets, ensure each fragment has been subject
to the same IPsec processing. That is to say, that all fragments are ESP,
or AH, or AH+ESP, or none.
The reassembly mechanism can be used both on the wire and inside an IPsec
tunnel, so we need to make sure all fragments of a packet were received
on only one side.
Even though I haven't tried, I believe there are configurations where it
would be possible for an attacker to inject an unencrypted fragment into a
legitimate stream of already-decrypted-and-authenticated fragments.
Typically on IPsec gateways with ESP tunnels, where we can encapsulate
fragments (as opposed to the general case, where we fragment encapsulated
data).
Note, for the record: a funnier thing, under IPv4, would be to send a
zero-sized !MFF fragment at the head of the packet, and manage to trigger
an ICMP error; M_DECRYPTED gets lost by the reassembly, and ICMP will reply
with the packet in clear (not encrypted).
2018-05-15 22:16:38 +03:00
|
|
|
q6->ip6q_ipsec = ipsecflags;
|
1999-06-28 10:36:47 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If it's the 1st fragment, record the length of the
|
|
|
|
* unfragmentable part and the next header of the fragment header.
|
|
|
|
*/
|
|
|
|
if (fragoff == 0) {
|
2002-05-28 07:04:05 +04:00
|
|
|
q6->ip6q_unfrglen = offset - sizeof(struct ip6_hdr) -
|
|
|
|
sizeof(struct ip6_frag);
|
1999-06-28 10:36:47 +04:00
|
|
|
q6->ip6q_nxt = ip6f->ip6f_nxt;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Check that the reassembled packet would not exceed 65535 bytes
|
2018-01-25 18:33:06 +03:00
|
|
|
* in size. If it would exceed, discard the fragment and return an
|
|
|
|
* ICMP error.
|
1999-06-28 10:36:47 +04:00
|
|
|
*/
|
|
|
|
if (q6->ip6q_unfrglen >= 0) {
|
|
|
|
/* The 1st fragment has already arrived. */
|
|
|
|
if (q6->ip6q_unfrglen + fragoff + frgpartlen > IPV6_MAXPACKET) {
|
2011-11-04 04:22:33 +04:00
|
|
|
mutex_exit(&frag6_lock);
|
1999-06-28 10:36:47 +04:00
|
|
|
icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
|
2002-05-28 07:04:05 +04:00
|
|
|
offset - sizeof(struct ip6_frag) +
|
|
|
|
offsetof(struct ip6_frag, ip6f_offlg));
|
2016-11-09 06:49:38 +03:00
|
|
|
goto done;
|
1999-06-28 10:36:47 +04:00
|
|
|
}
|
2002-05-28 07:04:05 +04:00
|
|
|
} else if (fragoff + frgpartlen > IPV6_MAXPACKET) {
|
2011-11-04 04:22:33 +04:00
|
|
|
mutex_exit(&frag6_lock);
|
1999-06-28 10:36:47 +04:00
|
|
|
icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
|
2018-01-25 18:33:06 +03:00
|
|
|
offset - sizeof(struct ip6_frag) +
|
|
|
|
offsetof(struct ip6_frag, ip6f_offlg));
|
2016-11-09 06:49:38 +03:00
|
|
|
goto done;
|
1999-06-28 10:36:47 +04:00
|
|
|
}
|
2018-01-25 18:33:06 +03:00
|
|
|
|
1999-06-28 10:36:47 +04:00
|
|
|
/*
|
|
|
|
* If it's the first fragment, do the above check for each
|
|
|
|
* fragment already stored in the reassembly queue.
|
|
|
|
*/
|
|
|
|
if (fragoff == 0) {
|
|
|
|
for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
|
|
|
|
af6 = af6dwn) {
|
|
|
|
af6dwn = af6->ip6af_down;
|
|
|
|
|
|
|
|
if (q6->ip6q_unfrglen + af6->ip6af_off + af6->ip6af_frglen >
|
|
|
|
IPV6_MAXPACKET) {
|
2018-01-25 18:33:06 +03:00
|
|
|
struct mbuf *merr = af6->ip6af_m;
|
1999-06-28 10:36:47 +04:00
|
|
|
struct ip6_hdr *ip6err;
|
|
|
|
int erroff = af6->ip6af_offset;
|
|
|
|
|
|
|
|
/* dequeue the fragment. */
|
|
|
|
frag6_deq(af6);
|
2012-07-02 02:04:44 +04:00
|
|
|
kmem_intr_free(af6, sizeof(struct ip6asfrag));
|
1999-06-28 10:36:47 +04:00
|
|
|
|
|
|
|
/* adjust pointer. */
|
|
|
|
ip6err = mtod(merr, struct ip6_hdr *);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Restore source and destination addresses
|
|
|
|
* in the erroneous IPv6 header.
|
|
|
|
*/
|
|
|
|
ip6err->ip6_src = q6->ip6q_src;
|
|
|
|
ip6err->ip6_dst = q6->ip6q_dst;
|
|
|
|
|
|
|
|
icmp6_error(merr, ICMP6_PARAM_PROB,
|
2002-05-28 07:04:05 +04:00
|
|
|
ICMP6_PARAMPROB_HEADER,
|
|
|
|
erroff - sizeof(struct ip6_frag) +
|
|
|
|
offsetof(struct ip6_frag, ip6f_offlg));
|
1999-06-28 10:36:47 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-07-02 02:04:44 +04:00
|
|
|
ip6af = kmem_intr_zalloc(sizeof(struct ip6asfrag), KM_NOSLEEP);
|
|
|
|
if (ip6af == NULL) {
|
2000-02-03 21:13:01 +03:00
|
|
|
goto dropfrag;
|
2012-07-02 02:04:44 +04:00
|
|
|
}
|
2000-02-03 21:13:01 +03:00
|
|
|
ip6af->ip6af_head = ip6->ip6_flow;
|
|
|
|
ip6af->ip6af_len = ip6->ip6_plen;
|
|
|
|
ip6af->ip6af_nxt = ip6->ip6_nxt;
|
|
|
|
ip6af->ip6af_hlim = ip6->ip6_hlim;
|
2018-02-07 12:53:08 +03:00
|
|
|
ip6af->ip6af_mff = (ip6f->ip6f_offlg & IP6F_MORE_FRAG) != 0;
|
1999-06-28 10:36:47 +04:00
|
|
|
ip6af->ip6af_off = fragoff;
|
|
|
|
ip6af->ip6af_frglen = frgpartlen;
|
|
|
|
ip6af->ip6af_offset = offset;
|
2018-01-25 18:33:06 +03:00
|
|
|
ip6af->ip6af_m = m;
|
1999-06-28 10:36:47 +04:00
|
|
|
|
|
|
|
if (first_frag) {
|
|
|
|
af6 = (struct ip6asfrag *)q6;
|
|
|
|
goto insert;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Find a segment which begins after this one does.
|
|
|
|
*/
|
|
|
|
for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
|
|
|
|
af6 = af6->ip6af_down)
|
|
|
|
if (af6->ip6af_off > ip6af->ip6af_off)
|
|
|
|
break;
|
|
|
|
|
|
|
|
/*
|
2012-07-02 02:04:44 +04:00
|
|
|
* If the incoming fragment overlaps some existing fragments in
|
|
|
|
* the reassembly queue - drop it as per RFC 5722.
|
1999-06-28 10:36:47 +04:00
|
|
|
*/
|
|
|
|
if (af6->ip6af_up != (struct ip6asfrag *)q6) {
|
|
|
|
i = af6->ip6af_up->ip6af_off + af6->ip6af_up->ip6af_frglen
|
|
|
|
- ip6af->ip6af_off;
|
|
|
|
if (i > 0) {
|
2012-07-02 02:04:44 +04:00
|
|
|
kmem_intr_free(ip6af, sizeof(struct ip6asfrag));
|
1999-06-28 10:36:47 +04:00
|
|
|
goto dropfrag;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (af6 != (struct ip6asfrag *)q6) {
|
|
|
|
i = (ip6af->ip6af_off + ip6af->ip6af_frglen) - af6->ip6af_off;
|
|
|
|
if (i > 0) {
|
2012-07-02 02:04:44 +04:00
|
|
|
kmem_intr_free(ip6af, sizeof(struct ip6asfrag));
|
1999-06-28 10:36:47 +04:00
|
|
|
goto dropfrag;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
insert:
|
|
|
|
/*
|
2018-01-25 18:55:57 +03:00
|
|
|
* Stick new segment in its place.
|
1999-06-28 10:36:47 +04:00
|
|
|
*/
|
|
|
|
frag6_enq(ip6af, af6->ip6af_up);
|
2002-05-28 07:04:05 +04:00
|
|
|
frag6_nfrags++;
|
|
|
|
q6->ip6q_nfrag++;
|
2018-01-25 18:33:06 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Check for complete reassembly.
|
|
|
|
*/
|
1999-06-28 10:36:47 +04:00
|
|
|
next = 0;
|
|
|
|
for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
|
|
|
|
af6 = af6->ip6af_down) {
|
|
|
|
if (af6->ip6af_off != next) {
|
2011-11-04 04:22:33 +04:00
|
|
|
mutex_exit(&frag6_lock);
|
2016-11-09 06:49:38 +03:00
|
|
|
goto done;
|
1999-06-28 10:36:47 +04:00
|
|
|
}
|
|
|
|
next += af6->ip6af_frglen;
|
|
|
|
}
|
2018-02-07 12:53:08 +03:00
|
|
|
if (af6->ip6af_up->ip6af_mff) {
|
2011-11-04 04:22:33 +04:00
|
|
|
mutex_exit(&frag6_lock);
|
2016-11-09 06:49:38 +03:00
|
|
|
goto done;
|
1999-06-28 10:36:47 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Reassembly is complete; concatenate fragments.
|
|
|
|
*/
|
|
|
|
ip6af = q6->ip6q_down;
|
2018-01-25 18:33:06 +03:00
|
|
|
t = m = ip6af->ip6af_m;
|
1999-06-28 10:36:47 +04:00
|
|
|
af6 = ip6af->ip6af_down;
|
2000-02-03 21:13:01 +03:00
|
|
|
frag6_deq(ip6af);
|
1999-06-28 10:36:47 +04:00
|
|
|
while (af6 != (struct ip6asfrag *)q6) {
|
2000-02-03 21:13:01 +03:00
|
|
|
af6dwn = af6->ip6af_down;
|
|
|
|
frag6_deq(af6);
|
1999-06-28 10:36:47 +04:00
|
|
|
while (t->m_next)
|
|
|
|
t = t->m_next;
|
2018-01-25 18:33:06 +03:00
|
|
|
t->m_next = af6->ip6af_m;
|
2000-02-03 21:13:01 +03:00
|
|
|
m_adj(t->m_next, af6->ip6af_offset);
|
2018-05-03 10:25:49 +03:00
|
|
|
m_remove_pkthdr(t->m_next);
|
2012-07-02 02:04:44 +04:00
|
|
|
kmem_intr_free(af6, sizeof(struct ip6asfrag));
|
2000-02-03 21:13:01 +03:00
|
|
|
af6 = af6dwn;
|
1999-06-28 10:36:47 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/* adjust offset to point where the original next header starts */
|
|
|
|
offset = ip6af->ip6af_offset - sizeof(struct ip6_frag);
|
2012-07-02 02:04:44 +04:00
|
|
|
kmem_intr_free(ip6af, sizeof(struct ip6asfrag));
|
2023-08-29 20:01:35 +03:00
|
|
|
next += offset - sizeof(struct ip6_hdr);
|
|
|
|
if ((u_int)next > IPV6_MAXPACKET) {
|
|
|
|
frag6_dropfrag(q6);
|
|
|
|
goto dropfrag;
|
|
|
|
}
|
2000-02-03 21:13:01 +03:00
|
|
|
ip6 = mtod(m, struct ip6_hdr *);
|
2023-08-29 20:01:35 +03:00
|
|
|
ip6->ip6_plen = htons(next);
|
1999-06-28 10:36:47 +04:00
|
|
|
ip6->ip6_src = q6->ip6q_src;
|
|
|
|
ip6->ip6_dst = q6->ip6q_dst;
|
|
|
|
nxt = q6->ip6q_nxt;
|
|
|
|
|
|
|
|
/*
|
2018-01-25 18:33:06 +03:00
|
|
|
* Delete frag6 header.
|
1999-06-28 10:36:47 +04:00
|
|
|
*/
|
2011-01-22 21:26:36 +03:00
|
|
|
if (m->m_len >= offset + sizeof(struct ip6_frag)) {
|
2007-03-04 08:59:00 +03:00
|
|
|
memmove((char *)ip6 + sizeof(struct ip6_frag), ip6, offset);
|
2000-02-03 21:13:01 +03:00
|
|
|
m->m_data += sizeof(struct ip6_frag);
|
|
|
|
m->m_len -= sizeof(struct ip6_frag);
|
|
|
|
} else {
|
|
|
|
/* this comes with no copy if the boundary is on cluster */
|
|
|
|
if ((t = m_split(m, offset, M_DONTWAIT)) == NULL) {
|
2023-08-29 20:01:35 +03:00
|
|
|
frag6_dropfrag(q6);
|
2000-02-03 21:13:01 +03:00
|
|
|
goto dropfrag;
|
|
|
|
}
|
|
|
|
m_adj(t, sizeof(struct ip6_frag));
|
|
|
|
m_cat(m, t);
|
1999-06-28 10:36:47 +04:00
|
|
|
}
|
|
|
|
|
2023-08-29 20:01:35 +03:00
|
|
|
frag6_dropfrag(q6);
|
1999-06-28 10:36:47 +04:00
|
|
|
|
Remove M_PKTHDR from secondary mbufs when reassembling packets.
This is a real problem, because I found at least one component that relies
on the fact that only the first mbuf has M_PKTHDR: far from here, in
m_splithdr, we don't update m->m_pkthdr.len if M_PKTHDR is found in a
secondary mbuf. (The initial intention there was to avoid updating
m_pkthdr.len twice, the assumption was that if M_PKTHDR is set then we're
dealing with the first mbuf.) Therefore, when handling fragmented IPsec
packets (in particular IPv6, IPv4 is a bit more complicated), we may end
up with an incorrect m_pkthdr.len after authentication or decryption. In
the case of ESP, this can lead to a remote crash on this instruction:
m_copydata(m, m->m_pkthdr.len - 3, 3, lastthree);
m_pkthdr.len is bigger than the actual mbuf chain.
It seems possible to me to trigger this bug even if you don't have the ESP
key, because the fragmentation part is outside of the encrypted ESP
payload.
So if you MITM the target, and intercept an incoming ESP packet (which you
can't decrypt), you should be able to forge a new specially-crafted,
fragmented packet and stuff the ESP payload (still encrypted, as you
intercepted it) into it. The decryption succeeds and the target crashes.
2018-03-09 14:57:38 +03:00
|
|
|
{
|
|
|
|
KASSERT(m->m_flags & M_PKTHDR);
|
1999-06-28 10:36:47 +04:00
|
|
|
int plen = 0;
|
Fix a buffer overflow in ip6_get_prevhdr. Doing
mtod(m, char *) + len
is wrong, an option is allowed to be located in another mbuf of the chain.
If the offset of an option within the chain is bigger than the length of
the first mbuf in that chain, we are reading/writing one byte of packet-
controlled data beyond the end of the first mbuf.
The length of this first mbuf depends on the layout the network driver
chose. In the most difficult case, it will allocate a 2KB cluster, which
is bigger than the Ethernet MTU.
But there is at least one way of exploiting this case: by sending a
special combination of nested IPv6 fragments, the packet can control a
good bunch of 'len'. By luck, the memory pool containing clusters does not
embed the pool header in front of the items, so it is not straightforward
to predict what is located at 'mtod(m, char *) + len'.
However, by sending offending fragments in a loop, it is possible to
crash the kernel - at some point we will hit important data structures.
As far as I can tell, PF protects against this difficult case, because
it kicks nested fragments. NPF does not protect against this. IPF I don't
know.
Then there are the more easy cases, if the MTU is bigger than a cluster,
or if the network driver did not allocate a cluster, or perhaps if the
fragments are received via a tunnel; I haven't investigated these cases.
Change ip6_get_prevhdr so that it returns an offset in the chain, and
always use IP6_EXTHDR_GET to get a writable pointer. IP6_EXTHDR_GET
leaves M_PKTHDR untouched.
This place is still fragile.
2018-01-30 17:49:25 +03:00
|
|
|
for (t = m; t; t = t->m_next) {
|
1999-06-28 10:36:47 +04:00
|
|
|
plen += t->m_len;
|
Fix a buffer overflow in ip6_get_prevhdr. Doing
mtod(m, char *) + len
is wrong, an option is allowed to be located in another mbuf of the chain.
If the offset of an option within the chain is bigger than the length of
the first mbuf in that chain, we are reading/writing one byte of packet-
controlled data beyond the end of the first mbuf.
The length of this first mbuf depends on the layout the network driver
chose. In the most difficult case, it will allocate a 2KB cluster, which
is bigger than the Ethernet MTU.
But there is at least one way of exploiting this case: by sending a
special combination of nested IPv6 fragments, the packet can control a
good bunch of 'len'. By luck, the memory pool containing clusters does not
embed the pool header in front of the items, so it is not straightforward
to predict what is located at 'mtod(m, char *) + len'.
However, by sending offending fragments in a loop, it is possible to
crash the kernel - at some point we will hit important data structures.
As far as I can tell, PF protects against this difficult case, because
it kicks nested fragments. NPF does not protect against this. IPF I don't
know.
Then there are the more easy cases, if the MTU is bigger than a cluster,
or if the network driver did not allocate a cluster, or perhaps if the
fragments are received via a tunnel; I haven't investigated these cases.
Change ip6_get_prevhdr so that it returns an offset in the chain, and
always use IP6_EXTHDR_GET to get a writable pointer. IP6_EXTHDR_GET
leaves M_PKTHDR untouched.
This place is still fragile.
2018-01-30 17:49:25 +03:00
|
|
|
}
|
1999-06-28 10:36:47 +04:00
|
|
|
m->m_pkthdr.len = plen;
|
2018-04-13 11:55:50 +03:00
|
|
|
/* XXX XXX: clear csum_flags? */
|
1999-06-28 10:36:47 +04:00
|
|
|
}
|
2002-06-09 18:43:10 +04:00
|
|
|
|
Fix a buffer overflow in ip6_get_prevhdr. Doing
mtod(m, char *) + len
is wrong, an option is allowed to be located in another mbuf of the chain.
If the offset of an option within the chain is bigger than the length of
the first mbuf in that chain, we are reading/writing one byte of packet-
controlled data beyond the end of the first mbuf.
The length of this first mbuf depends on the layout the network driver
chose. In the most difficult case, it will allocate a 2KB cluster, which
is bigger than the Ethernet MTU.
But there is at least one way of exploiting this case: by sending a
special combination of nested IPv6 fragments, the packet can control a
good bunch of 'len'. By luck, the memory pool containing clusters does not
embed the pool header in front of the items, so it is not straightforward
to predict what is located at 'mtod(m, char *) + len'.
However, by sending offending fragments in a loop, it is possible to
crash the kernel - at some point we will hit important data structures.
As far as I can tell, PF protects against this difficult case, because
it kicks nested fragments. NPF does not protect against this. IPF I don't
know.
Then there are the more easy cases, if the MTU is bigger than a cluster,
or if the network driver did not allocate a cluster, or perhaps if the
fragments are received via a tunnel; I haven't investigated these cases.
Change ip6_get_prevhdr so that it returns an offset in the chain, and
always use IP6_EXTHDR_GET to get a writable pointer. IP6_EXTHDR_GET
leaves M_PKTHDR untouched.
This place is still fragile.
2018-01-30 17:49:25 +03:00
|
|
|
/*
|
|
|
|
* Restore NXT to the original.
|
|
|
|
*/
|
|
|
|
{
|
|
|
|
const int prvnxt = ip6_get_prevhdr(m, offset);
|
|
|
|
uint8_t *prvnxtp;
|
|
|
|
|
|
|
|
IP6_EXTHDR_GET(prvnxtp, uint8_t *, m, prvnxt,
|
|
|
|
sizeof(*prvnxtp));
|
|
|
|
if (prvnxtp == NULL) {
|
|
|
|
goto dropfrag;
|
|
|
|
}
|
|
|
|
*prvnxtp = nxt;
|
|
|
|
}
|
|
|
|
|
2008-04-15 07:57:04 +04:00
|
|
|
IP6_STATINC(IP6_STAT_REASSEMBLED);
|
1999-12-13 18:17:17 +03:00
|
|
|
in6_ifstat_inc(dstifp, ifs6_reass_ok);
|
2016-12-08 08:16:33 +03:00
|
|
|
rtcache_unref(rt, &ro);
|
2018-04-13 14:18:08 +03:00
|
|
|
mutex_exit(&frag6_lock);
|
1999-06-28 10:36:47 +04:00
|
|
|
|
|
|
|
/*
|
2018-04-13 14:18:08 +03:00
|
|
|
* Tell launch routine the next header.
|
1999-06-28 10:36:47 +04:00
|
|
|
*/
|
|
|
|
*mp = m;
|
|
|
|
*offp = offset;
|
|
|
|
return nxt;
|
|
|
|
|
|
|
|
dropfrag:
|
2011-11-04 04:22:33 +04:00
|
|
|
mutex_exit(&frag6_lock);
|
1999-12-13 18:17:17 +03:00
|
|
|
in6_ifstat_inc(dstifp, ifs6_reass_fail);
|
2008-04-15 07:57:04 +04:00
|
|
|
IP6_STATINC(IP6_STAT_FRAGDROPPED);
|
1999-06-28 10:36:47 +04:00
|
|
|
m_freem(m);
|
2016-11-09 06:49:38 +03:00
|
|
|
done:
|
2016-12-08 08:16:33 +03:00
|
|
|
rtcache_unref(rt, &ro);
|
1999-06-28 10:36:47 +04:00
|
|
|
return IPPROTO_DONE;
|
|
|
|
}
|
|
|
|
|
2011-11-04 04:22:33 +04:00
|
|
|
int
|
|
|
|
ip6_reass_packet(struct mbuf **mp, int offset)
|
|
|
|
{
|
|
|
|
|
2012-07-02 02:04:44 +04:00
|
|
|
if (frag6_input(mp, &offset, IPPROTO_IPV6) == IPPROTO_DONE) {
|
2011-11-04 04:22:33 +04:00
|
|
|
*mp = NULL;
|
2012-07-02 02:04:44 +04:00
|
|
|
return EINVAL;
|
2011-11-04 04:22:33 +04:00
|
|
|
}
|
2012-07-02 02:04:44 +04:00
|
|
|
return 0;
|
2011-11-04 04:22:33 +04:00
|
|
|
}
|
|
|
|
|
1999-06-28 10:36:47 +04:00
|
|
|
/*
 * Free a fragment reassembly header and all
 * associated datagrams.
 */
static void
frag6_freef(struct ip6q *q6)
{
	struct ip6asfrag *af6, *down6;

	/* Caller must hold the reassembly-queue lock. */
	KASSERT(mutex_owned(&frag6_lock));

	/*
	 * Walk the per-packet fragment list; the list head (q6 itself)
	 * acts as the sentinel.  Save the next pointer before unlinking
	 * and freeing the current entry.
	 */
	for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
	    af6 = down6) {
		struct mbuf *m = af6->ip6af_m;

		down6 = af6->ip6af_down;
		frag6_deq(af6);

		/*
		 * Return ICMP time exceeded error for the 1st fragment.
		 * Just free other fragments.
		 */
		if (af6->ip6af_off == 0) {
			struct ip6_hdr *ip6;

			/* adjust pointer */
			ip6 = mtod(m, struct ip6_hdr *);

			/* restore source and destination addresses */
			ip6->ip6_src = q6->ip6q_src;
			ip6->ip6_dst = q6->ip6q_dst;

			/* icmp6_error() takes ownership of (and frees) m. */
			icmp6_error(m, ICMP6_TIME_EXCEEDED,
			    ICMP6_TIME_EXCEED_REASSEMBLY, 0);
		} else {
			m_freem(m);
		}
		kmem_intr_free(af6, sizeof(struct ip6asfrag));
	}

	/* Unlink and release the queue header itself. */
	frag6_dropfrag(q6);
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Put an ip fragment on a reassembly chain.
|
|
|
|
* Like insque, but pointers in middle of structure.
|
|
|
|
*/
|
|
|
|
void
|
2007-05-23 21:14:59 +04:00
|
|
|
frag6_enq(struct ip6asfrag *af6, struct ip6asfrag *up6)
|
1999-06-28 10:36:47 +04:00
|
|
|
{
|
2002-03-15 13:44:07 +03:00
|
|
|
|
2011-11-04 04:22:33 +04:00
|
|
|
KASSERT(mutex_owned(&frag6_lock));
|
2002-03-15 13:44:07 +03:00
|
|
|
|
1999-06-28 10:36:47 +04:00
|
|
|
af6->ip6af_up = up6;
|
|
|
|
af6->ip6af_down = up6->ip6af_down;
|
|
|
|
up6->ip6af_down->ip6af_up = af6;
|
|
|
|
up6->ip6af_down = af6;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* To frag6_enq as remque is to insque.
|
|
|
|
*/
|
|
|
|
void
|
2007-05-23 21:14:59 +04:00
|
|
|
frag6_deq(struct ip6asfrag *af6)
|
1999-06-28 10:36:47 +04:00
|
|
|
{
|
2002-03-15 13:44:07 +03:00
|
|
|
|
2011-11-04 04:22:33 +04:00
|
|
|
KASSERT(mutex_owned(&frag6_lock));
|
2002-03-15 13:44:07 +03:00
|
|
|
|
1999-06-28 10:36:47 +04:00
|
|
|
af6->ip6af_up->ip6af_down = af6->ip6af_down;
|
|
|
|
af6->ip6af_down->ip6af_up = af6->ip6af_up;
|
|
|
|
}
|
|
|
|
|
2018-01-25 18:33:06 +03:00
|
|
|
/*
|
|
|
|
* Insert newq after oldq.
|
|
|
|
*/
|
2001-02-10 07:14:26 +03:00
|
|
|
void
|
2014-09-05 09:33:06 +04:00
|
|
|
frag6_insque(struct ip6q *newq, struct ip6q *oldq)
|
1999-06-28 10:36:47 +04:00
|
|
|
{
|
2002-03-15 13:44:07 +03:00
|
|
|
|
2011-11-04 04:22:33 +04:00
|
|
|
KASSERT(mutex_owned(&frag6_lock));
|
2002-03-15 13:44:07 +03:00
|
|
|
|
2014-09-05 09:33:06 +04:00
|
|
|
newq->ip6q_prev = oldq;
|
|
|
|
newq->ip6q_next = oldq->ip6q_next;
|
2018-01-25 18:33:06 +03:00
|
|
|
oldq->ip6q_next->ip6q_prev = newq;
|
2014-09-05 09:33:06 +04:00
|
|
|
oldq->ip6q_next = newq;
|
1999-06-28 10:36:47 +04:00
|
|
|
}
|
|
|
|
|
2018-01-25 18:33:06 +03:00
|
|
|
/*
|
|
|
|
* Unlink p6.
|
|
|
|
*/
|
1999-06-28 10:36:47 +04:00
|
|
|
void
|
2007-05-23 21:14:59 +04:00
|
|
|
frag6_remque(struct ip6q *p6)
|
1999-06-28 10:36:47 +04:00
|
|
|
{
|
2002-03-15 13:44:07 +03:00
|
|
|
|
2011-11-04 04:22:33 +04:00
|
|
|
KASSERT(mutex_owned(&frag6_lock));
|
2002-03-15 13:44:07 +03:00
|
|
|
|
1999-06-28 10:36:47 +04:00
|
|
|
p6->ip6q_prev->ip6q_next = p6->ip6q_next;
|
|
|
|
p6->ip6q_next->ip6q_prev = p6->ip6q_prev;
|
|
|
|
}
|
|
|
|
|
2011-05-03 21:44:30 +04:00
|
|
|
void
|
|
|
|
frag6_fasttimo(void)
|
|
|
|
{
|
2017-01-24 10:09:24 +03:00
|
|
|
|
2017-11-17 10:37:12 +03:00
|
|
|
SOFTNET_KERNEL_LOCK_UNLESS_NET_MPSAFE();
|
2011-12-16 04:57:59 +04:00
|
|
|
|
2011-05-03 21:44:30 +04:00
|
|
|
if (frag6_drainwanted) {
|
|
|
|
frag6_drain();
|
|
|
|
frag6_drainwanted = 0;
|
|
|
|
}
|
2011-12-16 04:57:59 +04:00
|
|
|
|
2017-11-17 10:37:12 +03:00
|
|
|
SOFTNET_KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
|
2011-05-03 21:44:30 +04:00
|
|
|
}
|
|
|
|
|
1999-06-28 10:36:47 +04:00
|
|
|
/*
 * IPv6 reassembling timer processing;
 * if a timer expires on a reassembly
 * queue, discard it.
 */
void
frag6_slowtimo(void)
{
	struct ip6q *q6;

	SOFTNET_KERNEL_LOCK_UNLESS_NET_MPSAFE();

	mutex_enter(&frag6_lock);
	q6 = ip6q.ip6q_next;
	if (q6) {
		/*
		 * Age every queue on the global list.  Note the walk
		 * advances q6 first and then tests/free's q6->ip6q_prev,
		 * so the entry being freed is never the one used to
		 * continue the iteration.
		 */
		while (q6 != &ip6q) {
			--q6->ip6q_ttl;
			q6 = q6->ip6q_next;
			if (q6->ip6q_prev->ip6q_ttl == 0) {
				IP6_STATINC(IP6_STAT_FRAGTIMEOUT);
				/* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
				frag6_freef(q6->ip6q_prev);
			}
		}
	}

	/*
	 * If we are over the maximum number of fragments
	 * (due to the limit being lowered), drain off
	 * enough to get down to the new limit.
	 */
	while (frag6_nfragpackets > (u_int)ip6_maxfragpackets &&
	    ip6q.ip6q_prev) {
		IP6_STATINC(IP6_STAT_FRAGOVERFLOW);
		/* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
		frag6_freef(ip6q.ip6q_prev);
	}
	mutex_exit(&frag6_lock);

	SOFTNET_KERNEL_UNLOCK_UNLESS_NET_MPSAFE();

#if 0
	/*
	 * Routing changes might produce a better route than we last used;
	 * make sure we notice eventually, even if forwarding only for one
	 * destination and the cache is never replaced.
	 */
	rtcache_free(&ip6_forward_rt);
	rtcache_free(&ipsrcchk_rt);
#endif
}
|
|
|
|
|
2011-05-03 21:44:30 +04:00
|
|
|
/*
 * Request a drain of all fragment queues; the actual work is deferred
 * to frag6_fasttimo(), which checks this flag.
 */
void
frag6_drainstub(void)
{
	frag6_drainwanted = 1;
}
|
|
|
|
|
1999-06-28 10:36:47 +04:00
|
|
|
/*
|
|
|
|
* Drain off all datagram fragments.
|
|
|
|
*/
|
|
|
|
void
|
2008-02-27 22:40:56 +03:00
|
|
|
frag6_drain(void)
|
1999-06-28 10:36:47 +04:00
|
|
|
{
|
2002-03-15 13:44:07 +03:00
|
|
|
|
2011-11-04 04:22:33 +04:00
|
|
|
if (mutex_tryenter(&frag6_lock)) {
|
2008-04-24 15:38:36 +04:00
|
|
|
while (ip6q.ip6q_next != &ip6q) {
|
|
|
|
IP6_STATINC(IP6_STAT_FRAGDROPPED);
|
|
|
|
/* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
|
|
|
|
frag6_freef(ip6q.ip6q_next);
|
|
|
|
}
|
2011-11-04 04:22:33 +04:00
|
|
|
mutex_exit(&frag6_lock);
|
1999-06-28 10:36:47 +04:00
|
|
|
}
|
|
|
|
}
|