filter-rewriter: Add TCP state machine and fix memory leak in connection_track_table
We add an almost complete TCP state machine to filter-rewriter; it omits TCPS_LISTEN and simplifies some of the FIN states used when the VM actively closes a connection. This simplification is safe because the guest kernel tracks the TCP status and waits for the 2MSL time itself: if the client resends the FIN packet, the guest will resend the last ACK, so we needn't wait for the 2MSL time in filter-rewriter. Previously, after a net connection was closed, we didn't clear its related resources in connection_track_table, which led to a memory leak. Let's track the state of each net connection so that, once it is closed, its related resources are cleared up. Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com> Signed-off-by: Zhang Chen <zhangckid@gmail.com> Signed-off-by: Zhang Chen <chen.zhang@intel.com> Signed-off-by: Jason Wang <jasowang@redhat.com>
This commit is contained in:
parent
77f7c74719
commit
6214231abd
@ -137,7 +137,7 @@ Connection *connection_new(ConnectionKey *key)
|
|||||||
conn->ip_proto = key->ip_proto;
|
conn->ip_proto = key->ip_proto;
|
||||||
conn->processing = false;
|
conn->processing = false;
|
||||||
conn->offset = 0;
|
conn->offset = 0;
|
||||||
conn->syn_flag = 0;
|
conn->tcp_state = TCPS_CLOSED;
|
||||||
conn->pack = 0;
|
conn->pack = 0;
|
||||||
conn->sack = 0;
|
conn->sack = 0;
|
||||||
g_queue_init(&conn->primary_list);
|
g_queue_init(&conn->primary_list);
|
||||||
|
@ -18,6 +18,7 @@
|
|||||||
#include "slirp/slirp.h"
|
#include "slirp/slirp.h"
|
||||||
#include "qemu/jhash.h"
|
#include "qemu/jhash.h"
|
||||||
#include "qemu/timer.h"
|
#include "qemu/timer.h"
|
||||||
|
#include "slirp/tcp.h"
|
||||||
|
|
||||||
#define HASHTABLE_MAX_SIZE 16384
|
#define HASHTABLE_MAX_SIZE 16384
|
||||||
|
|
||||||
@ -81,11 +82,9 @@ typedef struct Connection {
|
|||||||
uint32_t sack;
|
uint32_t sack;
|
||||||
/* offset = secondary_seq - primary_seq */
|
/* offset = secondary_seq - primary_seq */
|
||||||
tcp_seq offset;
|
tcp_seq offset;
|
||||||
/*
|
|
||||||
* we use this flag update offset func
|
int tcp_state; /* TCP FSM state */
|
||||||
* run once in independent tcp connection
|
tcp_seq fin_ack_seq; /* the seq of 'fin=1,ack=1' */
|
||||||
*/
|
|
||||||
int syn_flag;
|
|
||||||
} Connection;
|
} Connection;
|
||||||
|
|
||||||
uint32_t connection_key_hash(const void *opaque);
|
uint32_t connection_key_hash(const void *opaque);
|
||||||
|
@ -59,9 +59,9 @@ static int is_tcp_packet(Packet *pkt)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* handle tcp packet from primary guest */
|
/* handle tcp packet from primary guest */
|
||||||
static int handle_primary_tcp_pkt(NetFilterState *nf,
|
static int handle_primary_tcp_pkt(RewriterState *rf,
|
||||||
Connection *conn,
|
Connection *conn,
|
||||||
Packet *pkt)
|
Packet *pkt, ConnectionKey *key)
|
||||||
{
|
{
|
||||||
struct tcphdr *tcp_pkt;
|
struct tcphdr *tcp_pkt;
|
||||||
|
|
||||||
@ -74,23 +74,28 @@ static int handle_primary_tcp_pkt(NetFilterState *nf,
|
|||||||
trace_colo_filter_rewriter_conn_offset(conn->offset);
|
trace_colo_filter_rewriter_conn_offset(conn->offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == (TH_ACK | TH_SYN)) &&
|
||||||
|
conn->tcp_state == TCPS_SYN_SENT) {
|
||||||
|
conn->tcp_state = TCPS_ESTABLISHED;
|
||||||
|
}
|
||||||
|
|
||||||
if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_SYN)) {
|
if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_SYN)) {
|
||||||
/*
|
/*
|
||||||
* we use this flag update offset func
|
* we use this flag update offset func
|
||||||
* run once in independent tcp connection
|
* run once in independent tcp connection
|
||||||
*/
|
*/
|
||||||
conn->syn_flag = 1;
|
conn->tcp_state = TCPS_SYN_RECEIVED;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_ACK)) {
|
if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_ACK)) {
|
||||||
if (conn->syn_flag) {
|
if (conn->tcp_state == TCPS_SYN_RECEIVED) {
|
||||||
/*
|
/*
|
||||||
* offset = secondary_seq - primary seq
|
* offset = secondary_seq - primary seq
|
||||||
* ack packet sent by guest from primary node,
|
* ack packet sent by guest from primary node,
|
||||||
* so we use th_ack - 1 get primary_seq
|
* so we use th_ack - 1 get primary_seq
|
||||||
*/
|
*/
|
||||||
conn->offset -= (ntohl(tcp_pkt->th_ack) - 1);
|
conn->offset -= (ntohl(tcp_pkt->th_ack) - 1);
|
||||||
conn->syn_flag = 0;
|
conn->tcp_state = TCPS_ESTABLISHED;
|
||||||
}
|
}
|
||||||
if (conn->offset) {
|
if (conn->offset) {
|
||||||
/* handle packets to the secondary from the primary */
|
/* handle packets to the secondary from the primary */
|
||||||
@ -99,15 +104,66 @@ static int handle_primary_tcp_pkt(NetFilterState *nf,
|
|||||||
net_checksum_calculate((uint8_t *)pkt->data + pkt->vnet_hdr_len,
|
net_checksum_calculate((uint8_t *)pkt->data + pkt->vnet_hdr_len,
|
||||||
pkt->size - pkt->vnet_hdr_len);
|
pkt->size - pkt->vnet_hdr_len);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Passive close step 3
|
||||||
|
*/
|
||||||
|
if ((conn->tcp_state == TCPS_LAST_ACK) &&
|
||||||
|
(ntohl(tcp_pkt->th_ack) == (conn->fin_ack_seq + 1))) {
|
||||||
|
conn->tcp_state = TCPS_CLOSED;
|
||||||
|
g_hash_table_remove(rf->connection_track_table, key);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((tcp_pkt->th_flags & TH_FIN) == TH_FIN) {
|
||||||
|
/*
|
||||||
|
* Passive close.
|
||||||
|
* Step 1:
|
||||||
|
* The *server* side of this connect is VM, *client* tries to close
|
||||||
|
* the connection. We will into CLOSE_WAIT status.
|
||||||
|
*
|
||||||
|
* Step 2:
|
||||||
|
* In this step we will into LAST_ACK status.
|
||||||
|
*
|
||||||
|
* We got 'fin=1, ack=1' packet from server side, we need to
|
||||||
|
* record the seq of 'fin=1, ack=1' packet.
|
||||||
|
*
|
||||||
|
* Step 3:
|
||||||
|
* We got 'ack=1' packets from client side, it acks 'fin=1, ack=1'
|
||||||
|
* packet from server side. From this point, we can ensure that there
|
||||||
|
* will be no packets in the connection, except that, some errors
|
||||||
|
* happen between the path of 'filter object' and vNIC, if this rare
|
||||||
|
* case really happen, we can still create a new connection,
|
||||||
|
* So it is safe to remove the connection from connection_track_table.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
if (conn->tcp_state == TCPS_ESTABLISHED) {
|
||||||
|
conn->tcp_state = TCPS_CLOSE_WAIT;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Active close step 2.
|
||||||
|
*/
|
||||||
|
if (conn->tcp_state == TCPS_FIN_WAIT_1) {
|
||||||
|
conn->tcp_state = TCPS_TIME_WAIT;
|
||||||
|
/*
|
||||||
|
* For simplify implementation, we needn't wait 2MSL time
|
||||||
|
* in filter rewriter. Because guest kernel will track the
|
||||||
|
* TCP status and wait 2MSL time, if client resend the FIN
|
||||||
|
* packet, guest will apply the last ACK too.
|
||||||
|
*/
|
||||||
|
conn->tcp_state = TCPS_CLOSED;
|
||||||
|
g_hash_table_remove(rf->connection_track_table, key);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* handle tcp packet from secondary guest */
|
/* handle tcp packet from secondary guest */
|
||||||
static int handle_secondary_tcp_pkt(NetFilterState *nf,
|
static int handle_secondary_tcp_pkt(RewriterState *rf,
|
||||||
Connection *conn,
|
Connection *conn,
|
||||||
Packet *pkt)
|
Packet *pkt, ConnectionKey *key)
|
||||||
{
|
{
|
||||||
struct tcphdr *tcp_pkt;
|
struct tcphdr *tcp_pkt;
|
||||||
|
|
||||||
@ -121,7 +177,8 @@ static int handle_secondary_tcp_pkt(NetFilterState *nf,
|
|||||||
trace_colo_filter_rewriter_conn_offset(conn->offset);
|
trace_colo_filter_rewriter_conn_offset(conn->offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == (TH_ACK | TH_SYN))) {
|
if (conn->tcp_state == TCPS_SYN_RECEIVED &&
|
||||||
|
((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == (TH_ACK | TH_SYN))) {
|
||||||
/*
|
/*
|
||||||
* save offset = secondary_seq and then
|
* save offset = secondary_seq and then
|
||||||
* in handle_primary_tcp_pkt make offset
|
* in handle_primary_tcp_pkt make offset
|
||||||
@ -130,6 +187,12 @@ static int handle_secondary_tcp_pkt(NetFilterState *nf,
|
|||||||
conn->offset = ntohl(tcp_pkt->th_seq);
|
conn->offset = ntohl(tcp_pkt->th_seq);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* VM active connect */
|
||||||
|
if (conn->tcp_state == TCPS_CLOSED &&
|
||||||
|
((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_SYN)) {
|
||||||
|
conn->tcp_state = TCPS_SYN_SENT;
|
||||||
|
}
|
||||||
|
|
||||||
if ((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_ACK) {
|
if ((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_ACK) {
|
||||||
/* Only need to adjust seq while offset is Non-zero */
|
/* Only need to adjust seq while offset is Non-zero */
|
||||||
if (conn->offset) {
|
if (conn->offset) {
|
||||||
@ -141,6 +204,32 @@ static int handle_secondary_tcp_pkt(NetFilterState *nf,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Passive close step 2:
|
||||||
|
*/
|
||||||
|
if (conn->tcp_state == TCPS_CLOSE_WAIT &&
|
||||||
|
(tcp_pkt->th_flags & (TH_ACK | TH_FIN)) == (TH_ACK | TH_FIN)) {
|
||||||
|
conn->fin_ack_seq = ntohl(tcp_pkt->th_seq);
|
||||||
|
conn->tcp_state = TCPS_LAST_ACK;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Active close
|
||||||
|
*
|
||||||
|
* Step 1:
|
||||||
|
* The *server* side of this connect is VM, *server* tries to close
|
||||||
|
* the connection.
|
||||||
|
*
|
||||||
|
* Step 2:
|
||||||
|
* We will into CLOSE_WAIT status.
|
||||||
|
* We simplify the TCPS_FIN_WAIT_2, TCPS_TIME_WAIT and
|
||||||
|
* CLOSING status.
|
||||||
|
*/
|
||||||
|
if (conn->tcp_state == TCPS_ESTABLISHED &&
|
||||||
|
(tcp_pkt->th_flags & (TH_ACK | TH_FIN)) == TH_FIN) {
|
||||||
|
conn->tcp_state = TCPS_FIN_WAIT_1;
|
||||||
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -190,7 +279,7 @@ static ssize_t colo_rewriter_receive_iov(NetFilterState *nf,
|
|||||||
|
|
||||||
if (sender == nf->netdev) {
|
if (sender == nf->netdev) {
|
||||||
/* NET_FILTER_DIRECTION_TX */
|
/* NET_FILTER_DIRECTION_TX */
|
||||||
if (!handle_primary_tcp_pkt(nf, conn, pkt)) {
|
if (!handle_primary_tcp_pkt(s, conn, pkt, &key)) {
|
||||||
qemu_net_queue_send(s->incoming_queue, sender, 0,
|
qemu_net_queue_send(s->incoming_queue, sender, 0,
|
||||||
(const uint8_t *)pkt->data, pkt->size, NULL);
|
(const uint8_t *)pkt->data, pkt->size, NULL);
|
||||||
packet_destroy(pkt, NULL);
|
packet_destroy(pkt, NULL);
|
||||||
@ -203,7 +292,7 @@ static ssize_t colo_rewriter_receive_iov(NetFilterState *nf,
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
/* NET_FILTER_DIRECTION_RX */
|
/* NET_FILTER_DIRECTION_RX */
|
||||||
if (!handle_secondary_tcp_pkt(nf, conn, pkt)) {
|
if (!handle_secondary_tcp_pkt(s, conn, pkt, &key)) {
|
||||||
qemu_net_queue_send(s->incoming_queue, sender, 0,
|
qemu_net_queue_send(s->incoming_queue, sender, 0,
|
||||||
(const uint8_t *)pkt->data, pkt->size, NULL);
|
(const uint8_t *)pkt->data, pkt->size, NULL);
|
||||||
packet_destroy(pkt, NULL);
|
packet_destroy(pkt, NULL);
|
||||||
|
Loading…
Reference in New Issue
Block a user