lwip/0038-add-tso.patch
2023-02-11 20:33:16 +08:00

381 lines
12 KiB
Diff
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

From af8ac36acb103aa27b498dafa0ae8ba4332faac8 Mon Sep 17 00:00:00 2001
From: wu-changsheng <wuchangsheng2@huawei.com>
Date: Sat, 3 Dec 2022 21:38:09 +0800
Subject: [PATCH] add-tso
---
src/core/ipv4/etharp.c | 17 +++-
src/core/ipv4/ip4.c | 10 ++-
src/core/tcp.c | 6 ++
src/core/tcp_out.c | 178 +++++++++++++++++++++++++++++++++++++--
src/include/dpdk_cksum.h | 2 +-
src/include/lwip/pbuf.h | 8 +-
src/include/lwipopts.h | 4 +
7 files changed, 211 insertions(+), 14 deletions(-)
diff --git a/src/core/ipv4/etharp.c b/src/core/ipv4/etharp.c
index effb7db..f1903e4 100644
--- a/src/core/ipv4/etharp.c
+++ b/src/core/ipv4/etharp.c
@@ -482,6 +482,13 @@ etharp_update_arp_entry(struct netif *netif, const ip4_addr_t *ipaddr, struct et
struct pbuf *p = arp_table[i].q;
arp_table[i].q = NULL;
#endif /* ARP_QUEUEING */
+#if USE_LIBOS
+ struct pbuf *tmp = p->next;
+ while (tmp != NULL) {
+ tmp->ref--;
+ tmp = tmp->next;
+ }
+#endif
/* send the queued IP packet */
ethernet_output(netif, p, (struct eth_addr *)(netif->hwaddr), ethaddr, ETHTYPE_IP);
/* free the queued IP packet */
@@ -1027,7 +1034,15 @@ etharp_query(struct netif *netif, const ip4_addr_t *ipaddr, struct pbuf *q)
} else {
/* referencing the old pbuf is enough */
p = q;
- pbuf_ref(p);
+#if USE_LIBOS
+ struct pbuf *tmp = p;
+ while (tmp != NULL) {
+ pbuf_ref(tmp);
+ tmp = tmp->next;
+ }
+#else
+ pbuf_ref(p);
+#endif
}
/* packet could be taken over? */
if (p != NULL) {
diff --git a/src/core/ipv4/ip4.c b/src/core/ipv4/ip4.c
index 1334cdc..d823491 100644
--- a/src/core/ipv4/ip4.c
+++ b/src/core/ipv4/ip4.c
@@ -1034,9 +1034,15 @@ ip4_output_if_opt_src(struct pbuf *p, const ip4_addr_t *src, const ip4_addr_t *d
#endif /* ENABLE_LOOPBACK */
#if IP_FRAG
/* don't fragment if interface has mtu set to 0 [loopif] */
- if (netif->mtu && (p->tot_len > netif->mtu)) {
- return ip4_frag(p, netif, dest);
+#if USE_LIBOS
+ if (!(get_eth_params_tx_ol() & DEV_TX_OFFLOAD_TCP_TSO)) {
+#endif
+ if (netif->mtu && (p->tot_len > netif->mtu)) {
+ return ip4_frag(p, netif, dest);
+ }
+#if USE_LIBOS
}
+#endif
#endif /* IP_FRAG */
LWIP_DEBUGF(IP_DEBUG, ("ip4_output_if: call netif->output()\n"));
diff --git a/src/core/tcp.c b/src/core/tcp.c
index 7c18408..51ada38 100644
--- a/src/core/tcp.c
+++ b/src/core/tcp.c
@@ -1756,7 +1756,9 @@ tcp_seg_free(struct tcp_seg *seg)
seg->p = NULL;
#endif /* TCP_DEBUG */
}
+#if !USE_LIBOS
memp_free(MEMP_TCP_SEG, seg);
+#endif
}
}
@@ -1792,10 +1794,14 @@ tcp_seg_copy(struct tcp_seg *seg)
LWIP_ASSERT("tcp_seg_copy: invalid seg", seg != NULL);
+#if USE_LIBOS
+ cseg = (struct tcp_seg *)((uint8_t *)seg->p + sizeof(struct pbuf_custom));
+#else
cseg = (struct tcp_seg *)memp_malloc(MEMP_TCP_SEG);
if (cseg == NULL) {
return NULL;
}
+#endif
SMEMCPY((u8_t *)cseg, (const u8_t *)seg, sizeof(struct tcp_seg));
pbuf_ref(cseg->p);
return cseg;
diff --git a/src/core/tcp_out.c b/src/core/tcp_out.c
index 2834ba3..ee6f40b 100644
--- a/src/core/tcp_out.c
+++ b/src/core/tcp_out.c
@@ -161,6 +161,40 @@ tcp_route(const struct tcp_pcb *pcb, const ip_addr_t *src, const ip_addr_t *dst)
* The TCP header is filled in except ackno and wnd.
* p is freed on failure.
*/
+#if USE_LIBOS
+void tcp_init_segment(struct tcp_seg *seg, const struct tcp_pcb *pcb, struct pbuf *p, u8_t hdrflags,
+ u32_t seqno, u8_t optflags)
+{ /* Fills caller-provided seg for pbuf p and writes src/dest port, seqno, header length, flags and urgp; ackno, wnd and chksum are not written here. */
+ u8_t optlen = LWIP_TCP_OPT_LENGTH_SEGMENT(optflags, pcb); /* option length derived from optflags (and pcb) */
+
+ seg->flags = optflags;
+ seg->next = NULL;
+ seg->p = p;
+ seg->len = p->tot_len - optlen; /* taken before the TCP header is prepended below */
+
+ /* build TCP header */
+ pbuf_add_header(p, TCP_HLEN); /* NOTE(review): result is not checked; confirm p always has TCP_HLEN of headroom */
+ seg->tcphdr = (struct tcp_hdr *)seg->p->payload;
+ seg->tcphdr->src = lwip_htons(pcb->local_port);
+ seg->tcphdr->dest = lwip_htons(pcb->remote_port);
+ seg->tcphdr->seqno = lwip_htonl(seqno);
+
+ TCPH_HDRLEN_FLAGS_SET(seg->tcphdr, (TCP_HLEN + optlen) / 4, hdrflags); /* header length is passed in 4-byte units */
+ seg->tcphdr->urgp = 0;
+}
+
+static struct tcp_seg *
+tcp_create_segment(const struct tcp_pcb *pcb, struct pbuf *p, u8_t hdrflags, u32_t seqno, u8_t optflags)
+{ /* USE_LIBOS variant: nothing is allocated and there is no NULL return path; the tcp_seg is located at a fixed offset from p. */
+ struct tcp_seg *seg;
+
+ seg = (struct tcp_seg *)((uint8_t *)p + sizeof(struct pbuf_custom)); /* NOTE(review): assumes storage for a struct tcp_seg is reserved right after the pbuf_custom that starts at p - confirm against the pbuf allocator */
+
+ tcp_init_segment(seg, pcb, p, hdrflags, seqno, optflags); /* fills seg and builds the TCP header in p */
+
+ return seg;
+}
+#else
static struct tcp_seg *
tcp_create_segment(const struct tcp_pcb *pcb, struct pbuf *p, u8_t hdrflags, u32_t seqno, u8_t optflags)
{
@@ -210,6 +244,7 @@ tcp_create_segment(const struct tcp_pcb *pcb, struct pbuf *p, u8_t hdrflags, u32
seg->tcphdr->urgp = 0;
return seg;
}
+#endif
/**
* Allocate a PBUF_RAM pbuf, perhaps with extra space at the end.
@@ -1272,6 +1307,60 @@ tcp_build_wnd_scale_option(u32_t *opts)
}
#endif
+#if USE_LIBOS
+static struct tcp_seg *tcp_output_over(struct tcp_pcb *pcb, struct tcp_seg *seg, struct tcp_seg *useg)
+{ /* Queues seg on pcb->unacked, or frees it when TCP_TCPLEN(seg) is 0; useg is the caller's tail of the unacked list (NULL if empty) and the possibly updated tail is returned. */
+ if (TCP_TCPLEN(seg) > 0) {
+ seg->next = NULL; /* detach seg from whatever list it was on */
+ if (useg == NULL) { /* no tail yet: seg becomes head and tail */
+ pcb->unacked = seg;
+ useg = seg;
+ } else {
+ if (TCP_SEQ_LT(lwip_ntohl(seg->tcphdr->seqno), lwip_ntohl(useg->tcphdr->seqno))) {
+ /* seg sorts before the tail: insert it at its sorted position in the unacked list */
+ struct tcp_seg **cur_seg = &(pcb->unacked);
+ while (*cur_seg &&
+ TCP_SEQ_LT(lwip_ntohl((*cur_seg)->tcphdr->seqno), lwip_ntohl(seg->tcphdr->seqno))) {
+ cur_seg = &((*cur_seg)->next ); /* step to the next link while entries have a lower seqno */
+ }
+ seg->next = (*cur_seg);
+ (*cur_seg) = seg; /* tail pointer useg is left unchanged on this path */
+ } else {
+ /* add segment to tail of unacked list */
+ useg->next = seg;
+ useg = seg;
+ }
+ }
+ } else {
+ tcp_seg_free(seg); /* zero TCP_TCPLEN: not kept on the unacked list */
+ }
+
+ return useg;
+}
+static err_t tcp_output_seg(struct tcp_pcb *pcb, struct tcp_seg *seg, struct netif *netif, u32_t snd_nxt)
+{ /* Sends seg via tcp_output_segment(); on success moves pcb->snd_nxt forward to snd_nxt if that is ahead. Returns ERR_OK or the error from tcp_output_segment(). */
+ if (pcb->state != SYN_SENT) { /* every segment outside SYN_SENT gets the ACK flag */
+ TCPH_SET_FLAG(seg->tcphdr, TCP_ACK);
+ }
+
+ err_t err = tcp_output_segment(seg, pcb, netif);
+ if (err != ERR_OK) {
+ /* segment could not be sent, for whatever reason */
+ tcp_set_flags(pcb, TF_NAGLEMEMERR); /* record the failure on the pcb; snd_nxt and the ACK flags are left as they were */
+ return err;
+ }
+
+ if (pcb->state != SYN_SENT) { /* the segment just sent carried the ACK flag set above, so clear the pending-ACK flags */
+ tcp_clear_flags(pcb, TF_ACK_DELAY | TF_ACK_NOW);
+ }
+
+ if (TCP_SEQ_LT(pcb->snd_nxt, snd_nxt)) { /* snd_nxt only ever moves forward */
+ pcb->snd_nxt = snd_nxt;
+ }
+
+ return ERR_OK;
+}
+#endif
/**
* @ingroup tcp_raw
* Find out what we can send and send it
@@ -1376,16 +1465,88 @@ tcp_output(struct tcp_pcb *pcb)
for (; useg->next != NULL; useg = useg->next);
}
/* data available and window allows it to be sent? */
+
#if USE_LIBOS
- /* avoid send cose too much time, limit send pkts num max 10 */
- uint16_t send_pkt = 0;
- while (seg != NULL && send_pkt < 10 &&
- lwip_ntohl(seg->tcphdr->seqno) - pcb->lastack + seg->len <= wnd) {
- send_pkt++;
-#else
+ if (get_eth_params_tx_ol() & DEV_TX_OFFLOAD_TCP_TSO) {
+ while(seg) {
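+ /**
+ * 1) Walk the unsent queue to find all segments that can be sent and chain their pbufs together.
+ * 2) Build a new segment, call tcp_output_segment, then release the new segment.
+ * 3) On success, update snd_nxt, the unacked queue and the unsent queue.
+ */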
+ struct tcp_seg *start_seg = seg;
+ struct pbuf *first_pbuf = NULL;
+ struct pbuf *pre_pbuf = NULL;
+ u8_t pbuf_chain_len = 0;
+ u32_t next_seqno = lwip_ntohl(seg->tcphdr->seqno);
+ while (seg != NULL && pbuf_chain_len < MAX_PBUF_CHAIN_LEN) {
+ u32_t seg_seqno = lwip_ntohl(seg->tcphdr->seqno);
+ if (seg_seqno - pcb->lastack + seg->len > wnd) {
+ if (first_pbuf)
+ break;
+ else
+ goto output_done;
+ }
+
+ if ((tcp_do_output_nagle(pcb) == 0) && ((pcb->flags & (TF_NAGLEMEMERR | TF_FIN)) == 0)) {
+ if (first_pbuf)
+ break;
+ else
+ goto output_done;
+ }
+
+ if (seg->len < TCP_MSS || next_seqno != seg_seqno || pbuf_chain_len >= MAX_PBUF_CHAIN_LEN) {
+ break;
+ }
+ if (first_pbuf == NULL && (seg->next == NULL || seg->next->len < TCP_MSS)) {
+ break;
+ }
+
+ pbuf_remove_header(seg->p, seg->p->tot_len - seg->len);
+ if (first_pbuf == NULL) {
+ first_pbuf = seg->p;
+ } else {
+ first_pbuf->tot_len += seg->p->len;
+ pre_pbuf->next = seg->p;
+ }
+
+ pre_pbuf = seg->p;
+ next_seqno = seg_seqno + TCP_TCPLEN(seg);
+ seg = seg->next;
+ pcb->unsent = seg;
+ pbuf_chain_len++;
+ }
+
+ if (first_pbuf == NULL) {
+ err = tcp_output_seg(pcb, seg, netif, next_seqno + seg->len);
+ if (err != ERR_OK)
+ return err;
+ pcb->unsent = seg->next;
+ useg = tcp_output_over(pcb, seg, useg);
+ seg = pcb->unsent;
+ continue;
+ }
+
+ struct tcp_seg new_seg;
+ tcp_init_segment(&new_seg, pcb, first_pbuf, 0, lwip_ntohl(start_seg->tcphdr->seqno), 0);
+
+ err = tcp_output_seg(pcb, &new_seg, netif, next_seqno);
+
+ for (u32_t i = 0; i < pbuf_chain_len; i++) {
+ struct tcp_seg *next_seg = start_seg->next;
+ start_seg->p->next = NULL;
+ useg = tcp_output_over(pcb, start_seg, useg);
+ start_seg = next_seg;
+ }
+
+ pbuf_remove_header(new_seg.p, new_seg.p->tot_len - new_seg.len - TCPH_HDRLEN_BYTES(new_seg.tcphdr));
+ new_seg.p->tot_len = new_seg.p->len;
+ }
+ } else
+#endif
+{
while (seg != NULL &&
lwip_ntohl(seg->tcphdr->seqno) - pcb->lastack + seg->len <= wnd) {
-#endif
LWIP_ASSERT("RST not expected here!",
(TCPH_FLAGS(seg->tcphdr) & TCP_RST) == 0);
/* Stop sending if the nagle algorithm would prevent it
@@ -1462,6 +1623,7 @@ tcp_output(struct tcp_pcb *pcb)
}
seg = pcb->unsent;
}
+}
#if TCP_OVERSIZE
if (pcb->unsent == NULL) {
/* last unsent has been removed, reset unsent_oversize */
@@ -1627,7 +1789,7 @@ tcp_output_segment(struct tcp_seg *seg, struct tcp_pcb *pcb, struct netif *netif
IF__NETIF_CHECKSUM_ENABLED(netif, NETIF_CHECKSUM_GEN_TCP) {
#if CHECKSUM_GEN_TCP_HW
if (get_eth_params_tx_ol() & DEV_TX_OFFLOAD_TCP_CKSUM) {
- tcph_cksum_set(seg->p, TCP_HLEN);
+ tcph_cksum_set(seg->p, TCPH_HDRLEN_BYTES(seg->tcphdr));
seg->tcphdr->chksum = ip_chksum_pseudo_offload(IP_PROTO_TCP,seg->p->tot_len, &pcb->local_ip, &pcb->remote_ip);
} else {
#if TCP_CHECKSUM_ON_COPY
diff --git a/src/include/dpdk_cksum.h b/src/include/dpdk_cksum.h
index e57be4d..83c9c38 100644
--- a/src/include/dpdk_cksum.h
+++ b/src/include/dpdk_cksum.h
@@ -78,7 +78,7 @@ static inline void iph_cksum_set(struct pbuf *p, u16_t len, bool do_ipcksum) {
#include <rte_ip.h>
static inline void tcph_cksum_set(struct pbuf *p, u16_t len) {
- (void)len;
+ p->l4_len = len;
p->ol_flags |= RTE_MBUF_F_TX_TCP_CKSUM;
}
diff --git a/src/include/lwip/pbuf.h b/src/include/lwip/pbuf.h
index 87cd960..ef879da 100644
--- a/src/include/lwip/pbuf.h
+++ b/src/include/lwip/pbuf.h
@@ -223,10 +223,14 @@ struct pbuf {
#if USE_LIBOS && CHECKSUM_OFFLOAD_ALL
/** checksum offload ol_flags */
u64_t ol_flags;
- /** checksum offload l2_len */
+ /** L2 (MAC) header length for non-tunneling packets */
u64_t l2_len:7;
- /** checksum offload l3_len */
+ /** L3 (IP) header length */
u64_t l3_len:9;
+ /** L4 (TCP/UDP) header length */
+ u64_t l4_len:8;
+ u16_t header_off;
+ u8_t rexmit;
#endif /* USE_LIBOS CHECKSUM_OFFLOAD_SWITCH */
/** In case the user needs to store data custom data on a pbuf */
diff --git a/src/include/lwipopts.h b/src/include/lwipopts.h
index a5add21..7c819d0 100644
--- a/src/include/lwipopts.h
+++ b/src/include/lwipopts.h
@@ -173,6 +173,10 @@
#define ARP_QUEUE_LEN 32
+#define MAX_PBUF_CHAIN_LEN 40 /* upper bound on the number of unsent segments whose pbufs tcp_output() chains into one TSO send */
+
+#define MIN_TSO_SEG_LEN 256 /* NOTE(review): no use of this macro is visible in this patch - confirm where it is consumed */
+
/* ---------------------------------------
* ------- NIC offloads --------
* ---------------------------------------
--
2.23.0