diff --git a/0038-add-tso.patch b/0038-add-tso.patch new file mode 100644 index 0000000..457efe8 --- /dev/null +++ b/0038-add-tso.patch @@ -0,0 +1,380 @@ +From af8ac36acb103aa27b498dafa0ae8ba4332faac8 Mon Sep 17 00:00:00 2001 +From: wu-changsheng +Date: Sat, 3 Dec 2022 21:38:09 +0800 +Subject: [PATCH] add-tso + +--- + src/core/ipv4/etharp.c | 17 +++- + src/core/ipv4/ip4.c | 10 ++- + src/core/tcp.c | 6 ++ + src/core/tcp_out.c | 178 +++++++++++++++++++++++++++++++++++++-- + src/include/dpdk_cksum.h | 2 +- + src/include/lwip/pbuf.h | 8 +- + src/include/lwipopts.h | 4 + + 7 files changed, 211 insertions(+), 14 deletions(-) + +diff --git a/src/core/ipv4/etharp.c b/src/core/ipv4/etharp.c +index effb7db..f1903e4 100644 +--- a/src/core/ipv4/etharp.c ++++ b/src/core/ipv4/etharp.c +@@ -482,6 +482,13 @@ etharp_update_arp_entry(struct netif *netif, const ip4_addr_t *ipaddr, struct et + struct pbuf *p = arp_table[i].q; + arp_table[i].q = NULL; + #endif /* ARP_QUEUEING */ ++#if USE_LIBOS ++ struct pbuf *tmp = p->next; ++ while (tmp != NULL) { ++ tmp->ref--; ++ tmp = tmp->next; ++ } ++#endif + /* send the queued IP packet */ + ethernet_output(netif, p, (struct eth_addr *)(netif->hwaddr), ethaddr, ETHTYPE_IP); + /* free the queued IP packet */ +@@ -1027,7 +1034,15 @@ etharp_query(struct netif *netif, const ip4_addr_t *ipaddr, struct pbuf *q) + } else { + /* referencing the old pbuf is enough */ + p = q; +- pbuf_ref(p); ++#if USE_LIBOS ++ struct pbuf *tmp = p; ++ while (tmp != NULL) { ++ pbuf_ref(tmp); ++ tmp = tmp->next; ++ } ++#else ++ pbuf_ref(p); ++#endif + } + /* packet could be taken over? */ + if (p != NULL) { +diff --git a/src/core/ipv4/ip4.c b/src/core/ipv4/ip4.c +index 1334cdc..d823491 100644 +--- a/src/core/ipv4/ip4.c ++++ b/src/core/ipv4/ip4.c +@@ -1034,9 +1034,15 @@ ip4_output_if_opt_src(struct pbuf *p, const ip4_addr_t *src, const ip4_addr_t *d + #endif /* ENABLE_LOOPBACK */ + #if IP_FRAG + /* don't fragment if interface has mtu set to 0 [loopif] */ +- if (netif->mtu && (p->tot_len > netif->mtu)) { +- return ip4_frag(p, netif, dest); ++#if USE_LIBOS ++ if (!(get_eth_params_tx_ol() & DEV_TX_OFFLOAD_TCP_TSO)) { ++#endif ++ if (netif->mtu && (p->tot_len > netif->mtu)) { ++ return ip4_frag(p, netif, dest); ++ } ++#if USE_LIBOS + } ++#endif + #endif /* IP_FRAG */ + + LWIP_DEBUGF(IP_DEBUG, ("ip4_output_if: call netif->output()\n")); +diff --git a/src/core/tcp.c b/src/core/tcp.c +index 7c18408..51ada38 100644 +--- a/src/core/tcp.c ++++ b/src/core/tcp.c +@@ -1756,7 +1756,9 @@ tcp_seg_free(struct tcp_seg *seg) + seg->p = NULL; + #endif /* TCP_DEBUG */ + } ++#if !USE_LIBOS + memp_free(MEMP_TCP_SEG, seg); ++#endif + } + } + +@@ -1792,10 +1794,14 @@ tcp_seg_copy(struct tcp_seg *seg) + + LWIP_ASSERT("tcp_seg_copy: invalid seg", seg != NULL); + ++#if USE_LIBOS ++ cseg = (struct tcp_seg *)((uint8_t *)seg->p + sizeof(struct pbuf_custom)); ++#else + cseg = (struct tcp_seg *)memp_malloc(MEMP_TCP_SEG); + if (cseg == NULL) { + return NULL; + } ++#endif + SMEMCPY((u8_t *)cseg, (const u8_t *)seg, sizeof(struct tcp_seg)); + pbuf_ref(cseg->p); + return cseg; +diff --git a/src/core/tcp_out.c b/src/core/tcp_out.c +index 2834ba3..ee6f40b 100644 +--- a/src/core/tcp_out.c ++++ b/src/core/tcp_out.c +@@ -161,6 +161,40 @@ tcp_route(const struct tcp_pcb *pcb, const ip_addr_t *src, const ip_addr_t *dst) + * The TCP header is filled in except ackno and wnd. + * p is freed on failure. + */ ++#if USE_LIBOS ++void tcp_init_segment(struct tcp_seg *seg, const struct tcp_pcb *pcb, struct pbuf *p, u8_t hdrflags, ++ u32_t seqno, u8_t optflags) ++{ ++ u8_t optlen = LWIP_TCP_OPT_LENGTH_SEGMENT(optflags, pcb); ++ ++ seg->flags = optflags; ++ seg->next = NULL; ++ seg->p = p; ++ seg->len = p->tot_len - optlen; ++ ++ /* build TCP header */ ++ pbuf_add_header(p, TCP_HLEN); ++ seg->tcphdr = (struct tcp_hdr *)seg->p->payload; ++ seg->tcphdr->src = lwip_htons(pcb->local_port); ++ seg->tcphdr->dest = lwip_htons(pcb->remote_port); ++ seg->tcphdr->seqno = lwip_htonl(seqno); ++ ++ TCPH_HDRLEN_FLAGS_SET(seg->tcphdr, (TCP_HLEN + optlen) / 4, hdrflags); ++ seg->tcphdr->urgp = 0; ++} ++ ++static struct tcp_seg * ++tcp_create_segment(const struct tcp_pcb *pcb, struct pbuf *p, u8_t hdrflags, u32_t seqno, u8_t optflags) ++{ ++ struct tcp_seg *seg; ++ ++ seg = (struct tcp_seg *)((uint8_t *)p + sizeof(struct pbuf_custom)); ++ ++ tcp_init_segment(seg, pcb, p, hdrflags, seqno, optflags); ++ ++ return seg; ++} ++#else + static struct tcp_seg * + tcp_create_segment(const struct tcp_pcb *pcb, struct pbuf *p, u8_t hdrflags, u32_t seqno, u8_t optflags) + { +@@ -210,6 +244,7 @@ tcp_create_segment(const struct tcp_pcb *pcb, struct pbuf *p, u8_t hdrflags, u32 + seg->tcphdr->urgp = 0; + return seg; + } ++#endif + + /** + * Allocate a PBUF_RAM pbuf, perhaps with extra space at the end. +@@ -1272,6 +1307,60 @@ tcp_build_wnd_scale_option(u32_t *opts) + } + #endif + ++#if USE_LIBOS ++static struct tcp_seg *tcp_output_over(struct tcp_pcb *pcb, struct tcp_seg *seg, struct tcp_seg *useg) ++{ ++ if (TCP_TCPLEN(seg) > 0) { ++ seg->next = NULL; ++ if (useg == NULL) { ++ pcb->unacked = seg; ++ useg = seg; ++ } else { ++ if (TCP_SEQ_LT(lwip_ntohl(seg->tcphdr->seqno), lwip_ntohl(useg->tcphdr->seqno))) { ++ /* add segment to before tail of unacked list, keeping the list sorted */ ++ struct tcp_seg **cur_seg = &(pcb->unacked); ++ while (*cur_seg && ++ TCP_SEQ_LT(lwip_ntohl((*cur_seg)->tcphdr->seqno), lwip_ntohl(seg->tcphdr->seqno))) { ++ cur_seg = &((*cur_seg)->next ); ++ } ++ seg->next = (*cur_seg); ++ (*cur_seg) = seg; ++ } else { ++ /* add segment to tail of unacked list */ ++ useg->next = seg; ++ useg = seg; ++ } ++ } ++ } else { ++ tcp_seg_free(seg); ++ } ++ ++ return useg; ++} ++static err_t tcp_output_seg(struct tcp_pcb *pcb, struct tcp_seg *seg, struct netif *netif, u32_t snd_nxt) ++{ ++ if (pcb->state != SYN_SENT) { ++ TCPH_SET_FLAG(seg->tcphdr, TCP_ACK); ++ } ++ ++ err_t err = tcp_output_segment(seg, pcb, netif); ++ if (err != ERR_OK) { ++ /* segment could not be sent, for whatever reason */ ++ tcp_set_flags(pcb, TF_NAGLEMEMERR); ++ return err; ++ } ++ ++ if (pcb->state != SYN_SENT) { ++ tcp_clear_flags(pcb, TF_ACK_DELAY | TF_ACK_NOW); ++ } ++ ++ if (TCP_SEQ_LT(pcb->snd_nxt, snd_nxt)) { ++ pcb->snd_nxt = snd_nxt; ++ } ++ ++ return ERR_OK; ++} ++#endif + /** + * @ingroup tcp_raw + * Find out what we can send and send it +@@ -1376,16 +1465,88 @@ tcp_output(struct tcp_pcb *pcb) + for (; useg->next != NULL; useg = useg->next); + } + /* data available and window allows it to be sent? */ ++ + #if USE_LIBOS +- /* avoid send cose too much time, limit send pkts num max 10 */ +- uint16_t send_pkt = 0; +- while (seg != NULL && send_pkt < 10 && +- lwip_ntohl(seg->tcphdr->seqno) - pcb->lastack + seg->len <= wnd) { +- send_pkt++; +-#else ++ if (get_eth_params_tx_ol() & DEV_TX_OFFLOAD_TCP_TSO) { ++ while(seg) { ++ /** ++ * 1)遍历unsent队列,找到所有的待发送seg. 将seg的buf串起来 ++ * 2) 生成新的seg, 调用tcp_output_segment, 新的seg释放掉 ++ * 3) 若成功,则更新snd_nxt, unacked队列,和unsent队列。 ++ */ ++ struct tcp_seg *start_seg = seg; ++ struct pbuf *first_pbuf = NULL; ++ struct pbuf *pre_pbuf = NULL; ++ u8_t pbuf_chain_len = 0; ++ u32_t next_seqno = lwip_ntohl(seg->tcphdr->seqno); ++ while (seg != NULL && pbuf_chain_len < MAX_PBUF_CHAIN_LEN) { ++ u32_t seg_seqno = lwip_ntohl(seg->tcphdr->seqno); ++ if (seg_seqno - pcb->lastack + seg->len > wnd) { ++ if (first_pbuf) ++ break; ++ else ++ goto output_done; ++ } ++ ++ if ((tcp_do_output_nagle(pcb) == 0) && ((pcb->flags & (TF_NAGLEMEMERR | TF_FIN)) == 0)) { ++ if (first_pbuf) ++ break; ++ else ++ goto output_done; ++ } ++ ++ if (seg->len < TCP_MSS || next_seqno != seg_seqno || pbuf_chain_len >= MAX_PBUF_CHAIN_LEN) { ++ break; ++ } ++ if (first_pbuf == NULL && (seg->next == NULL || seg->next->len < TCP_MSS)) { ++ break; ++ } ++ ++ pbuf_remove_header(seg->p, seg->p->tot_len - seg->len); ++ if (first_pbuf == NULL) { ++ first_pbuf = seg->p; ++ } else { ++ first_pbuf->tot_len += seg->p->len; ++ pre_pbuf->next = seg->p; ++ } ++ ++ pre_pbuf = seg->p; ++ next_seqno = seg_seqno + seg->len; ++ seg = seg->next; ++ pcb->unsent = seg; ++ pbuf_chain_len++; ++ } ++ ++ if (first_pbuf == NULL) { ++ err = tcp_output_seg(pcb, seg, netif, next_seqno + seg->len); ++ if (err != ERR_OK) ++ return err; ++ pcb->unsent = seg->next; ++ useg = tcp_output_over(pcb, seg, useg); ++ seg = pcb->unsent; ++ continue; ++ } ++ ++ struct tcp_seg new_seg; ++ tcp_init_segment(&new_seg, pcb, first_pbuf, 0, lwip_ntohl(start_seg->tcphdr->seqno), 0); ++ ++ err = tcp_output_seg(pcb, &new_seg, netif, next_seqno); ++ ++ for (u32_t i = 0; i < pbuf_chain_len; i++) { ++ struct tcp_seg *next_seg = start_seg->next; ++ start_seg->p->next = NULL; ++ useg = tcp_output_over(pcb, start_seg, useg); ++ start_seg = next_seg; ++ } ++ ++ pbuf_remove_header(new_seg.p, new_seg.p->tot_len - new_seg.len - TCPH_HDRLEN_BYTES(new_seg.tcphdr)); ++ new_seg.p->tot_len = new_seg.p->len; ++ } ++ } else ++#endif ++{ + while (seg != NULL && + lwip_ntohl(seg->tcphdr->seqno) - pcb->lastack + seg->len <= wnd) { +-#endif + LWIP_ASSERT("RST not expected here!", + (TCPH_FLAGS(seg->tcphdr) & TCP_RST) == 0); + /* Stop sending if the nagle algorithm would prevent it +@@ -1462,6 +1623,7 @@ tcp_output(struct tcp_pcb *pcb) + } + seg = pcb->unsent; + } ++} + #if TCP_OVERSIZE + if (pcb->unsent == NULL) { + /* last unsent has been removed, reset unsent_oversize */ +@@ -1627,7 +1789,7 @@ tcp_output_segment(struct tcp_seg *seg, struct tcp_pcb *pcb, struct netif *netif + IF__NETIF_CHECKSUM_ENABLED(netif, NETIF_CHECKSUM_GEN_TCP) { + #if CHECKSUM_GEN_TCP_HW + if (get_eth_params_tx_ol() & DEV_TX_OFFLOAD_TCP_CKSUM) { +- tcph_cksum_set(seg->p, TCP_HLEN); ++ tcph_cksum_set(seg->p, TCPH_HDRLEN_BYTES(seg->tcphdr)); + seg->tcphdr->chksum = ip_chksum_pseudo_offload(IP_PROTO_TCP,seg->p->tot_len, &pcb->local_ip, &pcb->remote_ip); + } else { + #if TCP_CHECKSUM_ON_COPY +diff --git a/src/include/dpdk_cksum.h b/src/include/dpdk_cksum.h +index e57be4d..83c9c38 100644 +--- a/src/include/dpdk_cksum.h ++++ b/src/include/dpdk_cksum.h +@@ -78,7 +78,7 @@ static inline void iph_cksum_set(struct pbuf *p, u16_t len, bool do_ipcksum) { + #include + + static inline void tcph_cksum_set(struct pbuf *p, u16_t len) { +- (void)len; ++ p->l4_len = len; + p->ol_flags |= RTE_MBUF_F_TX_TCP_CKSUM; + } + +diff --git a/src/include/lwip/pbuf.h b/src/include/lwip/pbuf.h +index 87cd960..ef879da 100644 +--- a/src/include/lwip/pbuf.h ++++ b/src/include/lwip/pbuf.h +@@ -223,10 +223,14 @@ struct pbuf { + #if USE_LIBOS && CHECKSUM_OFFLOAD_ALL + /** checksum offload ol_flags */ + u64_t ol_flags; +- /** checksum offload l2_len */ ++ /* < L2 (MAC) Header Length for non-tunneling pkt. */ + u64_t l2_len:7; +- /** checksum offload l3_len */ ++ /* < L3 (IP) Header Length. */ + u64_t l3_len:9; ++ /* < L4 (TCP/UDP) Header Length. */ ++ u64_t l4_len:8; ++ u16_t header_off; ++ u8_t rexmit; + #endif /* USE_LIBOS CHECKSUM_OFFLOAD_SWITCH */ + + /** In case the user needs to store data custom data on a pbuf */ +diff --git a/src/include/lwipopts.h b/src/include/lwipopts.h +index a5add21..7c819d0 100644 +--- a/src/include/lwipopts.h ++++ b/src/include/lwipopts.h +@@ -173,6 +173,10 @@ + + #define ARP_QUEUE_LEN 32 + ++#define MAX_PBUF_CHAIN_LEN 40 ++ ++#define MIN_TSO_SEG_LEN 256 ++ + /* --------------------------------------- + * ------- NIC offloads -------- + * --------------------------------------- +-- +2.23.0 + diff --git a/0039-optimize-app-thread-write-buff-block.patch b/0039-optimize-app-thread-write-buff-block.patch new file mode 100644 index 0000000..42280e3 --- /dev/null +++ b/0039-optimize-app-thread-write-buff-block.patch @@ -0,0 +1,94 @@ +From be541628552ccc3a8dcd3c6ad6e5a1aed07c4928 Mon Sep 17 00:00:00 2001 +From: wuchangsheng +Date: Sat, 3 Dec 2022 20:35:34 +0800 +Subject: [PATCH 2/2] fix app thread write fail + +--- + src/core/tcp_out.c | 2 +- + src/include/lwip/pbuf.h | 3 +++ + src/include/lwipsock.h | 33 +++++++++++++++++++++++---------- + 3 files changed, 27 insertions(+), 11 deletions(-) + +diff --git a/src/core/tcp_out.c b/src/core/tcp_out.c +index ee6f40b..f53750b 100644 +--- a/src/core/tcp_out.c ++++ b/src/core/tcp_out.c +@@ -763,7 +763,7 @@ tcp_write(struct tcp_pcb *pcb, const void *arg, u16_t len, u8_t apiflags) + + pos += seglen; + #if USE_LIBOS +- write_lwip_over((struct lwip_sock*)arg, 1); ++ write_lwip_over((struct lwip_sock*)arg); + #endif + } + +diff --git a/src/include/lwip/pbuf.h b/src/include/lwip/pbuf.h +index ef879da..10e2af9 100644 +--- a/src/include/lwip/pbuf.h ++++ b/src/include/lwip/pbuf.h +@@ -231,6 +231,9 @@ struct pbuf { + u64_t l4_len:8; + u16_t header_off; + u8_t rexmit; ++ u8_t in_write; ++ u8_t head; ++ struct pbuf *last; + #endif /* USE_LIBOS CHECKSUM_OFFLOAD_SWITCH */ + + /** In case the user needs to store data custom data on a pbuf */ +diff --git a/src/include/lwipsock.h b/src/include/lwipsock.h +index 2ffb077..f919330 100644 +--- a/src/include/lwipsock.h ++++ b/src/include/lwipsock.h +@@ -93,17 +93,30 @@ struct lwip_sock { + #endif + + #if USE_LIBOS +- struct pbuf *send_lastdata; +- uint16_t send_datalen; +- volatile uint32_t events __rte_cache_aligned; /* available events */ +- struct pbuf *recv_lastdata __rte_cache_aligned; /* unread data in one pbuf */ +- struct list_node recv_list __rte_cache_aligned; +- struct list_node event_list __rte_cache_aligned; +- struct list_node send_list __rte_cache_aligned; +- uint32_t in_send __rte_cache_aligned; /* avoid sock too much send rpc msg*/ ++ char pad0 __rte_cache_aligned; ++ /* app thread use */ ++ struct pbuf *recv_lastdata; /* unread data in one pbuf */ ++ uint16_t remain_len; + uint32_t epoll_events; /* registered events, EPOLLONESHOT write frequently */ +- char pad __rte_cache_aligned; ++ volatile uint32_t events; /* available events */ ++ struct list_node event_list; ++ ++ char pad1 __rte_cache_aligned; ++ /* app and stack thread all use */ ++ uint32_t in_send; /* avoid sock too much send rpc msg*/ ++ pthread_spinlock_t sock_lock; ++ ++ char pad2 __rte_cache_aligned; ++ /* stack thread all use */ ++ struct list_node recv_list; ++ struct list_node send_list; ++ struct pbuf *send_lastdata; ++ struct pbuf *send_pre_del; ++ uint64_t recv_all; ++ uint64_t send_all; + ++ char pad3 __rte_cache_aligned; ++ /* nerver change */ + struct wakeup_poll *wakeup; + epoll_data_t ep_data; + struct lwip_sock *listen_next; /* listenfd list */ +@@ -131,7 +144,7 @@ extern ssize_t read_lwip_data(struct lwip_sock *sock, int32_t flags, u8_t apifla + extern struct pbuf *write_lwip_data(struct lwip_sock *sock, uint16_t remain_size, uint8_t *apiflags); + extern void gazelle_init_sock(int32_t fd); + extern void gazelle_clean_sock(int32_t fd); +-extern void write_lwip_over(struct lwip_sock *sock, uint32_t n); ++extern void write_lwip_over(struct lwip_sock *sock); + #endif /* USE_LIBOS */ + + struct lwip_sock *get_socket(int s); +-- +2.8.4.windows.1 + diff --git a/lwip.spec b/lwip.spec index 34ef652..7713237 100644 --- a/lwip.spec +++ b/lwip.spec @@ -4,7 +4,7 @@ Summary: lwip is a small independent implementation of the TCP/IP protocol suite Name: lwip Version: 2.1.3 -Release: 27 +Release: 28 License: BSD URL: http://savannah.nongnu.org/projects/lwip/ Source0: http://download.savannah.nongnu.org/releases/lwip/%{name}-%{version}.zip @@ -49,6 +49,8 @@ Patch9033: 0034-add-accept4-and-epoll_create1.patch Patch9034: 0035-add-writev-and-readv.patch Patch9035: 0036-add-fs-secure-compilation-option.patch Patch9036: 0037-enable-ARP-QUEUE-to-avoid-sync-packet-dropped.patch +Patch9037: 0038-add-tso.patch +Patch9038: 0039-optimize-app-thread-write-buff-block.patch BuildRequires: gcc-c++ dos2unix dpdk-devel @@ -102,6 +104,8 @@ find %{_builddir}/%{name}-%{version} -type f -exec dos2unix -q {} \; %patch9034 -p1 %patch9035 -p1 %patch9036 -p1 +%patch9037 -p1 +%patch9038 -p1 %build cd %{_builddir}/%{name}-%{version}/src @@ -117,6 +121,9 @@ cd %{_builddir}/%{name}-%{version}/src %{_libdir}/liblwip.a %changelog +* Sat Dec 3 2022 wuchangsheng - 2.1.3-28 +- add tso define + * Thu Dec 01 2022 jiangheng - 2.1.3-27 - remove lwip-2.1.3.tar.gz