Bugfix for hns SRQ and SGE, along with all related cleanups and refactorings. Signed-off-by: zhengfeng luo <luozhengfeng@h-partners.com> Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
177 lines
4.8 KiB
Diff
From f46d1f312984bdb372d2f86ac7dd7c2dcaa8c721 Mon Sep 17 00:00:00 2001
|
|
From: Wenpeng Liang <liangwenpeng@huawei.com>
|
|
Date: Tue, 11 May 2021 19:06:37 +0800
|
|
Subject: libhns: Refactor the process of post_srq_recv
|
|
|
|
SRQ is a shared queue, it mainly consists of four parts:
|
|
|
|
1. wqe buf: wqe buf is used to store wqe data.
|
|
|
|
2. wqe_idx buf: the cqe of SRQ is not generated in the order of wqe, so
|
|
the wqe_idx corresponding to the idle WQE needs to be placed in a FIFO
|
|
queue, it can instruct the hardware to obtain the corresponding WQE.
|
|
|
|
3. bitmap: bitmap is used to generate and release wqe_idx. When the user
|
|
has a new WR, the driver finds the idx of the idle wqe in bitmap. When the
|
|
CQE of wqe is generated, the driver releases the idx.
|
|
|
|
4. wr_id buf: wr_id buf is used to store the user's wr_id, then return it
|
|
to the user when ibv_poll_cq() is invoked.
|
|
|
|
After refactoring, the functions of the four parts are clearer.
|
|
|
|
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
|
|
Signed-off-by: Weihang Li <liweihang@huawei.com>
|
|
---
|
|
providers/hns/hns_roce_u_hw_v2.c | 95 +++++++++++++++++++-------------
|
|
1 file changed, 57 insertions(+), 38 deletions(-)
|
|
|
|
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
|
|
index 0c15bdbe..b622eaef 100644
|
|
--- a/providers/hns/hns_roce_u_hw_v2.c
|
|
+++ b/providers/hns/hns_roce_u_hw_v2.c
|
|
@@ -242,7 +242,7 @@ static void *get_send_sge_ex(struct hns_roce_qp *qp, unsigned int n)
|
|
return qp->buf.buf + qp->ex_sge.offset + (n << qp->ex_sge.sge_shift);
|
|
}
|
|
|
|
-static void *get_srq_wqe(struct hns_roce_srq *srq, int n)
|
|
+static void *get_srq_wqe(struct hns_roce_srq *srq, unsigned int n)
|
|
{
|
|
return srq->buf.buf + (n << srq->wqe_shift);
|
|
}
|
|
@@ -1536,7 +1536,21 @@ static int hns_roce_v2_srqwq_overflow(struct hns_roce_srq *srq)
|
|
return cur >= srq->wqe_cnt - 1;
|
|
}
|
|
|
|
-static int get_wqe_idx(struct hns_roce_srq *srq, int *wqe_idx)
|
|
+static int check_post_srq_valid(struct hns_roce_srq *srq,
|
|
+ struct ibv_recv_wr *wr)
|
|
+{
|
|
+ unsigned int max_sge = srq->max_gs - srq->rsv_sge;
|
|
+
|
|
+ if (hns_roce_v2_srqwq_overflow(srq))
|
|
+ return -ENOMEM;
|
|
+
|
|
+ if (wr->num_sge > max_sge)
|
|
+ return -EINVAL;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int get_wqe_idx(struct hns_roce_srq *srq, unsigned int *wqe_idx)
|
|
{
|
|
struct hns_roce_idx_que *idx_que = &srq->idx_que;
|
|
int bit_num;
|
|
@@ -1562,38 +1576,58 @@ static int get_wqe_idx(struct hns_roce_srq *srq, int *wqe_idx)
|
|
return 0;
|
|
}
|
|
|
|
+static void fill_srq_wqe(struct hns_roce_srq *srq, unsigned int wqe_idx,
|
|
+ struct ibv_recv_wr *wr)
|
|
+{
|
|
+ struct hns_roce_v2_wqe_data_seg *dseg;
|
|
+ int i;
|
|
+
|
|
+ dseg = get_srq_wqe(srq, wqe_idx);
|
|
+
|
|
+ for (i = 0; i < wr->num_sge; ++i) {
|
|
+ dseg[i].len = htole32(wr->sg_list[i].length);
|
|
+ dseg[i].lkey = htole32(wr->sg_list[i].lkey);
|
|
+ dseg[i].addr = htole64(wr->sg_list[i].addr);
|
|
+ }
|
|
+
|
|
+ /* hw stop reading when identify the last one */
|
|
+ if (srq->rsv_sge) {
|
|
+ dseg[i].len = htole32(INVALID_SGE_LENGTH);
|
|
+ dseg[i].lkey = htole32(0x0);
|
|
+ dseg[i].addr = 0;
|
|
+ }
|
|
+}
|
|
+
|
|
+static void fill_wqe_idx(struct hns_roce_srq *srq, unsigned int wqe_idx)
|
|
+{
|
|
+ struct hns_roce_idx_que *idx_que = &srq->idx_que;
|
|
+ unsigned int head;
|
|
+ __le32 *idx_buf;
|
|
+
|
|
+ head = idx_que->head & (srq->wqe_cnt - 1);
|
|
+
|
|
+ idx_buf = get_idx_buf(idx_que, head);
|
|
+ *idx_buf = htole32(wqe_idx);
|
|
+
|
|
+ idx_que->head++;
|
|
+}
|
|
+
|
|
static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
|
|
struct ibv_recv_wr *wr,
|
|
struct ibv_recv_wr **bad_wr)
|
|
{
|
|
struct hns_roce_context *ctx = to_hr_ctx(ib_srq->context);
|
|
struct hns_roce_srq *srq = to_hr_srq(ib_srq);
|
|
- struct hns_roce_v2_wqe_data_seg *dseg;
|
|
struct hns_roce_db srq_db;
|
|
- unsigned int max_sge;
|
|
- __le32 *srq_idx;
|
|
+ unsigned int wqe_idx;
|
|
int ret = 0;
|
|
- int wqe_idx;
|
|
- void *wqe;
|
|
int nreq;
|
|
- int ind;
|
|
- int i;
|
|
|
|
pthread_spin_lock(&srq->lock);
|
|
|
|
- /* current idx of srqwq */
|
|
- ind = srq->idx_que.head & (srq->wqe_cnt - 1);
|
|
-
|
|
- max_sge = srq->max_gs - srq->rsv_sge;
|
|
for (nreq = 0; wr; ++nreq, wr = wr->next) {
|
|
- if (hns_roce_v2_srqwq_overflow(srq)) {
|
|
- ret = -ENOMEM;
|
|
- *bad_wr = wr;
|
|
- break;
|
|
- }
|
|
-
|
|
- if (wr->num_sge > max_sge) {
|
|
- ret = -EINVAL;
|
|
+ ret = check_post_srq_valid(srq, wr);
|
|
+ if (ret) {
|
|
*bad_wr = wr;
|
|
break;
|
|
}
|
|
@@ -1604,28 +1638,13 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
|
|
break;
|
|
}
|
|
|
|
- wqe = get_srq_wqe(srq, wqe_idx);
|
|
- dseg = (struct hns_roce_v2_wqe_data_seg *)wqe;
|
|
-
|
|
- for (i = 0; i < wr->num_sge; ++i) {
|
|
- set_data_seg_v2(dseg, wr->sg_list + i);
|
|
- dseg++;
|
|
- }
|
|
-
|
|
- /* hw stop reading when identify the last one */
|
|
- if (srq->rsv_sge)
|
|
- set_ending_data_seg(dseg);
|
|
-
|
|
- srq_idx = (__le32 *)get_idx_buf(&srq->idx_que, ind);
|
|
- *srq_idx = htole32(wqe_idx);
|
|
+ fill_srq_wqe(srq, wqe_idx, wr);
|
|
+ fill_wqe_idx(srq, wqe_idx);
|
|
|
|
srq->wrid[wqe_idx] = wr->wr_id;
|
|
- ind = (ind + 1) & (srq->wqe_cnt - 1);
|
|
}
|
|
|
|
if (nreq) {
|
|
- srq->idx_que.head += nreq;
|
|
-
|
|
/*
|
|
* Make sure that descriptors are written before
|
|
* we write doorbell record.
|
|
--
|
|
2.30.0
|
|
|