Backport bugfixes for hns SRQ and SGE from rdma-core v36.

Bugfixes for hns SRQ and SGE, along with all the related cleanups and
refactorings.

Signed-off-by: zhengfeng luo <luozhengfeng@h-partners.com>
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>


0009-libhns-Bugfix-for-checking-whether-the-SRQ-is-full-w.patch
@@ -0,0 +1,70 @@
From 5cc1a047c4d71ced86b0f71f66adf12475a3c788 Mon Sep 17 00:00:00 2001
From: Wenpeng Liang <liangwenpeng@huawei.com>
Date: Tue, 11 May 2021 19:06:35 +0800
Subject: libhns: Bugfix for checking whether the SRQ is full when posting WR
If the user posts a list of WRs, the head is not updated within the for
loop, so the if (head == tail) check for a full queue becomes invalid.
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
---
providers/hns/hns_roce_u_hw_v2.c | 17 +++++++++++++----
providers/hns/hns_roce_u_verbs.c | 2 +-
2 files changed, 14 insertions(+), 5 deletions(-)
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index 82124082..0c15bdbe 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -1527,6 +1527,15 @@ static int hns_roce_u_v2_destroy_qp(struct ibv_qp *ibqp)
return ret;
}
+static int hns_roce_v2_srqwq_overflow(struct hns_roce_srq *srq)
+{
+ struct hns_roce_idx_que *idx_que = &srq->idx_que;
+ unsigned int cur;
+
+ cur = idx_que->head - idx_que->tail;
+ return cur >= srq->wqe_cnt - 1;
+}
+
static int get_wqe_idx(struct hns_roce_srq *srq, int *wqe_idx)
{
struct hns_roce_idx_que *idx_que = &srq->idx_que;
@@ -1577,14 +1586,14 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
max_sge = srq->max_gs - srq->rsv_sge;
for (nreq = 0; wr; ++nreq, wr = wr->next) {
- if (wr->num_sge > max_sge) {
- ret = -EINVAL;
+ if (hns_roce_v2_srqwq_overflow(srq)) {
+ ret = -ENOMEM;
*bad_wr = wr;
break;
}
- if (srq->idx_que.head == srq->idx_que.tail) {
- ret = -ENOMEM;
+ if (wr->num_sge > max_sge) {
+ ret = -EINVAL;
*bad_wr = wr;
break;
}
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index 3abf7b48..dace35fd 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -492,7 +492,7 @@ static int hns_roce_create_idx_que(struct hns_roce_srq *srq)
idx_que->bitmap[i] = ~(0UL);
idx_que->head = 0;
- idx_que->tail = srq->wqe_cnt - 1;
+ idx_que->tail = 0;
return 0;
}
--
2.30.0
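
Editor's note: the test added here relies on free-running unsigned counters.
head and tail are never masked when compared, so head - tail gives the number
of outstanding entries even after the counters wrap; masking only happens when
an index addresses the ring, as the post path does with
head & (srq->wqe_cnt - 1). A minimal, self-contained sketch of that arithmetic
(struct and values are illustrative, not the driver's):

#include <assert.h>

struct idx_queue {
	unsigned int head;    /* bumped by the post path */
	unsigned int tail;    /* bumped when a CQE is consumed */
	unsigned int wqe_cnt; /* power of two */
};

static int srqwq_overflow(const struct idx_queue *q)
{
	/* one slot is still reserved at this point in the series */
	return q->head - q->tail >= q->wqe_cnt - 1;
}

int main(void)
{
	struct idx_queue q = { .head = 4294967295u, .tail = 4294967290u,
			       .wqe_cnt = 8 };

	assert(!srqwq_overflow(&q)); /* 5 in flight, not full */
	q.head += 2;                 /* head wraps around to 1 */
	assert(srqwq_overflow(&q));  /* 7 == wqe_cnt - 1, full */
	return 0;
}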


0010-libhns-Allow-users-to-create-a-0-depth-SRQs.patch
@@ -0,0 +1,30 @@
From a79800afbbc48e5c5274bf3fc0e890705b3a596d Mon Sep 17 00:00:00 2001
From: Wenpeng Liang <liangwenpeng@huawei.com>
Date: Tue, 11 May 2021 19:06:36 +0800
Subject: libhns: Allow users to create a 0-depth SRQs
Users are allowed to create 0-depth SRQs, so the check that rejects a
zero max_wr should be removed.
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
---
providers/hns/hns_roce_u_verbs.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index dace35fd..2d1a6de3 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -525,7 +525,7 @@ static int hns_roce_verify_srq(struct hns_roce_context *context,
init_attr->srq_type != IBV_SRQT_XRC)
return -EINVAL;
- if (!init_attr->attr.max_wr || !init_attr->attr.max_sge ||
+ if (!init_attr->attr.max_sge ||
init_attr->attr.max_wr > context->max_srq_wr ||
init_attr->attr.max_sge > context->max_srq_sge)
return -EINVAL;
--
2.30.0
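
Editor's note: with the check relaxed, an application that does not care about
the SRQ depth can pass max_wr = 0 through the standard verbs API. A hedged
usage sketch (function name is hypothetical; pd must be a valid protection
domain):

#include <infiniband/verbs.h>

static struct ibv_srq *create_zero_depth_srq(struct ibv_pd *pd)
{
	struct ibv_srq_init_attr init_attr = {
		.attr = {
			.max_wr  = 0, /* accepted by libhns after this patch */
			.max_sge = 1,
		},
	};

	return ibv_create_srq(pd, &init_attr);
}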


0011-libhns-Refactor-the-process-of-post_srq_recv.patch
@@ -0,0 +1,176 @@
From f46d1f312984bdb372d2f86ac7dd7c2dcaa8c721 Mon Sep 17 00:00:00 2001
From: Wenpeng Liang <liangwenpeng@huawei.com>
Date: Tue, 11 May 2021 19:06:37 +0800
Subject: libhns: Refactor the process of post_srq_recv
SRQ is a shared queue that mainly consists of four parts:
1. wqe buf: stores the WQE data.
2. wqe_idx buf: the CQEs of an SRQ are not generated in WQE order, so the
wqe_idx corresponding to each idle WQE is placed in a FIFO queue that
tells the hardware which WQE to fetch.
3. bitmap: used to allocate and release wqe_idx. When the user posts a new
WR, the driver finds the index of an idle WQE in the bitmap; when that
WQE's CQE is generated, the driver releases the index.
4. wr_id buf: stores the user's wr_id and returns it to the user when
ibv_poll_cq() is invoked.
After the refactoring, the roles of these four parts are much clearer.
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
---
providers/hns/hns_roce_u_hw_v2.c | 95 +++++++++++++++++++-------------
1 file changed, 57 insertions(+), 38 deletions(-)
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index 0c15bdbe..b622eaef 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -242,7 +242,7 @@ static void *get_send_sge_ex(struct hns_roce_qp *qp, unsigned int n)
return qp->buf.buf + qp->ex_sge.offset + (n << qp->ex_sge.sge_shift);
}
-static void *get_srq_wqe(struct hns_roce_srq *srq, int n)
+static void *get_srq_wqe(struct hns_roce_srq *srq, unsigned int n)
{
return srq->buf.buf + (n << srq->wqe_shift);
}
@@ -1536,7 +1536,21 @@ static int hns_roce_v2_srqwq_overflow(struct hns_roce_srq *srq)
return cur >= srq->wqe_cnt - 1;
}
-static int get_wqe_idx(struct hns_roce_srq *srq, int *wqe_idx)
+static int check_post_srq_valid(struct hns_roce_srq *srq,
+ struct ibv_recv_wr *wr)
+{
+ unsigned int max_sge = srq->max_gs - srq->rsv_sge;
+
+ if (hns_roce_v2_srqwq_overflow(srq))
+ return -ENOMEM;
+
+ if (wr->num_sge > max_sge)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int get_wqe_idx(struct hns_roce_srq *srq, unsigned int *wqe_idx)
{
struct hns_roce_idx_que *idx_que = &srq->idx_que;
int bit_num;
@@ -1562,38 +1576,58 @@ static int get_wqe_idx(struct hns_roce_srq *srq, int *wqe_idx)
return 0;
}
+static void fill_srq_wqe(struct hns_roce_srq *srq, unsigned int wqe_idx,
+ struct ibv_recv_wr *wr)
+{
+ struct hns_roce_v2_wqe_data_seg *dseg;
+ int i;
+
+ dseg = get_srq_wqe(srq, wqe_idx);
+
+ for (i = 0; i < wr->num_sge; ++i) {
+ dseg[i].len = htole32(wr->sg_list[i].length);
+ dseg[i].lkey = htole32(wr->sg_list[i].lkey);
+ dseg[i].addr = htole64(wr->sg_list[i].addr);
+ }
+
+ /* hw stop reading when identify the last one */
+ if (srq->rsv_sge) {
+ dseg[i].len = htole32(INVALID_SGE_LENGTH);
+ dseg[i].lkey = htole32(0x0);
+ dseg[i].addr = 0;
+ }
+}
+
+static void fill_wqe_idx(struct hns_roce_srq *srq, unsigned int wqe_idx)
+{
+ struct hns_roce_idx_que *idx_que = &srq->idx_que;
+ unsigned int head;
+ __le32 *idx_buf;
+
+ head = idx_que->head & (srq->wqe_cnt - 1);
+
+ idx_buf = get_idx_buf(idx_que, head);
+ *idx_buf = htole32(wqe_idx);
+
+ idx_que->head++;
+}
+
static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
struct ibv_recv_wr *wr,
struct ibv_recv_wr **bad_wr)
{
struct hns_roce_context *ctx = to_hr_ctx(ib_srq->context);
struct hns_roce_srq *srq = to_hr_srq(ib_srq);
- struct hns_roce_v2_wqe_data_seg *dseg;
struct hns_roce_db srq_db;
- unsigned int max_sge;
- __le32 *srq_idx;
+ unsigned int wqe_idx;
int ret = 0;
- int wqe_idx;
- void *wqe;
int nreq;
- int ind;
- int i;
pthread_spin_lock(&srq->lock);
- /* current idx of srqwq */
- ind = srq->idx_que.head & (srq->wqe_cnt - 1);
-
- max_sge = srq->max_gs - srq->rsv_sge;
for (nreq = 0; wr; ++nreq, wr = wr->next) {
- if (hns_roce_v2_srqwq_overflow(srq)) {
- ret = -ENOMEM;
- *bad_wr = wr;
- break;
- }
-
- if (wr->num_sge > max_sge) {
- ret = -EINVAL;
+ ret = check_post_srq_valid(srq, wr);
+ if (ret) {
*bad_wr = wr;
break;
}
@@ -1604,28 +1638,13 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
break;
}
- wqe = get_srq_wqe(srq, wqe_idx);
- dseg = (struct hns_roce_v2_wqe_data_seg *)wqe;
-
- for (i = 0; i < wr->num_sge; ++i) {
- set_data_seg_v2(dseg, wr->sg_list + i);
- dseg++;
- }
-
- /* hw stop reading when identify the last one */
- if (srq->rsv_sge)
- set_ending_data_seg(dseg);
-
- srq_idx = (__le32 *)get_idx_buf(&srq->idx_que, ind);
- *srq_idx = htole32(wqe_idx);
+ fill_srq_wqe(srq, wqe_idx, wr);
+ fill_wqe_idx(srq, wqe_idx);
srq->wrid[wqe_idx] = wr->wr_id;
- ind = (ind + 1) & (srq->wqe_cnt - 1);
}
if (nreq) {
- srq->idx_que.head += nreq;
-
/*
* Make sure that descriptors are written before
* we write doorbell record.
--
2.30.0
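
Editor's note: the four parts listed in the commit message cooperate like
this: the bitmap hands out an idle wqe_idx, the index is pushed into the FIFO
for the hardware, and wr_id is parked until completion time. A self-contained
toy of that bookkeeping (all names and sizes are illustrative; the wqe buf
itself is elided):

#include <stdio.h>

#define WQE_CNT 8u /* power of two */

static unsigned long bitmap = (1ul << WQE_CNT) - 1; /* bit set = slot idle */
static unsigned int idx_fifo[WQE_CNT];              /* read by the "hardware" */
static unsigned long long wr_id_buf[WQE_CNT];       /* returned at poll time */
static unsigned int head;                           /* FIFO producer counter */

static int get_idle_wqe_idx(unsigned int *wqe_idx)
{
	unsigned int i;

	for (i = 0; i < WQE_CNT; i++) {
		if (bitmap & (1ul << i)) {
			bitmap &= ~(1ul << i); /* busy until its CQE arrives */
			*wqe_idx = i;
			return 0;
		}
	}
	return -1; /* queue full */
}

int main(void)
{
	unsigned int wqe_idx;

	if (!get_idle_wqe_idx(&wqe_idx)) {
		/* fill the WQE at wqe_idx here (elided) ... */
		idx_fifo[head++ & (WQE_CNT - 1)] = wqe_idx;
		wr_id_buf[wqe_idx] = 0x1234;
		printf("posted WR to wqe_idx %u\n", wqe_idx);
	}
	return 0;
}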


0012-libhns-Set-srqlimit-to-0-when-creating-SRQ.patch
@@ -0,0 +1,33 @@
From a18b0ee409d3382aa556b8f06a6cd6bfbef3f5c8 Mon Sep 17 00:00:00 2001
From: Wenpeng Liang <liangwenpeng@huawei.com>
Date: Tue, 11 May 2021 19:06:38 +0800
Subject: libhns: Set srqlimit to 0 when creating SRQ
According to the IB specification, the srq_limit parameter should not be
configured when creating an SRQ, but libhns currently does not set
attr.srq_limit to 0. As a result, when the attr.srq_limit provided by the
user is nonzero, the value returned to the user differs from the one
obtained by ibv_query_srq(). Therefore, the driver should set
attr.srq_limit to 0 when creating an SRQ.
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
---
providers/hns/hns_roce_u_verbs.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index 2d1a6de3..107da753 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -580,6 +580,7 @@ static struct ibv_srq *create_srq(struct ibv_context *context,
srq->wqe_cnt = roundup_pow_of_two(attr->max_wr + 1);
srq->max_gs = roundup_pow_of_two(attr->max_sge + srq->rsv_sge);
attr->max_sge = srq->max_gs;
+ attr->srq_limit = 0;
ret = hns_roce_create_idx_que(srq);
if (ret)
--
2.30.0
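
Editor's note: a hedged sketch of why this matters to applications. After the
fix, the srq_limit written back into the init attributes agrees with what
ibv_query_srq() later reports, namely 0 (no SRQ limit event armed), whatever
the caller originally passed in:

#include <infiniband/verbs.h>
#include <stdio.h>

static void show_srq_limit(struct ibv_srq *srq)
{
	struct ibv_srq_attr attr;

	if (!ibv_query_srq(srq, &attr))
		printf("srq_limit = %u\n", attr.srq_limit); /* expect 0 */
}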


0013-libhns-Refactor-the-process-of-create_srq.patch
@@ -0,0 +1,367 @@
From b914c76318f5b95e3157c3cbf1ccb49ec6d27635 Mon Sep 17 00:00:00 2001
From: Wenpeng Liang <liangwenpeng@huawei.com>
Date: Tue, 11 May 2021 19:06:39 +0800
Subject: libhns: Refactor the process of create_srq
Reorganize create_srq() into several sub-functions to make the process
clearer.
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
---
providers/hns/hns_roce_u.h | 7 +-
providers/hns/hns_roce_u_hw_v2.c | 2 +-
providers/hns/hns_roce_u_verbs.c | 178 ++++++++++++++++++-------------
3 files changed, 105 insertions(+), 82 deletions(-)
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index b3f48113..a437727c 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -211,7 +211,8 @@ struct hns_roce_idx_que {
struct hns_roce_srq {
struct verbs_srq verbs_srq;
- struct hns_roce_buf buf;
+ struct hns_roce_idx_que idx_que;
+ struct hns_roce_buf wqe_buf;
pthread_spinlock_t lock;
unsigned long *wrid;
unsigned int srqn;
@@ -221,7 +222,6 @@ struct hns_roce_srq {
unsigned int wqe_shift;
unsigned int *db;
unsigned short counter;
- struct hns_roce_idx_que idx_que;
};
struct hns_roce_wq {
@@ -343,8 +343,7 @@ static inline struct hns_roce_cq *to_hr_cq(struct ibv_cq *ibv_cq)
static inline struct hns_roce_srq *to_hr_srq(struct ibv_srq *ibv_srq)
{
- return container_of(container_of(ibv_srq, struct verbs_srq, srq),
- struct hns_roce_srq, verbs_srq);
+ return container_of(ibv_srq, struct hns_roce_srq, verbs_srq.srq);
}
static inline struct hns_roce_qp *to_hr_qp(struct ibv_qp *ibv_qp)
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index b622eaef..d4e7e4f9 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -244,7 +244,7 @@ static void *get_send_sge_ex(struct hns_roce_qp *qp, unsigned int n)
static void *get_srq_wqe(struct hns_roce_srq *srq, unsigned int n)
{
- return srq->buf.buf + (n << srq->wqe_shift);
+ return srq->wqe_buf.buf + (n << srq->wqe_shift);
}
static void *get_idx_buf(struct hns_roce_idx_que *idx_que, int n)
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index 107da753..75b9e530 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -432,17 +432,23 @@ static int hns_roce_store_srq(struct hns_roce_context *ctx,
uint32_t tind = (srq->srqn & (ctx->num_srqs - 1)) >>
ctx->srq_table_shift;
+ pthread_mutex_lock(&ctx->srq_table_mutex);
+
if (!ctx->srq_table[tind].refcnt) {
ctx->srq_table[tind].table =
calloc(ctx->srq_table_mask + 1,
sizeof(struct hns_roce_srq *));
- if (!ctx->srq_table[tind].table)
+ if (!ctx->srq_table[tind].table) {
+ pthread_mutex_unlock(&ctx->srq_table_mutex);
return -ENOMEM;
+ }
}
++ctx->srq_table[tind].refcnt;
ctx->srq_table[tind].table[srq->srqn & ctx->srq_table_mask] = srq;
+ pthread_mutex_unlock(&ctx->srq_table_mutex);
+
return 0;
}
@@ -461,13 +467,46 @@ static void hns_roce_clear_srq(struct hns_roce_context *ctx, uint32_t srqn)
{
uint32_t tind = (srqn & (ctx->num_srqs - 1)) >> ctx->srq_table_shift;
+ pthread_mutex_lock(&ctx->srq_table_mutex);
+
if (!--ctx->srq_table[tind].refcnt)
free(ctx->srq_table[tind].table);
else
ctx->srq_table[tind].table[srqn & ctx->srq_table_mask] = NULL;
+
+ pthread_mutex_unlock(&ctx->srq_table_mutex);
+}
+
+static int verify_srq_create_attr(struct hns_roce_context *context,
+ struct ibv_srq_init_attr_ex *attr)
+{
+ if (attr->srq_type != IBV_SRQT_BASIC &&
+ attr->srq_type != IBV_SRQT_XRC)
+ return -EINVAL;
+
+ if (!attr->attr.max_sge ||
+ attr->attr.max_wr > context->max_srq_wr ||
+ attr->attr.max_sge > context->max_srq_sge)
+ return -EINVAL;
+
+ return 0;
+}
+
+static void set_srq_param(struct ibv_context *context, struct hns_roce_srq *srq,
+ struct ibv_srq_init_attr_ex *attr)
+{
+ if (to_hr_dev(context->device)->hw_version == HNS_ROCE_HW_VER2)
+ srq->rsv_sge = 1;
+
+ srq->wqe_cnt = roundup_pow_of_two(attr->attr.max_wr + 1);
+ srq->max_gs = roundup_pow_of_two(attr->attr.max_sge + srq->rsv_sge);
+ srq->wqe_shift = hr_ilog32(roundup_pow_of_two(HNS_ROCE_SGE_SIZE *
+ srq->max_gs));
+ attr->attr.max_sge = srq->max_gs;
+ attr->attr.srq_limit = 0;
}
-static int hns_roce_create_idx_que(struct hns_roce_srq *srq)
+static int alloc_srq_idx_que(struct hns_roce_srq *srq)
{
struct hns_roce_idx_que *idx_que = &srq->idx_que;
unsigned int buf_size;
@@ -478,13 +517,13 @@ static int hns_roce_create_idx_que(struct hns_roce_srq *srq)
BIT_CNT_PER_LONG;
idx_que->bitmap = calloc(idx_que->bitmap_cnt, sizeof(unsigned long));
if (!idx_que->bitmap)
- return ENOMEM;
+ return -ENOMEM;
buf_size = to_hr_hem_entries_size(srq->wqe_cnt, idx_que->entry_shift);
if (hns_roce_alloc_buf(&idx_que->buf, buf_size, HNS_HW_PAGE_SIZE)) {
free(idx_que->bitmap);
idx_que->bitmap = NULL;
- return ENOMEM;
+ return -ENOMEM;
}
/* init the idx_que bitmap */
@@ -497,40 +536,48 @@ static int hns_roce_create_idx_que(struct hns_roce_srq *srq)
return 0;
}
-static int hns_roce_alloc_srq_buf(struct hns_roce_srq *srq)
+static int alloc_srq_wqe_buf(struct hns_roce_srq *srq)
{
- int srq_buf_size;
+ int buf_size = to_hr_hem_entries_size(srq->wqe_cnt, srq->wqe_shift);
- srq->wrid = calloc(srq->wqe_cnt, sizeof(unsigned long));
- if (!srq->wrid)
- return ENOMEM;
+ return hns_roce_alloc_buf(&srq->wqe_buf, buf_size, HNS_HW_PAGE_SIZE);
+}
- srq->wqe_shift = hr_ilog32(roundup_pow_of_two(HNS_ROCE_SGE_SIZE *
- srq->max_gs));
- srq_buf_size = to_hr_hem_entries_size(srq->wqe_cnt, srq->wqe_shift);
+static int alloc_srq_buf(struct hns_roce_srq *srq)
+{
+ int ret;
- /* allocate srq wqe buf */
- if (hns_roce_alloc_buf(&srq->buf, srq_buf_size, HNS_HW_PAGE_SIZE)) {
- free(srq->wrid);
- return ENOMEM;
+ ret = alloc_srq_idx_que(srq);
+ if (ret)
+ return ret;
+
+ ret = alloc_srq_wqe_buf(srq);
+ if (ret)
+ goto err_idx_que;
+
+ srq->wrid = calloc(srq->wqe_cnt, sizeof(*srq->wrid));
+ if (!srq->wrid) {
+ ret = -ENOMEM;
+ goto err_wqe_buf;
}
return 0;
-}
-static int hns_roce_verify_srq(struct hns_roce_context *context,
- struct ibv_srq_init_attr_ex *init_attr)
-{
- if (init_attr->srq_type != IBV_SRQT_BASIC &&
- init_attr->srq_type != IBV_SRQT_XRC)
- return -EINVAL;
+err_wqe_buf:
+ hns_roce_free_buf(&srq->wqe_buf);
+err_idx_que:
+ hns_roce_free_buf(&srq->idx_que.buf);
+ free(srq->idx_que.bitmap);
- if (!init_attr->attr.max_sge ||
- init_attr->attr.max_wr > context->max_srq_wr ||
- init_attr->attr.max_sge > context->max_srq_sge)
- return -EINVAL;
+ return ret;
+}
- return 0;
+static void free_srq_buf(struct hns_roce_srq *srq)
+{
+ free(srq->wrid);
+ hns_roce_free_buf(&srq->wqe_buf);
+ hns_roce_free_buf(&srq->idx_que.buf);
+ free(srq->idx_que.bitmap);
}
static int exec_srq_create_cmd(struct ibv_context *context,
@@ -541,7 +588,7 @@ static int exec_srq_create_cmd(struct ibv_context *context,
struct hns_roce_create_srq_ex cmd_ex = {};
int ret;
- cmd_ex.buf_addr = (uintptr_t)srq->buf.buf;
+ cmd_ex.buf_addr = (uintptr_t)srq->wqe_buf.buf;
cmd_ex.que_addr = (uintptr_t)srq->idx_que.buf.buf;
cmd_ex.db_addr = (uintptr_t)srq->db;
@@ -559,57 +606,44 @@ static int exec_srq_create_cmd(struct ibv_context *context,
static struct ibv_srq *create_srq(struct ibv_context *context,
struct ibv_srq_init_attr_ex *init_attr)
{
- struct hns_roce_context *ctx = to_hr_ctx(context);
- struct ibv_srq_attr *attr = &init_attr->attr;
+ struct hns_roce_context *hr_ctx = to_hr_ctx(context);
struct hns_roce_srq *srq;
int ret;
- if (hns_roce_verify_srq(ctx, init_attr))
- return NULL;
+ ret = verify_srq_create_attr(hr_ctx, init_attr);
+ if (ret)
+ goto err;
srq = calloc(1, sizeof(*srq));
- if (!srq)
- return NULL;
+ if (!srq) {
+ ret = -ENOMEM;
+ goto err;
+ }
if (pthread_spin_init(&srq->lock, PTHREAD_PROCESS_PRIVATE))
goto err_free_srq;
- if (to_hr_dev(context->device)->hw_version == HNS_ROCE_HW_VER2)
- srq->rsv_sge = 1;
-
- srq->wqe_cnt = roundup_pow_of_two(attr->max_wr + 1);
- srq->max_gs = roundup_pow_of_two(attr->max_sge + srq->rsv_sge);
- attr->max_sge = srq->max_gs;
- attr->srq_limit = 0;
-
- ret = hns_roce_create_idx_que(srq);
- if (ret)
+ set_srq_param(context, srq, init_attr);
+ if (alloc_srq_buf(srq))
goto err_free_srq;
- ret = hns_roce_alloc_srq_buf(srq);
- if (ret)
- goto err_idx_que;
-
- srq->db = hns_roce_alloc_db(ctx, HNS_ROCE_QP_TYPE_DB);
+ srq->db = hns_roce_alloc_db(hr_ctx, HNS_ROCE_QP_TYPE_DB);
if (!srq->db)
goto err_srq_buf;
- *(srq->db) = 0;
-
- pthread_mutex_lock(&ctx->srq_table_mutex);
+ *srq->db = 0;
ret = exec_srq_create_cmd(context, srq, init_attr);
if (ret)
goto err_srq_db;
- ret = hns_roce_store_srq(ctx, srq);
+ ret = hns_roce_store_srq(hr_ctx, srq);
if (ret)
goto err_destroy_srq;
- pthread_mutex_unlock(&ctx->srq_table_mutex);
-
- srq->max_gs = attr->max_sge;
- attr->max_sge = min(attr->max_sge - srq->rsv_sge, ctx->max_srq_sge);
+ srq->max_gs = init_attr->attr.max_sge;
+ init_attr->attr.max_sge =
+ min(init_attr->attr.max_sge - srq->rsv_sge, hr_ctx->max_srq_sge);
return &srq->verbs_srq.srq;
@@ -617,20 +651,19 @@ err_destroy_srq:
ibv_cmd_destroy_srq(&srq->verbs_srq.srq);
err_srq_db:
- pthread_mutex_unlock(&ctx->srq_table_mutex);
- hns_roce_free_db(ctx, srq->db, HNS_ROCE_QP_TYPE_DB);
+ hns_roce_free_db(hr_ctx, srq->db, HNS_ROCE_QP_TYPE_DB);
err_srq_buf:
- free(srq->wrid);
- hns_roce_free_buf(&srq->buf);
-
-err_idx_que:
- free(srq->idx_que.bitmap);
- hns_roce_free_buf(&srq->idx_que.buf);
+ free_srq_buf(srq);
err_free_srq:
free(srq);
+err:
+ if (ret < 0)
+ ret = -ret;
+
+ errno = ret;
return NULL;
}
@@ -690,23 +723,14 @@ int hns_roce_u_destroy_srq(struct ibv_srq *ibv_srq)
struct hns_roce_srq *srq = to_hr_srq(ibv_srq);
int ret;
- pthread_mutex_lock(&ctx->srq_table_mutex);
-
ret = ibv_cmd_destroy_srq(ibv_srq);
- if (ret) {
- pthread_mutex_unlock(&ctx->srq_table_mutex);
+ if (ret)
return ret;
- }
hns_roce_clear_srq(ctx, srq->srqn);
- pthread_mutex_unlock(&ctx->srq_table_mutex);
-
hns_roce_free_db(ctx, srq->db, HNS_ROCE_QP_TYPE_DB);
- hns_roce_free_buf(&srq->buf);
- free(srq->wrid);
- hns_roce_free_buf(&srq->idx_que.buf);
- free(srq->idx_que.bitmap);
+ free_srq_buf(srq);
free(srq);
return 0;
--
2.30.0
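
Editor's note: the refactored create_srq() follows the layered goto-unwind
idiom throughout: each label releases only what the steps before the failure
allocated, in reverse order, and the function reports errors by setting errno
from the negative return code (see the err: label in the diff above). A
minimal sketch of the same idiom, with hypothetical resources:

#include <errno.h>
#include <stdlib.h>

struct obj { void *buf_a, *buf_b; };

static struct obj *create_obj(void)
{
	struct obj *o;
	int ret = -ENOMEM;

	o = calloc(1, sizeof(*o));
	if (!o)
		goto err;

	o->buf_a = malloc(64);
	if (!o->buf_a)
		goto err_free_obj;

	o->buf_b = malloc(64);
	if (!o->buf_b)
		goto err_free_a;

	return o;

err_free_a:
	free(o->buf_a);
err_free_obj:
	free(o);
err:
	errno = -ret; /* same convention as create_srq()'s err: label */
	return NULL;
}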


0014-libhns-Remove-the-reserved-wqe-of-SRQ.patch
@@ -0,0 +1,69 @@
From d68ac72a8e4f2cf9754d3fcbbb8ff2a03e514c2f Mon Sep 17 00:00:00 2001
From: Wenpeng Liang <liangwenpeng@huawei.com>
Date: Tue, 11 May 2021 19:06:40 +0800
Subject: libhns: Remove the reserved wqe of SRQ
There is an unnecessarily reserved WQE in SRQ; it should be removed.
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
---
providers/hns/hns_roce_u.h | 1 +
providers/hns/hns_roce_u_hw_v2.c | 4 +---
providers/hns/hns_roce_u_verbs.c | 5 ++++-
3 files changed, 6 insertions(+), 4 deletions(-)
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index a437727c..0d7abd81 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -64,6 +64,7 @@
#define HNS_ROCE_MIN_CQE_NUM 0x40
#define HNS_ROCE_V1_MIN_WQE_NUM 0x20
#define HNS_ROCE_V2_MIN_WQE_NUM 0x40
+#define HNS_ROCE_MIN_SRQ_WQE_NUM 1
#define HNS_ROCE_CQE_SIZE 0x20
#define HNS_ROCE_V3_CQE_SIZE 0x40
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index d4e7e4f9..2fb6cdaf 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -1530,10 +1530,8 @@ static int hns_roce_u_v2_destroy_qp(struct ibv_qp *ibqp)
static int hns_roce_v2_srqwq_overflow(struct hns_roce_srq *srq)
{
struct hns_roce_idx_que *idx_que = &srq->idx_que;
- unsigned int cur;
- cur = idx_que->head - idx_que->tail;
- return cur >= srq->wqe_cnt - 1;
+ return idx_que->head - idx_que->tail >= srq->wqe_cnt;
}
static int check_post_srq_valid(struct hns_roce_srq *srq,
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index 75b9e530..4847639b 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -489,6 +489,9 @@ static int verify_srq_create_attr(struct hns_roce_context *context,
attr->attr.max_sge > context->max_srq_sge)
return -EINVAL;
+ attr->attr.max_wr = max_t(uint32_t, attr->attr.max_wr,
+ HNS_ROCE_MIN_SRQ_WQE_NUM);
+
return 0;
}
@@ -498,7 +501,7 @@ static void set_srq_param(struct ibv_context *context, struct hns_roce_srq *srq,
if (to_hr_dev(context->device)->hw_version == HNS_ROCE_HW_VER2)
srq->rsv_sge = 1;
- srq->wqe_cnt = roundup_pow_of_two(attr->attr.max_wr + 1);
+ srq->wqe_cnt = roundup_pow_of_two(attr->attr.max_wr);
srq->max_gs = roundup_pow_of_two(attr->attr.max_sge + srq->rsv_sge);
srq->wqe_shift = hr_ilog32(roundup_pow_of_two(HNS_ROCE_SGE_SIZE *
srq->max_gs));
--
2.30.0
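
Editor's note: a worked example of the capacity change. Before this patch a
queue of wqe_cnt slots could hold only wqe_cnt - 1 outstanding WRs (the check
was head - tail >= wqe_cnt - 1); afterwards all wqe_cnt slots are usable. The
queue also shrinks, since roundup_pow_of_two(8 + 1) = 16 but
roundup_pow_of_two(8) = 8. The comparison, reduced to a runnable toy:

#include <stdio.h>

int main(void)
{
	unsigned int wqe_cnt = 8, tail = 0, head = 8; /* 8 WRs posted */

	printf("old check says full: %d\n", head - tail >= wqe_cnt - 1); /* 1 */
	printf("new check says full: %d\n", head - tail >= wqe_cnt);     /* 1 */

	head = 7; /* 7 WRs posted */
	printf("old: %d, new: %d\n",
	       head - tail >= wqe_cnt - 1, /* 1: full already      */
	       head - tail >= wqe_cnt);    /* 0: one more slot free */
	return 0;
}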


0015-libhns-Refactor-process-of-setting-extended-sge.patch
@@ -0,0 +1,89 @@
From 11c81d0e3a987f95b74e03b5e592a45029302f1d Mon Sep 17 00:00:00 2001
From: Weihang Li <liweihang@huawei.com>
Date: Fri, 14 May 2021 10:02:56 +0800
Subject: libhns: Refactor process of setting extended sge
Refactor and encapsulate the logic that computes the number of extended
SGEs a WQE can use, to make it easier to understand.
Signed-off-by: Weihang Li <liweihang@huawei.com>
---
providers/hns/hns_roce_u_verbs.c | 45 ++++++++++++++++++++------------
1 file changed, 29 insertions(+), 16 deletions(-)
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index 30ab072a..a8508fc5 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -920,31 +920,44 @@ err_alloc:
return -ENOMEM;
}
-static void set_extend_sge_param(struct hns_roce_device *hr_dev,
- struct ibv_qp_init_attr_ex *attr,
- struct hns_roce_qp *qp, unsigned int wr_cnt)
+static unsigned int get_wqe_ext_sge_cnt(struct hns_roce_qp *qp)
{
- int cnt = 0;
+ if (qp->verbs_qp.qp.qp_type == IBV_QPT_UD)
+ return qp->sq.max_gs;
+
+ if (qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE)
+ return qp->sq.max_gs - HNS_ROCE_SGE_IN_WQE;
+
+ return 0;
+}
+
+static void set_ext_sge_param(struct hns_roce_device *hr_dev,
+ struct ibv_qp_init_attr_ex *attr,
+ struct hns_roce_qp *qp, unsigned int wr_cnt)
+{
+ unsigned int total_sge_cnt;
+ unsigned int wqe_sge_cnt;
+
+ qp->ex_sge.sge_shift = HNS_ROCE_SGE_SHIFT;
if (hr_dev->hw_version == HNS_ROCE_HW_VER1) {
qp->sq.max_gs = HNS_ROCE_SGE_IN_WQE;
- } else {
- qp->sq.max_gs = attr->cap.max_send_sge;
- if (attr->qp_type == IBV_QPT_UD)
- cnt = roundup_pow_of_two(wr_cnt * qp->sq.max_gs);
- else if (qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE)
- cnt = roundup_pow_of_two(wr_cnt *
- (qp->sq.max_gs -
- HNS_ROCE_SGE_IN_WQE));
+ return;
}
- qp->ex_sge.sge_shift = HNS_ROCE_SGE_SHIFT;
+ qp->sq.max_gs = attr->cap.max_send_sge;
+
+ wqe_sge_cnt = get_wqe_ext_sge_cnt(qp);
/* If the number of extended sge is not zero, they MUST use the
* space of HNS_HW_PAGE_SIZE at least.
*/
- qp->ex_sge.sge_cnt = cnt ?
- max(cnt, HNS_HW_PAGE_SIZE / HNS_ROCE_SGE_SIZE) : 0;
+ if (wqe_sge_cnt) {
+ total_sge_cnt = roundup_pow_of_two(wr_cnt * wqe_sge_cnt);
+ qp->ex_sge.sge_cnt =
+ max(total_sge_cnt,
+ (unsigned int)HNS_HW_PAGE_SIZE / HNS_ROCE_SGE_SIZE);
+ }
}
static void hns_roce_set_qp_params(struct ibv_qp_init_attr_ex *attr,
@@ -988,7 +1001,7 @@ static void hns_roce_set_qp_params(struct ibv_qp_init_attr_ex *attr,
qp->sq.wqe_cnt = cnt;
qp->sq.shift = hr_ilog32(cnt);
- set_extend_sge_param(hr_dev, attr, qp, cnt);
+ set_ext_sge_param(hr_dev, attr, qp, cnt);
qp->sq.max_post = min(ctx->max_qp_wr, cnt);
qp->sq.max_gs = min(ctx->max_sge, qp->sq.max_gs);
--
2.30.0
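
Editor's note: a self-contained restatement of the rule that
get_wqe_ext_sge_cnt() encodes. UD WQEs keep no SGEs inline, so every SGE goes
to the extended area, while RC WQEs hold HNS_ROCE_SGE_IN_WQE of them inline
first (assumed to be 2 below; check the driver headers for the real value):

#include <stdio.h>

#define HNS_ROCE_SGE_IN_WQE 2u /* assumed value of the driver constant */

static unsigned int ext_sge_cnt(unsigned int max_gs, int is_ud)
{
	if (is_ud)
		return max_gs; /* no inline SGEs in a UD WQE */

	return max_gs > HNS_ROCE_SGE_IN_WQE ?
	       max_gs - HNS_ROCE_SGE_IN_WQE : 0;
}

int main(void)
{
	printf("RC, max_gs 5 -> %u extended SGEs\n", ext_sge_cnt(5, 0)); /* 3 */
	printf("UD, max_gs 5 -> %u extended SGEs\n", ext_sge_cnt(5, 1)); /* 5 */
	return 0;
}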


0016-libhns-Optimize-set_sge-process.patch
@@ -0,0 +1,139 @@
From 3507f87f776043acd238d7c0c41cc3511f186d08 Mon Sep 17 00:00:00 2001
From: Lang Cheng <chenglang@huawei.com>
Date: Fri, 14 May 2021 10:02:57 +0800
Subject: libhns: Optimize set_sge process
Use local variables to avoid frequent ldr/str operations. Because UD's
process of setting SGEs is simpler than RC's, set_sge() can also be split
into two functions to aid compiler optimization.
Signed-off-by: Lang Cheng <chenglang@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
---
providers/hns/hns_roce_u_hw_v2.c | 83 +++++++++++++++++++++++---------
1 file changed, 61 insertions(+), 22 deletions(-)
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index 4988943a..dc79a6f8 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -701,39 +701,78 @@ static int check_qp_send(struct ibv_qp *qp, struct hns_roce_context *ctx)
return 0;
}
-static void set_sge(struct hns_roce_v2_wqe_data_seg *dseg,
- struct hns_roce_qp *qp, struct ibv_send_wr *wr,
- struct hns_roce_sge_info *sge_info)
+static void set_rc_sge(struct hns_roce_v2_wqe_data_seg *dseg,
+ struct hns_roce_qp *qp, struct ibv_send_wr *wr,
+ struct hns_roce_sge_info *sge_info)
{
+ uint32_t mask = qp->ex_sge.sge_cnt - 1;
+ uint32_t index = sge_info->start_idx;
+ struct ibv_sge *sge = wr->sg_list;
+ uint32_t len = 0;
+ uint32_t cnt = 0;
+ int flag;
int i;
- sge_info->valid_num = 0;
- sge_info->total_len = 0;
+ flag = (wr->send_flags & IBV_SEND_INLINE &&
+ wr->opcode != IBV_WR_ATOMIC_FETCH_AND_ADD &&
+ wr->opcode != IBV_WR_ATOMIC_CMP_AND_SWP);
- for (i = 0; i < wr->num_sge; i++) {
- if (unlikely(!wr->sg_list[i].length))
+ for (i = 0; i < wr->num_sge; i++, sge++) {
+ if (unlikely(!sge->length))
continue;
- sge_info->total_len += wr->sg_list[i].length;
- sge_info->valid_num++;
+ len += sge->length;
+ cnt++;
- if (wr->send_flags & IBV_SEND_INLINE &&
- wr->opcode != IBV_WR_ATOMIC_FETCH_AND_ADD &&
- wr->opcode != IBV_WR_ATOMIC_CMP_AND_SWP)
+ if (flag)
continue;
- /* No inner sge in UD wqe */
- if (sge_info->valid_num <= HNS_ROCE_SGE_IN_WQE &&
- qp->verbs_qp.qp.qp_type != IBV_QPT_UD) {
- set_data_seg_v2(dseg, wr->sg_list + i);
+ if (cnt <= HNS_ROCE_SGE_IN_WQE) {
+ set_data_seg_v2(dseg, sge);
dseg++;
} else {
- dseg = get_send_sge_ex(qp, sge_info->start_idx &
- (qp->ex_sge.sge_cnt - 1));
- set_data_seg_v2(dseg, wr->sg_list + i);
- sge_info->start_idx++;
+ dseg = get_send_sge_ex(qp, index & mask);
+ set_data_seg_v2(dseg, sge);
+ index++;
}
}
+
+ sge_info->start_idx = index;
+ sge_info->valid_num = cnt;
+ sge_info->total_len = len;
+}
+
+static void set_ud_sge(struct hns_roce_v2_wqe_data_seg *dseg,
+ struct hns_roce_qp *qp, struct ibv_send_wr *wr,
+ struct hns_roce_sge_info *sge_info)
+{
+ int flag = wr->send_flags & IBV_SEND_INLINE;
+ uint32_t mask = qp->ex_sge.sge_cnt - 1;
+ uint32_t index = sge_info->start_idx;
+ struct ibv_sge *sge = wr->sg_list;
+ uint32_t len = 0;
+ uint32_t cnt = 0;
+ int i;
+
+ for (i = 0; i < wr->num_sge; i++, sge++) {
+ if (unlikely(!sge->length))
+ continue;
+
+ len += sge->length;
+ cnt++;
+
+ if (flag)
+ continue;
+
+ /* No inner sge in UD wqe */
+ dseg = get_send_sge_ex(qp, index & mask);
+ set_data_seg_v2(dseg, sge);
+ index++;
+ }
+
+ sge_info->start_idx = index;
+ sge_info->valid_num = cnt;
+ sge_info->total_len = len;
}
static int fill_ext_sge_inl_data(struct hns_roce_qp *qp,
@@ -910,7 +949,7 @@ static int fill_ud_data_seg(struct hns_roce_ud_sq_wqe *ud_sq_wqe,
UD_SQ_WQE_MSG_START_SGE_IDX_S,
sge_info->start_idx & (qp->ex_sge.sge_cnt - 1));
- set_sge((struct hns_roce_v2_wqe_data_seg *)ud_sq_wqe, qp, wr, sge_info);
+ set_ud_sge((struct hns_roce_v2_wqe_data_seg *)ud_sq_wqe, qp, wr, sge_info);
ud_sq_wqe->msg_len = htole32(sge_info->total_len);
@@ -1111,7 +1150,7 @@ static int set_rc_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr,
wqe += sizeof(struct hns_roce_rc_sq_wqe);
dseg = wqe;
- set_sge(dseg, qp, wr, sge_info);
+ set_rc_sge(dseg, qp, wr, sge_info);
rc_sq_wqe->msg_len = htole32(sge_info->total_len);
--
2.30.0
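
Editor's note: the pattern behind the optimization, reduced to a sketch.
Accumulate into locals inside the loop and store the results once at the end,
rather than updating the sge_info fields through a pointer on every iteration
(field names are borrowed from the patch; everything else is illustrative):

struct sge_info {
	unsigned int valid_num;
	unsigned int total_len;
};

static void sum_sges(const unsigned int *lengths, int num,
		     struct sge_info *out)
{
	unsigned int len = 0;
	unsigned int cnt = 0;
	int i;

	for (i = 0; i < num; i++) {
		if (!lengths[i]) /* zero-length SGEs don't count */
			continue;
		len += lengths[i];
		cnt++;
	}

	out->total_len = len; /* one store instead of one per iteration */
	out->valid_num = cnt;
}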


rdma-core.spec
@@ -1,6 +1,6 @@
 Name: rdma-core
 Version: 35.1
-Release: 2
+Release: 3
 Summary: RDMA core userspace libraries and daemons
 License: GPLv2 or BSD
 Url: https://github.com/linux-rdma/rdma-core
@@ -15,6 +15,14 @@ Patch5: 0005-libhns-Avoid-using-WQE-indexes-that-exceed-the-SRQ-s.patch
 Patch6: 0006-libhns-Don-t-create-RQ-for-a-QP-that-associated-with.patch
 Patch7: 0007-libhns-Add-support-for-direct-wqe.patch
 Patch8: 0008-libhns-Use-new-SQ-doorbell-register-for-HIP09.patch
+Patch9: 0009-libhns-Bugfix-for-checking-whether-the-SRQ-is-full-w.patch
+Patch10: 0010-libhns-Allow-users-to-create-a-0-depth-SRQs.patch
+Patch11: 0011-libhns-Refactor-the-process-of-post_srq_recv.patch
+Patch12: 0012-libhns-Set-srqlimit-to-0-when-creating-SRQ.patch
+Patch13: 0013-libhns-Refactor-the-process-of-create_srq.patch
+Patch14: 0014-libhns-Remove-the-reserved-wqe-of-SRQ.patch
+Patch15: 0015-libhns-Refactor-process-of-setting-extended-sge.patch
+Patch16: 0016-libhns-Optimize-set_sge-process.patch
 BuildRequires: binutils cmake >= 2.8.11 gcc libudev-devel pkgconfig pkgconfig(libnl-3.0)
 BuildRequires: pkgconfig(libnl-route-3.0) valgrind-devel systemd systemd-devel
@@ -259,6 +267,12 @@ fi
 %{_mandir}/*
 %changelog
+* Mon Jul 11 2022 luozhengfeng <luozhengfeng@h-partners.com> - 35.1-3
+- Type: bugfix
+- ID: NA
+- SUG: NA
+- DESC: bugfix and refactor for hns SRQ and SGE
+
 * Mon Jan 10 2022 tangchengchang <tangchengchang@huawei.com> - 35.1-2
 - Type: requirement
 - ID: NA