255 lines
8.5 KiB
Diff
255 lines
8.5 KiB
Diff
|
|
From 448d82b2c62f09f1dd9c8045d34623dedef1c111 Mon Sep 17 00:00:00 2001
|
||
|
|
From: Luoyouming <luoyouming@huawei.com>
|
||
|
|
Date: Fri, 19 Nov 2021 20:21:21 +0800
|
||
|
|
Subject: [PATCH v4 04/10] libhns: Fix the problem of sge nums
|
||
|
|
|
||
|
|
Currently, the driver only uses max_send_sge to initialize sge num
|
||
|
|
when creating a QP. So, in the sq inline scenario, the driver may not
|
||
|
|
have enough sge to send data. For example, if max_send_sge is 16 and
|
||
|
|
max_inline_data is 1024, the driver needs 1024/16=64 sge to send data.
|
||
|
|
Therefore, the calculation method of sge num is modified to take the
|
||
|
|
maximum value of max_send_sge and max_inline_data/16 to solve this
|
||
|
|
problem.
|
||
|
|
|
||
|
|
Fixes: 11c81d0e3a98 ("libhns: Refactor process of setting extended sge")
|
||
|
|
Fixes: b7814b7b9715 ("libhns: Support inline data in extented sge space for RC")
|
||
|
|
|
||
|
|
Signed-off-by: Luoyouming <luoyouming@huawei.com>
|
||
|
|
Reviewed-by: Yangyang Li <liyangyang20@huawei.com>
|
||
|
|
---
|
||
|
|
providers/hns/hns_roce_u.c | 9 +++-
|
||
|
|
providers/hns/hns_roce_u.h | 3 ++
|
||
|
|
providers/hns/hns_roce_u_abi.h | 2 +-
|
||
|
|
providers/hns/hns_roce_u_hw_v2.c | 13 +----
|
||
|
|
providers/hns/hns_roce_u_verbs.c | 84 ++++++++++++++++++++++++--------
|
||
|
|
5 files changed, 77 insertions(+), 34 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c
|
||
|
|
index a46ceb9..1bd5bb1 100644
|
||
|
|
--- a/providers/hns/hns_roce_u.c
|
||
|
|
+++ b/providers/hns/hns_roce_u.c
|
||
|
|
@@ -103,9 +103,9 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
|
||
|
|
{
|
||
|
|
struct hns_roce_device *hr_dev = to_hr_dev(ibdev);
|
||
|
|
struct hns_roce_alloc_ucontext_resp resp = {};
|
||
|
|
+ struct hns_roce_alloc_ucontext cmd = {};
|
||
|
|
struct ibv_device_attr dev_attrs;
|
||
|
|
struct hns_roce_context *context;
|
||
|
|
- struct ibv_get_context cmd;
|
||
|
|
int i;
|
||
|
|
|
||
|
|
context = verbs_init_and_alloc_context(ibdev, cmd_fd, context, ibv_ctx,
|
||
|
|
@@ -113,7 +113,8 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
|
||
|
|
if (!context)
|
||
|
|
return NULL;
|
||
|
|
|
||
|
|
- if (ibv_cmd_get_context(&context->ibv_ctx, &cmd, sizeof(cmd),
|
||
|
|
+ cmd.config |= HNS_ROCE_EXSGE_FLAGS;
|
||
|
|
+ if (ibv_cmd_get_context(&context->ibv_ctx, &cmd.ibv_cmd, sizeof(cmd),
|
||
|
|
&resp.ibv_resp, sizeof(resp)))
|
||
|
|
goto err_free;
|
||
|
|
|
||
|
|
@@ -124,6 +125,10 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
|
||
|
|
else
|
||
|
|
context->cqe_size = HNS_ROCE_V3_CQE_SIZE;
|
||
|
|
|
||
|
|
+ context->config = resp.config;
|
||
|
|
+ if (resp.config & HNS_ROCE_RSP_EXSGE_FLAGS)
|
||
|
|
+ context->max_inline_data = resp.max_inline_data;
|
||
|
|
+
|
||
|
|
context->qp_table_shift = calc_table_shift(resp.qp_tab_size,
|
||
|
|
HNS_ROCE_QP_TABLE_BITS);
|
||
|
|
context->qp_table_mask = (1 << context->qp_table_shift) - 1;
|
||
|
|
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
|
||
|
|
index 5d90634..5388f9c 100644
|
||
|
|
--- a/providers/hns/hns_roce_u.h
|
||
|
|
+++ b/providers/hns/hns_roce_u.h
|
||
|
|
@@ -213,6 +213,8 @@ struct hns_roce_context {
|
||
|
|
unsigned int max_srq_sge;
|
||
|
|
int max_cqe;
|
||
|
|
unsigned int cqe_size;
|
||
|
|
+ uint32_t config;
|
||
|
|
+ unsigned int max_inline_data;
|
||
|
|
};
|
||
|
|
|
||
|
|
struct hns_roce_pd {
|
||
|
|
@@ -267,6 +269,7 @@ struct hns_roce_wq {
|
||
|
|
unsigned int head;
|
||
|
|
unsigned int tail;
|
||
|
|
unsigned int max_gs;
|
||
|
|
+ unsigned int ext_sge_cnt;
|
||
|
|
unsigned int rsv_sge;
|
||
|
|
unsigned int wqe_shift;
|
||
|
|
unsigned int shift; /* wq size is 2^shift */
|
||
|
|
diff --git a/providers/hns/hns_roce_u_abi.h b/providers/hns/hns_roce_u_abi.h
|
||
|
|
index 333f977..2753d30 100644
|
||
|
|
--- a/providers/hns/hns_roce_u_abi.h
|
||
|
|
+++ b/providers/hns/hns_roce_u_abi.h
|
||
|
|
@@ -47,7 +47,7 @@ DECLARE_DRV_CMD(hns_roce_create_cq_ex, IB_USER_VERBS_EX_CMD_CREATE_CQ,
|
||
|
|
hns_roce_ib_create_cq, hns_roce_ib_create_cq_resp);
|
||
|
|
|
||
|
|
DECLARE_DRV_CMD(hns_roce_alloc_ucontext, IB_USER_VERBS_CMD_GET_CONTEXT,
|
||
|
|
- empty, hns_roce_ib_alloc_ucontext_resp);
|
||
|
|
+ hns_roce_ib_alloc_ucontext, hns_roce_ib_alloc_ucontext_resp);
|
||
|
|
|
||
|
|
DECLARE_DRV_CMD(hns_roce_create_qp, IB_USER_VERBS_CMD_CREATE_QP,
|
||
|
|
hns_roce_ib_create_qp, hns_roce_ib_create_qp_resp);
|
||
|
|
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
|
||
|
|
index bb4298f..ebe68bc 100644
|
||
|
|
--- a/providers/hns/hns_roce_u_hw_v2.c
|
||
|
|
+++ b/providers/hns/hns_roce_u_hw_v2.c
|
||
|
|
@@ -841,14 +841,6 @@ static void get_src_buf_info(void **src_addr, uint32_t *src_len,
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
-static unsigned int get_std_sge_num(struct hns_roce_qp *qp)
|
||
|
|
-{
|
||
|
|
- if (qp->verbs_qp.qp.qp_type == IBV_QPT_UD)
|
||
|
|
- return 0;
|
||
|
|
-
|
||
|
|
- return HNS_ROCE_SGE_IN_WQE;
|
||
|
|
-}
|
||
|
|
-
|
||
|
|
static int fill_ext_sge_inl_data(struct hns_roce_qp *qp,
|
||
|
|
struct hns_roce_sge_info *sge_info,
|
||
|
|
const void *buf_list,
|
||
|
|
@@ -858,12 +850,9 @@ static int fill_ext_sge_inl_data(struct hns_roce_qp *qp,
|
||
|
|
unsigned int sge_mask = qp->ex_sge.sge_cnt - 1;
|
||
|
|
void *dst_addr, *src_addr, *tail_bound_addr;
|
||
|
|
uint32_t src_len, tail_len;
|
||
|
|
- unsigned int std_sge_num;
|
||
|
|
int i;
|
||
|
|
|
||
|
|
- std_sge_num = get_std_sge_num(qp);
|
||
|
|
- if (sge_info->total_len >
|
||
|
|
- (qp->sq.max_gs - std_sge_num) * HNS_ROCE_SGE_SIZE)
|
||
|
|
+ if (sge_info->total_len > qp->sq.ext_sge_cnt * HNS_ROCE_SGE_SIZE)
|
||
|
|
return EINVAL;
|
||
|
|
|
||
|
|
dst_addr = get_send_sge_ex(qp, sge_info->start_idx & sge_mask);
|
||
|
|
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
|
||
|
|
index ba7f2ae..851b145 100644
|
||
|
|
--- a/providers/hns/hns_roce_u_verbs.c
|
||
|
|
+++ b/providers/hns/hns_roce_u_verbs.c
|
||
|
|
@@ -978,41 +978,88 @@ err_alloc:
|
||
|
|
return -ENOMEM;
|
||
|
|
}
|
||
|
|
|
||
|
|
-static unsigned int get_wqe_ext_sge_cnt(struct hns_roce_qp *qp)
|
||
|
|
+/**
|
||
|
|
+ * Calculate sge num according to attr's max_send_sge
|
||
|
|
+ */
|
||
|
|
+static unsigned int get_sge_num_from_max_send_sge(bool is_ud,
|
||
|
|
+ uint32_t max_send_sge)
|
||
|
|
{
|
||
|
|
- if (qp->verbs_qp.qp.qp_type == IBV_QPT_UD)
|
||
|
|
- return qp->sq.max_gs;
|
||
|
|
+ unsigned int std_sge_num;
|
||
|
|
+ unsigned int min_sge;
|
||
|
|
|
||
|
|
- if (qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE)
|
||
|
|
- return qp->sq.max_gs - HNS_ROCE_SGE_IN_WQE;
|
||
|
|
+ std_sge_num = is_ud ? 0 : HNS_ROCE_SGE_IN_WQE;
|
||
|
|
+ min_sge = is_ud ? 1 : 0;
|
||
|
|
+ return max_send_sge > std_sge_num ? (max_send_sge - std_sge_num) :
|
||
|
|
+ min_sge;
|
||
|
|
+}
|
||
|
|
|
||
|
|
- return 0;
|
||
|
|
+/**
|
||
|
|
+ * Calculate sge num according to attr's max_inline_data
|
||
|
|
+ */
|
||
|
|
+static unsigned int get_sge_num_from_max_inl_data(bool is_ud,
|
||
|
|
+ uint32_t max_inline_data)
|
||
|
|
+{
|
||
|
|
+ unsigned int inline_sge = 0;
|
||
|
|
+
|
||
|
|
+ inline_sge = max_inline_data / HNS_ROCE_SGE_SIZE;
|
||
|
|
+ /*
|
||
|
|
+ * If max_inline_data is less than
|
||
|
|
+ * HNS_ROCE_SGE_IN_WQE * HNS_ROCE_SGE_SIZE,
|
||
|
|
+ * there is no need to extend sge, except in UD mode.
|
||
|
|
+ */
|
||
|
|
+ if (!is_ud && (inline_sge <= HNS_ROCE_SGE_IN_WQE))
|
||
|
|
+ inline_sge = 0;
|
||
|
|
+
|
||
|
|
+ return inline_sge;
|
||
|
|
}
|
||
|
|
|
||
|
|
-static void set_ext_sge_param(struct hns_roce_device *hr_dev,
|
||
|
|
+static void set_ext_sge_param(struct hns_roce_context *ctx,
|
||
|
|
struct ibv_qp_init_attr_ex *attr,
|
||
|
|
struct hns_roce_qp *qp, unsigned int wr_cnt)
|
||
|
|
{
|
||
|
|
+ bool is_ud = (qp->verbs_qp.qp.qp_type == IBV_QPT_UD);
|
||
|
|
+ unsigned int ext_wqe_sge_cnt;
|
||
|
|
+ unsigned int inline_ext_sge;
|
||
|
|
unsigned int total_sge_cnt;
|
||
|
|
- unsigned int wqe_sge_cnt;
|
||
|
|
+ unsigned int std_sge_num;
|
||
|
|
|
||
|
|
qp->ex_sge.sge_shift = HNS_ROCE_SGE_SHIFT;
|
||
|
|
-
|
||
|
|
- qp->sq.max_gs = attr->cap.max_send_sge;
|
||
|
|
-
|
||
|
|
- wqe_sge_cnt = get_wqe_ext_sge_cnt(qp);
|
||
|
|
+ std_sge_num = is_ud ? 0 : HNS_ROCE_SGE_IN_WQE;
|
||
|
|
+ ext_wqe_sge_cnt = get_sge_num_from_max_send_sge(is_ud,
|
||
|
|
+ attr->cap.max_send_sge);
|
||
|
|
+
|
||
|
|
+ if (ctx->config & HNS_ROCE_RSP_EXSGE_FLAGS) {
|
||
|
|
+ attr->cap.max_inline_data = min_t(uint32_t, roundup_pow_of_two(
|
||
|
|
+ attr->cap.max_inline_data),
|
||
|
|
+ ctx->max_inline_data);
|
||
|
|
+
|
||
|
|
+ inline_ext_sge = max(ext_wqe_sge_cnt,
|
||
|
|
+ get_sge_num_from_max_inl_data(is_ud,
|
||
|
|
+ attr->cap.max_inline_data));
|
||
|
|
+ qp->sq.ext_sge_cnt = inline_ext_sge ?
|
||
|
|
+ roundup_pow_of_two(inline_ext_sge) : 0;
|
||
|
|
+ qp->sq.max_gs = min((qp->sq.ext_sge_cnt + std_sge_num),
|
||
|
|
+ ctx->max_sge);
|
||
|
|
+
|
||
|
|
+ ext_wqe_sge_cnt = qp->sq.ext_sge_cnt;
|
||
|
|
+ } else {
|
||
|
|
+ qp->sq.max_gs = max(1U, attr->cap.max_send_sge);
|
||
|
|
+ qp->sq.max_gs = min(qp->sq.max_gs, ctx->max_sge);
|
||
|
|
+ qp->sq.ext_sge_cnt = qp->sq.max_gs;
|
||
|
|
+ }
|
||
|
|
|
||
|
|
/* If the number of extended sge is not zero, they MUST use the
|
||
|
|
* space of HNS_HW_PAGE_SIZE at least.
|
||
|
|
*/
|
||
|
|
- if (wqe_sge_cnt) {
|
||
|
|
- total_sge_cnt = roundup_pow_of_two(wr_cnt * wqe_sge_cnt);
|
||
|
|
- qp->ex_sge.sge_cnt =
|
||
|
|
- max(total_sge_cnt,
|
||
|
|
- (unsigned int)HNS_HW_PAGE_SIZE / HNS_ROCE_SGE_SIZE);
|
||
|
|
+ if (ext_wqe_sge_cnt) {
|
||
|
|
+ total_sge_cnt = roundup_pow_of_two(wr_cnt * ext_wqe_sge_cnt);
|
||
|
|
+ qp->ex_sge.sge_cnt = max(total_sge_cnt,
|
||
|
|
+ (unsigned int)HNS_HW_PAGE_SIZE /
|
||
|
|
+ HNS_ROCE_SGE_SIZE);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
+
|
||
|
|
static void hns_roce_set_qp_params(struct ibv_qp_init_attr_ex *attr,
|
||
|
|
struct hns_roce_qp *qp,
|
||
|
|
struct hns_roce_context *ctx)
|
||
|
|
@@ -1044,10 +1091,9 @@ static void hns_roce_set_qp_params(struct ibv_qp_init_attr_ex *attr,
|
||
|
|
qp->sq.wqe_cnt = cnt;
|
||
|
|
qp->sq.shift = hr_ilog32(cnt);
|
||
|
|
|
||
|
|
- set_ext_sge_param(hr_dev, attr, qp, cnt);
|
||
|
|
+ set_ext_sge_param(ctx, attr, qp, cnt);
|
||
|
|
|
||
|
|
qp->sq.max_post = min(ctx->max_qp_wr, cnt);
|
||
|
|
- qp->sq.max_gs = min(ctx->max_sge, qp->sq.max_gs);
|
||
|
|
|
||
|
|
qp->sq_signal_bits = attr->sq_sig_all ? 0 : 1;
|
||
|
|
|
||
|
|
--
|
||
|
|
2.30.0
|
||
|
|
|