Update to v50.0

Update the version of rdma-core to v50.0. Subsequent maintenance and
upgrades will be performed based on this baseline.

Signed-off-by: Ran Zhou <zhouran10@h-partners.com>
Ran Zhou 2024-02-06 16:03:33 +08:00
parent ea5b17cb96
commit 66a7e0b9a7
84 changed files with 13 additions and 66694 deletions

@@ -1,45 +0,0 @@
From 0de1678211e710c2cd33e3aea98b1271cae9bd98 Mon Sep 17 00:00:00 2001
From: Luoyouming <luoyouming@huawei.com>
Date: Tue, 20 Sep 2022 11:47:45 +0800
Subject: [PATCH v4 01/10] libhns: Use a constant instead of sizeof operation
The sge size is known to be constant, so it is unnecessary to use sizeof to
calculate it.
Signed-off-by: Luoyouming <luoyouming@huawei.com>
Reviewed-by: Yangyang Li <liyangyang20@huawei.com>
---
providers/hns/hns_roce_u_hw_v2.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index 0169250..d9ea18e 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -847,13 +847,12 @@ static int fill_ext_sge_inl_data(struct hns_roce_qp *qp,
uint32_t num_buf,
enum hns_roce_wr_buf_type buf_type)
{
- unsigned int sge_sz = sizeof(struct hns_roce_v2_wqe_data_seg);
unsigned int sge_mask = qp->ex_sge.sge_cnt - 1;
void *dst_addr, *src_addr, *tail_bound_addr;
uint32_t src_len, tail_len;
int i;
- if (sge_info->total_len > qp->sq.max_gs * sge_sz)
+ if (sge_info->total_len > qp->sq.max_gs * HNS_ROCE_SGE_SIZE)
return EINVAL;
dst_addr = get_send_sge_ex(qp, sge_info->start_idx & sge_mask);
@@ -880,7 +879,7 @@ static int fill_ext_sge_inl_data(struct hns_roce_qp *qp,
}
}
- sge_info->valid_num = DIV_ROUND_UP(sge_info->total_len, sge_sz);
+ sge_info->valid_num = DIV_ROUND_UP(sge_info->total_len, HNS_ROCE_SGE_SIZE);
sge_info->start_idx += sge_info->valid_num;
return 0;
--
2.30.0
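For reference, a small standalone sketch of why the constant can replace the sizeof expression. The struct below is an assumed mirror of hns_roce_v2_wqe_data_seg (len, lkey, addr), not code taken from this patch:

#include <stdint.h>

#define HNS_ROCE_SGE_SIZE 16

/* Assumed mirror of struct hns_roce_v2_wqe_data_seg: a 16-byte hardware SGE. */
struct wqe_data_seg {
	uint32_t len;
	uint32_t lkey;
	uint64_t addr;
};

/* If this ever stops holding, constant-based length checks would be wrong. */
_Static_assert(sizeof(struct wqe_data_seg) == HNS_ROCE_SGE_SIZE,
	       "hardware SGE is 16 bytes");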

@@ -1,53 +0,0 @@
From a57d5dfbc2701b9d0c47eb70a1bb82b16170a7d2 Mon Sep 17 00:00:00 2001
From: Luoyouming <luoyouming@huawei.com>
Date: Tue, 20 Sep 2022 11:53:18 +0800
Subject: [PATCH v4 02/10] libhns: Fix ext_sge num error when post send
The max_gs is the sum of the extended sge and standard sge counts. In
fill_ext_sge_inl_data(), max_gs does not subtract the number of standard
sges, but is used directly to calculate the capacity of the extended sge space.
Fixes: b7814b7b9715 ("libhns: Support inline data in extented sge space for RC")
Signed-off-by: Luoyouming <luoyouming@huawei.com>
Reviewed-by: Yangyang Li <liyangyang20@huawei.com>
---
providers/hns/hns_roce_u_hw_v2.c | 13 ++++++++++++-
1 file changed, 12 insertions(+), 1 deletion(-)
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index d9ea18e..bb4298f 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -841,6 +841,14 @@ static void get_src_buf_info(void **src_addr, uint32_t *src_len,
}
}
+static unsigned int get_std_sge_num(struct hns_roce_qp *qp)
+{
+ if (qp->verbs_qp.qp.qp_type == IBV_QPT_UD)
+ return 0;
+
+ return HNS_ROCE_SGE_IN_WQE;
+}
+
static int fill_ext_sge_inl_data(struct hns_roce_qp *qp,
struct hns_roce_sge_info *sge_info,
const void *buf_list,
@@ -850,9 +858,12 @@ static int fill_ext_sge_inl_data(struct hns_roce_qp *qp,
unsigned int sge_mask = qp->ex_sge.sge_cnt - 1;
void *dst_addr, *src_addr, *tail_bound_addr;
uint32_t src_len, tail_len;
+ unsigned int std_sge_num;
int i;
- if (sge_info->total_len > qp->sq.max_gs * HNS_ROCE_SGE_SIZE)
+ std_sge_num = get_std_sge_num(qp);
+ if (sge_info->total_len >
+ (qp->sq.max_gs - std_sge_num) * HNS_ROCE_SGE_SIZE)
return EINVAL;
dst_addr = get_send_sge_ex(qp, sge_info->start_idx & sge_mask);
--
2.30.0
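A minimal standalone sketch of the corrected capacity check. The constants are assumptions for illustration (sge size of 16 bytes, and HNS_ROCE_SGE_IN_WQE taken here to be 2 standard sges in the base WQE), not values copied from the patch:

#include <errno.h>
#include <stdbool.h>
#include <stdint.h>

#define SGE_SIZE   16	/* HNS_ROCE_SGE_SIZE */
#define SGE_IN_WQE 2	/* assumed value of HNS_ROCE_SGE_IN_WQE */

/* Only the extended part of max_gs can hold inline data; UD WQEs carry no
 * standard sge, so for UD the whole max_gs counts as extended. */
static int check_inline_fits_ext_sge(uint32_t total_len, uint32_t max_gs,
				     bool is_ud)
{
	uint32_t std_sge_num = is_ud ? 0 : SGE_IN_WQE;

	if (total_len > (max_gs - std_sge_num) * SGE_SIZE)
		return EINVAL;
	return 0;
}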

@@ -1,46 +0,0 @@
From f20816984da80e2fe9a82b3b330f85150763243e Mon Sep 17 00:00:00 2001
From: Luoyouming <luoyouming@huawei.com>
Date: Mon, 26 Sep 2022 11:14:05 +0800
Subject: [PATCH v4 03/10] Update kernel headers
To commit ?? ("RDMA/hns: Fix the problem of sge nums").
Signed-off-by: Luoyouming <luoyouming@huawei.com>
---
kernel-headers/rdma/hns-abi.h | 15 +++++++++++++++
1 file changed, 15 insertions(+)
diff --git a/kernel-headers/rdma/hns-abi.h b/kernel-headers/rdma/hns-abi.h
index f6fde06..fc83dfb 100644
--- a/kernel-headers/rdma/hns-abi.h
+++ b/kernel-headers/rdma/hns-abi.h
@@ -85,11 +85,26 @@ struct hns_roce_ib_create_qp_resp {
__aligned_u64 dwqe_mmap_key;
};
+enum {
+ HNS_ROCE_EXSGE_FLAGS = 1 << 0,
+};
+
+enum {
+ HNS_ROCE_RSP_EXSGE_FLAGS = 1 << 0,
+};
+
struct hns_roce_ib_alloc_ucontext_resp {
__u32 qp_tab_size;
__u32 cqe_size;
__u32 srq_tab_size;
__u32 reserved;
+ __u32 config;
+ __u32 max_inline_data;
+};
+
+struct hns_roce_ib_alloc_ucontext {
+ __u32 config;
+ __u32 reserved;
};
struct hns_roce_ib_alloc_pd_resp {
--
2.30.0

@@ -1,254 +0,0 @@
From 448d82b2c62f09f1dd9c8045d34623dedef1c111 Mon Sep 17 00:00:00 2001
From: Luoyouming <luoyouming@huawei.com>
Date: Fri, 19 Nov 2021 20:21:21 +0800
Subject: [PATCH v4 04/10] libhns: Fix the problem of sge nums
Currently, the driver only uses max_send_sge to initialize the sge num
when creating a QP. So, in the sq inline scenario, the driver may not
have enough sges to send data. For example, if max_send_sge is 16 and
max_inline_data is 1024, the driver needs 1024/16 = 64 sges to send the data.
Therefore, the calculation of the sge num is changed to take the maximum
of max_send_sge and max_inline_data/16 to solve this problem.
Fixes: 11c81d0e3a98 ("libhns: Refactor process of setting extended sge")
Fixes: b7814b7b9715 ("libhns: Support inline data in extented sge space for RC")
Signed-off-by: Luoyouming <luoyouming@huawei.com>
Reviewed-by: Yangyang Li <liyangyang20@huawei.com>
---
providers/hns/hns_roce_u.c | 9 +++-
providers/hns/hns_roce_u.h | 3 ++
providers/hns/hns_roce_u_abi.h | 2 +-
providers/hns/hns_roce_u_hw_v2.c | 13 +----
providers/hns/hns_roce_u_verbs.c | 84 ++++++++++++++++++++++++--------
5 files changed, 77 insertions(+), 34 deletions(-)
diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c
index a46ceb9..1bd5bb1 100644
--- a/providers/hns/hns_roce_u.c
+++ b/providers/hns/hns_roce_u.c
@@ -103,9 +103,9 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
{
struct hns_roce_device *hr_dev = to_hr_dev(ibdev);
struct hns_roce_alloc_ucontext_resp resp = {};
+ struct hns_roce_alloc_ucontext cmd = {};
struct ibv_device_attr dev_attrs;
struct hns_roce_context *context;
- struct ibv_get_context cmd;
int i;
context = verbs_init_and_alloc_context(ibdev, cmd_fd, context, ibv_ctx,
@@ -113,7 +113,8 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
if (!context)
return NULL;
- if (ibv_cmd_get_context(&context->ibv_ctx, &cmd, sizeof(cmd),
+ cmd.config |= HNS_ROCE_EXSGE_FLAGS;
+ if (ibv_cmd_get_context(&context->ibv_ctx, &cmd.ibv_cmd, sizeof(cmd),
&resp.ibv_resp, sizeof(resp)))
goto err_free;
@@ -124,6 +125,10 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
else
context->cqe_size = HNS_ROCE_V3_CQE_SIZE;
+ context->config = resp.config;
+ if (resp.config & HNS_ROCE_RSP_EXSGE_FLAGS)
+ context->max_inline_data = resp.max_inline_data;
+
context->qp_table_shift = calc_table_shift(resp.qp_tab_size,
HNS_ROCE_QP_TABLE_BITS);
context->qp_table_mask = (1 << context->qp_table_shift) - 1;
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index 5d90634..5388f9c 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -213,6 +213,8 @@ struct hns_roce_context {
unsigned int max_srq_sge;
int max_cqe;
unsigned int cqe_size;
+ uint32_t config;
+ unsigned int max_inline_data;
};
struct hns_roce_pd {
@@ -267,6 +269,7 @@ struct hns_roce_wq {
unsigned int head;
unsigned int tail;
unsigned int max_gs;
+ unsigned int ext_sge_cnt;
unsigned int rsv_sge;
unsigned int wqe_shift;
unsigned int shift; /* wq size is 2^shift */
diff --git a/providers/hns/hns_roce_u_abi.h b/providers/hns/hns_roce_u_abi.h
index 333f977..2753d30 100644
--- a/providers/hns/hns_roce_u_abi.h
+++ b/providers/hns/hns_roce_u_abi.h
@@ -47,7 +47,7 @@ DECLARE_DRV_CMD(hns_roce_create_cq_ex, IB_USER_VERBS_EX_CMD_CREATE_CQ,
hns_roce_ib_create_cq, hns_roce_ib_create_cq_resp);
DECLARE_DRV_CMD(hns_roce_alloc_ucontext, IB_USER_VERBS_CMD_GET_CONTEXT,
- empty, hns_roce_ib_alloc_ucontext_resp);
+ hns_roce_ib_alloc_ucontext, hns_roce_ib_alloc_ucontext_resp);
DECLARE_DRV_CMD(hns_roce_create_qp, IB_USER_VERBS_CMD_CREATE_QP,
hns_roce_ib_create_qp, hns_roce_ib_create_qp_resp);
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index bb4298f..ebe68bc 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -841,14 +841,6 @@ static void get_src_buf_info(void **src_addr, uint32_t *src_len,
}
}
-static unsigned int get_std_sge_num(struct hns_roce_qp *qp)
-{
- if (qp->verbs_qp.qp.qp_type == IBV_QPT_UD)
- return 0;
-
- return HNS_ROCE_SGE_IN_WQE;
-}
-
static int fill_ext_sge_inl_data(struct hns_roce_qp *qp,
struct hns_roce_sge_info *sge_info,
const void *buf_list,
@@ -858,12 +850,9 @@ static int fill_ext_sge_inl_data(struct hns_roce_qp *qp,
unsigned int sge_mask = qp->ex_sge.sge_cnt - 1;
void *dst_addr, *src_addr, *tail_bound_addr;
uint32_t src_len, tail_len;
- unsigned int std_sge_num;
int i;
- std_sge_num = get_std_sge_num(qp);
- if (sge_info->total_len >
- (qp->sq.max_gs - std_sge_num) * HNS_ROCE_SGE_SIZE)
+ if (sge_info->total_len > qp->sq.ext_sge_cnt * HNS_ROCE_SGE_SIZE)
return EINVAL;
dst_addr = get_send_sge_ex(qp, sge_info->start_idx & sge_mask);
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index ba7f2ae..851b145 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -978,41 +978,88 @@ err_alloc:
return -ENOMEM;
}
-static unsigned int get_wqe_ext_sge_cnt(struct hns_roce_qp *qp)
+/**
+ * Calculated sge num according to attr's max_send_sge
+ */
+static unsigned int get_sge_num_from_max_send_sge(bool is_ud,
+ uint32_t max_send_sge)
{
- if (qp->verbs_qp.qp.qp_type == IBV_QPT_UD)
- return qp->sq.max_gs;
+ unsigned int std_sge_num;
+ unsigned int min_sge;
- if (qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE)
- return qp->sq.max_gs - HNS_ROCE_SGE_IN_WQE;
+ std_sge_num = is_ud ? 0 : HNS_ROCE_SGE_IN_WQE;
+ min_sge = is_ud ? 1 : 0;
+ return max_send_sge > std_sge_num ? (max_send_sge - std_sge_num) :
+ min_sge;
+}
- return 0;
+/**
+ * Calculated sge num according to attr's max_inline_data
+ */
+static unsigned int get_sge_num_from_max_inl_data(bool is_ud,
+ uint32_t max_inline_data)
+{
+ unsigned int inline_sge = 0;
+
+ inline_sge = max_inline_data / HNS_ROCE_SGE_SIZE;
+ /*
+ * if max_inline_data less than
+ * HNS_ROCE_SGE_IN_WQE * HNS_ROCE_SGE_SIZE,
+ * In addition to ud's mode, no need to extend sge.
+ */
+ if (!is_ud && (inline_sge <= HNS_ROCE_SGE_IN_WQE))
+ inline_sge = 0;
+
+ return inline_sge;
}
-static void set_ext_sge_param(struct hns_roce_device *hr_dev,
+static void set_ext_sge_param(struct hns_roce_context *ctx,
struct ibv_qp_init_attr_ex *attr,
struct hns_roce_qp *qp, unsigned int wr_cnt)
{
+ bool is_ud = (qp->verbs_qp.qp.qp_type == IBV_QPT_UD);
+ unsigned int ext_wqe_sge_cnt;
+ unsigned int inline_ext_sge;
unsigned int total_sge_cnt;
- unsigned int wqe_sge_cnt;
+ unsigned int std_sge_num;
qp->ex_sge.sge_shift = HNS_ROCE_SGE_SHIFT;
-
- qp->sq.max_gs = attr->cap.max_send_sge;
-
- wqe_sge_cnt = get_wqe_ext_sge_cnt(qp);
+ std_sge_num = is_ud ? 0 : HNS_ROCE_SGE_IN_WQE;
+ ext_wqe_sge_cnt = get_sge_num_from_max_send_sge(is_ud,
+ attr->cap.max_send_sge);
+
+ if (ctx->config & HNS_ROCE_RSP_EXSGE_FLAGS) {
+ attr->cap.max_inline_data = min_t(uint32_t, roundup_pow_of_two(
+ attr->cap.max_inline_data),
+ ctx->max_inline_data);
+
+ inline_ext_sge = max(ext_wqe_sge_cnt,
+ get_sge_num_from_max_inl_data(is_ud,
+ attr->cap.max_inline_data));
+ qp->sq.ext_sge_cnt = inline_ext_sge ?
+ roundup_pow_of_two(inline_ext_sge) : 0;
+ qp->sq.max_gs = min((qp->sq.ext_sge_cnt + std_sge_num),
+ ctx->max_sge);
+
+ ext_wqe_sge_cnt = qp->sq.ext_sge_cnt;
+ } else {
+ qp->sq.max_gs = max(1U, attr->cap.max_send_sge);
+ qp->sq.max_gs = min(qp->sq.max_gs, ctx->max_sge);
+ qp->sq.ext_sge_cnt = qp->sq.max_gs;
+ }
/* If the number of extended sge is not zero, they MUST use the
* space of HNS_HW_PAGE_SIZE at least.
*/
- if (wqe_sge_cnt) {
- total_sge_cnt = roundup_pow_of_two(wr_cnt * wqe_sge_cnt);
- qp->ex_sge.sge_cnt =
- max(total_sge_cnt,
- (unsigned int)HNS_HW_PAGE_SIZE / HNS_ROCE_SGE_SIZE);
+ if (ext_wqe_sge_cnt) {
+ total_sge_cnt = roundup_pow_of_two(wr_cnt * ext_wqe_sge_cnt);
+ qp->ex_sge.sge_cnt = max(total_sge_cnt,
+ (unsigned int)HNS_HW_PAGE_SIZE /
+ HNS_ROCE_SGE_SIZE);
}
}
+
static void hns_roce_set_qp_params(struct ibv_qp_init_attr_ex *attr,
struct hns_roce_qp *qp,
struct hns_roce_context *ctx)
@@ -1044,10 +1091,9 @@ static void hns_roce_set_qp_params(struct ibv_qp_init_attr_ex *attr,
qp->sq.wqe_cnt = cnt;
qp->sq.shift = hr_ilog32(cnt);
- set_ext_sge_param(hr_dev, attr, qp, cnt);
+ set_ext_sge_param(ctx, attr, qp, cnt);
qp->sq.max_post = min(ctx->max_qp_wr, cnt);
- qp->sq.max_gs = min(ctx->max_sge, qp->sq.max_gs);
qp->sq_signal_bits = attr->sq_sig_all ? 0 : 1;
--
2.30.0
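A rough standalone sketch of the calculation described above; the constants and helper are assumptions for illustration (sge size of 16 bytes, 2 standard sges per RC WQE), not code from the patch. With max_send_sge = 16 and max_inline_data = 1024 it yields 64 extended sges, matching the example in the commit message.

#include <stdbool.h>
#include <stdint.h>

#define SGE_SIZE   16	/* HNS_ROCE_SGE_SIZE */
#define SGE_IN_WQE 2	/* assumed standard sges per RC WQE */

static uint32_t roundup_pow2(uint32_t n)
{
	uint32_t r = 1;

	while (r < n)
		r <<= 1;
	return r;
}

/* Take the larger of the sge count implied by max_send_sge and the one
 * implied by max_inline_data, as the patch above does. */
static uint32_t calc_ext_sge_cnt(bool is_ud, uint32_t max_send_sge,
				 uint32_t max_inline_data)
{
	uint32_t std_sge = is_ud ? 0 : SGE_IN_WQE;
	uint32_t from_sge = max_send_sge > std_sge ?
			    max_send_sge - std_sge : (is_ud ? 1 : 0);
	uint32_t from_inl = max_inline_data / SGE_SIZE;
	uint32_t need;

	/* For RC, small inline payloads fit in the standard sges of the WQE. */
	if (!is_ud && from_inl <= SGE_IN_WQE)
		from_inl = 0;

	need = from_sge > from_inl ? from_sge : from_inl;

	return need ? roundup_pow2(need) : 0;
}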

@@ -1,33 +0,0 @@
From 542b54285dbaebbe0b5eb3279134b02484d7329d Mon Sep 17 00:00:00 2001
From: Luoyouming <luoyouming@huawei.com>
Date: Mon, 10 Oct 2022 21:21:55 +0800
Subject: [PATCH v4 05/10] Update kernel headers
To commit ?? ("RDMA/hns: Remove enable rq inline in kernel and add
compatibility handling").
Signed-off-by: Luoyouming <luoyouming@huawei.com>
---
kernel-headers/rdma/hns-abi.h | 2 ++
1 file changed, 2 insertions(+)
diff --git a/kernel-headers/rdma/hns-abi.h b/kernel-headers/rdma/hns-abi.h
index fc83dfb..c70465d 100644
--- a/kernel-headers/rdma/hns-abi.h
+++ b/kernel-headers/rdma/hns-abi.h
@@ -87,10 +87,12 @@ struct hns_roce_ib_create_qp_resp {
enum {
HNS_ROCE_EXSGE_FLAGS = 1 << 0,
+ HNS_ROCE_RQ_INLINE_FLAGS = 1 << 1,
};
enum {
HNS_ROCE_RSP_EXSGE_FLAGS = 1 << 0,
+ HNS_ROCE_RSP_RQ_INLINE_FLAGS = 1 << 1,
};
struct hns_roce_ib_alloc_ucontext_resp {
--
2.30.0

@@ -1,46 +0,0 @@
From 996bca51e2063dc790286cbc894e2c438f499441 Mon Sep 17 00:00:00 2001
From: Luoyouming <luoyouming@huawei.com>
Date: Mon, 10 Oct 2022 21:49:35 +0800
Subject: [PATCH v4 06/10] libhns: Add compatibility handling for rq inline
Add compatibility handling between different user-space and kernel-space
versions.
Signed-off-by: Luoyouming <luoyouming@huawei.com>
Reviewed-by: Yangyang Li <liyangyang20@huawei.com>
---
providers/hns/hns_roce_u.c | 2 +-
providers/hns/hns_roce_u_verbs.c | 4 +++-
2 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c
index 1bd5bb1..6c9aefa 100644
--- a/providers/hns/hns_roce_u.c
+++ b/providers/hns/hns_roce_u.c
@@ -113,7 +113,7 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
if (!context)
return NULL;
- cmd.config |= HNS_ROCE_EXSGE_FLAGS;
+ cmd.config |= HNS_ROCE_EXSGE_FLAGS | HNS_ROCE_RQ_INLINE_FLAGS;
if (ibv_cmd_get_context(&context->ibv_ctx, &cmd.ibv_cmd, sizeof(cmd),
&resp.ibv_resp, sizeof(resp)))
goto err_free;
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index 851b145..3e9a306 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -1079,7 +1079,9 @@ static void hns_roce_set_qp_params(struct ibv_qp_init_attr_ex *attr,
cnt = roundup_pow_of_two(attr->cap.max_recv_wr);
qp->rq.wqe_cnt = cnt;
qp->rq.shift = hr_ilog32(cnt);
- qp->rq_rinl_buf.wqe_cnt = cnt;
+ qp->rq_rinl_buf.wqe_cnt = 0;
+ if (ctx->config & HNS_ROCE_RSP_RQ_INLINE_FLAGS)
+ qp->rq_rinl_buf.wqe_cnt = cnt;
attr->cap.max_recv_wr = qp->rq.wqe_cnt;
attr->cap.max_recv_sge = qp->rq.max_gs;
--
2.30.0
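The compatibility scheme in patches 03 through 06 is a request/acknowledge handshake: userspace sets feature bits in cmd.config, and a feature is used only if the kernel echoes the matching RSP bit in resp.config (old kernels return 0, so features degrade gracefully). A sketch of that pattern, with the bit values copied from the hns-abi.h hunks above and the helpers themselves illustrative:

#include <stdbool.h>
#include <stdint.h>

#define HNS_ROCE_EXSGE_FLAGS		(1U << 0)
#define HNS_ROCE_RQ_INLINE_FLAGS	(1U << 1)
#define HNS_ROCE_RSP_EXSGE_FLAGS	(1U << 0)
#define HNS_ROCE_RSP_RQ_INLINE_FLAGS	(1U << 1)

/* What userspace asks for when opening the device context. */
static uint32_t requested_features(void)
{
	return HNS_ROCE_EXSGE_FLAGS | HNS_ROCE_RQ_INLINE_FLAGS;
}

/* rq inline is used only if the kernel acknowledged it in resp.config. */
static bool rq_inline_enabled(uint32_t resp_config)
{
	return resp_config & HNS_ROCE_RSP_RQ_INLINE_FLAGS;
}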

@@ -1,301 +0,0 @@
From 9e5f5d39757a5479a1a4e1170978d2e09acb995b Mon Sep 17 00:00:00 2001
From: Luoyouming <luoyouming@huawei.com>
Date: Fri, 9 Sep 2022 17:42:38 +0800
Subject: [PATCH v4 07/10] libhns: Refactor rq inline
The ibv_sge struct is sufficient; there is no need for a custom
hns_roce_rinl_sge struct. Refactor the structures and functions so they can
be reused for cqe inline (rq and srq scenarios).
Signed-off-by: Luoyouming <luoyouming@huawei.com>
---
providers/hns/hns_roce_u.h | 7 +--
providers/hns/hns_roce_u_hw_v2.c | 103 +++++++++++++++----------------
providers/hns/hns_roce_u_verbs.c | 46 +++++++-------
3 files changed, 75 insertions(+), 81 deletions(-)
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index 5388f9c..57ebe55 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -290,13 +290,8 @@ struct hns_roce_sge_ex {
unsigned int sge_shift;
};
-struct hns_roce_rinl_sge {
- void *addr;
- unsigned int len;
-};
-
struct hns_roce_rinl_wqe {
- struct hns_roce_rinl_sge *sg_list;
+ struct ibv_sge *sg_list;
unsigned int sge_cnt;
};
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index ebe68bc..73acc9e 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -34,6 +34,7 @@
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
+#include <ccan/minmax.h>
#include "hns_roce_u.h"
#include "hns_roce_u_db.h"
#include "hns_roce_u_hw_v2.h"
@@ -417,46 +418,42 @@ static void get_opcode_for_resp(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc,
wc->opcode = wc_rcv_op_map[opcode];
}
-static int handle_recv_inl_wqe(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc,
- struct hns_roce_qp **cur_qp, uint32_t opcode)
+static void handle_recv_inl_data(struct hns_roce_v2_cqe *cqe,
+ struct hns_roce_rinl_buf *rinl_buf,
+ uint32_t wr_cnt, uint8_t *buf)
{
- if (((*cur_qp)->verbs_qp.qp.qp_type == IBV_QPT_RC) &&
- (opcode == HNS_ROCE_RECV_OP_SEND ||
- opcode == HNS_ROCE_RECV_OP_SEND_WITH_IMM ||
- opcode == HNS_ROCE_RECV_OP_SEND_WITH_INV) &&
- hr_reg_read(cqe, CQE_RQ_INLINE)) {
- struct hns_roce_rinl_sge *sge_list;
- uint32_t wr_num, wr_cnt, sge_num, data_len;
- uint8_t *wqe_buf;
- uint32_t sge_cnt, size;
+ struct ibv_sge *sge_list;
+ uint32_t sge_num, data_len;
+ uint32_t sge_cnt, size;
- wr_num = hr_reg_read(cqe, CQE_WQE_IDX);
- wr_cnt = wr_num & ((*cur_qp)->rq.wqe_cnt - 1);
+ sge_list = rinl_buf->wqe_list[wr_cnt].sg_list;
+ sge_num = rinl_buf->wqe_list[wr_cnt].sge_cnt;
- sge_list = (*cur_qp)->rq_rinl_buf.wqe_list[wr_cnt].sg_list;
- sge_num = (*cur_qp)->rq_rinl_buf.wqe_list[wr_cnt].sge_cnt;
- wqe_buf = (uint8_t *)get_recv_wqe_v2(*cur_qp, wr_cnt);
+ data_len = le32toh(cqe->byte_cnt);
- data_len = wc->byte_len;
+ for (sge_cnt = 0; (sge_cnt < sge_num) && (data_len); sge_cnt++) {
+ size = min(sge_list[sge_cnt].length, data_len);
- for (sge_cnt = 0; (sge_cnt < sge_num) && (data_len);
- sge_cnt++) {
- size = sge_list[sge_cnt].len < data_len ?
- sge_list[sge_cnt].len : data_len;
+ memcpy((void *)(uintptr_t)sge_list[sge_cnt].addr, (void *)buf, size);
+ data_len -= size;
+ buf += size;
+ }
- memcpy((void *)sge_list[sge_cnt].addr,
- (void *)wqe_buf, size);
- data_len -= size;
- wqe_buf += size;
- }
+ if (data_len)
+ hr_reg_write(cqe, CQE_STATUS, HNS_ROCE_V2_CQE_LOCAL_LENGTH_ERR);
- if (data_len) {
- wc->status = IBV_WC_LOC_LEN_ERR;
- return V2_CQ_POLL_ERR;
- }
- }
+}
- return V2_CQ_OK;
+static void handle_recv_rq_inl(struct hns_roce_v2_cqe *cqe,
+ struct hns_roce_qp *cur_qp)
+{
+ uint8_t *wqe_buf;
+ uint32_t wr_num;
+
+ wr_num = hr_reg_read(cqe, CQE_WQE_IDX) & (cur_qp->rq.wqe_cnt - 1);
+
+ wqe_buf = (uint8_t *)get_recv_wqe_v2(cur_qp, wr_num);
+ handle_recv_inl_data(cqe, &(cur_qp->rq_rinl_buf), wr_num, wqe_buf);
}
static void parse_for_ud_qp(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc)
@@ -479,10 +476,9 @@ static void parse_cqe_for_srq(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc,
}
static int parse_cqe_for_resp(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc,
- struct hns_roce_qp *hr_qp, uint8_t opcode)
+ struct hns_roce_qp *hr_qp)
{
struct hns_roce_wq *wq;
- int ret;
wq = &hr_qp->rq;
wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
@@ -491,12 +487,8 @@ static int parse_cqe_for_resp(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc,
if (hr_qp->verbs_qp.qp.qp_type == IBV_QPT_UD)
parse_for_ud_qp(cqe, wc);
- ret = handle_recv_inl_wqe(cqe, wc, &hr_qp, opcode);
- if (ret) {
- verbs_err(verbs_get_ctx(hr_qp->verbs_qp.qp.context),
- PFX "failed to handle recv inline wqe!\n");
- return ret;
- }
+ if (hr_reg_read(cqe, CQE_RQ_INLINE))
+ handle_recv_rq_inl(cqe, hr_qp);
return 0;
}
@@ -626,7 +618,7 @@ static int parse_cqe_for_cq(struct hns_roce_context *ctx, struct hns_roce_cq *cq
if (srq)
parse_cqe_for_srq(cqe, wc, srq);
else
- parse_cqe_for_resp(cqe, wc, cur_qp, opcode);
+ parse_cqe_for_resp(cqe, wc, cur_qp);
}
return 0;
@@ -1355,26 +1347,31 @@ static void fill_recv_sge_to_wqe(struct ibv_recv_wr *wr, void *wqe,
}
}
+static void fill_recv_inl_buf(struct hns_roce_rinl_buf *rinl_buf,
+ unsigned int wqe_idx, struct ibv_recv_wr *wr)
+{
+ struct ibv_sge *sge_list;
+ unsigned int i;
+
+ if (!rinl_buf->wqe_cnt)
+ return;
+
+ sge_list = rinl_buf->wqe_list[wqe_idx].sg_list;
+ rinl_buf->wqe_list[wqe_idx].sge_cnt = (unsigned int)wr->num_sge;
+ for (i = 0; i < wr->num_sge; i++)
+ memcpy((void *)&sge_list[i], (void *)&wr->sg_list[i],
+ sizeof(struct ibv_sge));
+}
+
static void fill_rq_wqe(struct hns_roce_qp *qp, struct ibv_recv_wr *wr,
unsigned int wqe_idx, unsigned int max_sge)
{
- struct hns_roce_rinl_sge *sge_list;
- unsigned int i;
void *wqe;
wqe = get_recv_wqe_v2(qp, wqe_idx);
fill_recv_sge_to_wqe(wr, wqe, max_sge, qp->rq.rsv_sge);
- if (!qp->rq_rinl_buf.wqe_cnt)
- return;
-
- /* QP support receive inline wqe */
- sge_list = qp->rq_rinl_buf.wqe_list[wqe_idx].sg_list;
- qp->rq_rinl_buf.wqe_list[wqe_idx].sge_cnt = (unsigned int)wr->num_sge;
- for (i = 0; i < wr->num_sge; i++) {
- sge_list[i].addr = (void *)(uintptr_t)wr->sg_list[i].addr;
- sge_list[i].len = wr->sg_list[i].length;
- }
+ fill_recv_inl_buf(&qp->rq_rinl_buf, wqe_idx, wr);
}
static int hns_roce_u_v2_post_recv(struct ibv_qp *ibvqp, struct ibv_recv_wr *wr,
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index 3e9a306..1d661dd 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -855,43 +855,45 @@ static int verify_qp_create_attr(struct hns_roce_context *ctx,
return verify_qp_create_cap(ctx, attr);
}
-static int qp_alloc_recv_inl_buf(struct ibv_qp_cap *cap,
- struct hns_roce_qp *qp)
+static int alloc_recv_rinl_buf(uint32_t max_sge,
+ struct hns_roce_rinl_buf *rinl_buf)
{
unsigned int cnt;
int i;
- cnt = qp->rq_rinl_buf.wqe_cnt;
- qp->rq_rinl_buf.wqe_list = calloc(cnt,
- sizeof(struct hns_roce_rinl_wqe));
- if (!qp->rq_rinl_buf.wqe_list)
+ cnt = rinl_buf->wqe_cnt;
+ rinl_buf->wqe_list = calloc(cnt,
+ sizeof(struct hns_roce_rinl_wqe));
+ if (!rinl_buf->wqe_list)
return ENOMEM;
- qp->rq_rinl_buf.wqe_list[0].sg_list = calloc(cnt * cap->max_recv_sge,
- sizeof(struct hns_roce_rinl_sge));
- if (!qp->rq_rinl_buf.wqe_list[0].sg_list)
+ rinl_buf->wqe_list[0].sg_list = calloc(cnt * max_sge,
+ sizeof(struct ibv_sge));
+ if (!rinl_buf->wqe_list[0].sg_list) {
+ free(rinl_buf->wqe_list);
return ENOMEM;
+ }
for (i = 0; i < cnt; i++) {
- int wqe_size = i * cap->max_recv_sge;
+ int wqe_size = i * max_sge;
- qp->rq_rinl_buf.wqe_list[i].sg_list =
- &(qp->rq_rinl_buf.wqe_list[0].sg_list[wqe_size]);
+ rinl_buf->wqe_list[i].sg_list =
+ &(rinl_buf->wqe_list[0].sg_list[wqe_size]);
}
return 0;
}
-static void qp_free_recv_inl_buf(struct hns_roce_qp *qp)
+static void free_recv_rinl_buf(struct hns_roce_rinl_buf *rinl_buf)
{
- if (qp->rq_rinl_buf.wqe_list) {
- if (qp->rq_rinl_buf.wqe_list[0].sg_list) {
- free(qp->rq_rinl_buf.wqe_list[0].sg_list);
- qp->rq_rinl_buf.wqe_list[0].sg_list = NULL;
+ if (rinl_buf->wqe_list) {
+ if (rinl_buf->wqe_list[0].sg_list) {
+ free(rinl_buf->wqe_list[0].sg_list);
+ rinl_buf->wqe_list[0].sg_list = NULL;
}
- free(qp->rq_rinl_buf.wqe_list);
- qp->rq_rinl_buf.wqe_list = NULL;
+ free(rinl_buf->wqe_list);
+ rinl_buf->wqe_list = NULL;
}
}
@@ -930,7 +932,7 @@ static int calc_qp_buff_size(struct hns_roce_device *hr_dev,
static void qp_free_wqe(struct hns_roce_qp *qp)
{
- qp_free_recv_inl_buf(qp);
+ free_recv_rinl_buf(&qp->rq_rinl_buf);
if (qp->sq.wqe_cnt)
free(qp->sq.wrid);
@@ -958,7 +960,7 @@ static int qp_alloc_wqe(struct ibv_qp_cap *cap, struct hns_roce_qp *qp,
}
if (qp->rq_rinl_buf.wqe_cnt) {
- if (qp_alloc_recv_inl_buf(cap, qp))
+ if (alloc_recv_rinl_buf(cap->max_recv_sge, &qp->rq_rinl_buf))
goto err_alloc;
}
@@ -968,7 +970,7 @@ static int qp_alloc_wqe(struct ibv_qp_cap *cap, struct hns_roce_qp *qp,
return 0;
err_alloc:
- qp_free_recv_inl_buf(qp);
+ free_recv_rinl_buf(&qp->rq_rinl_buf);
if (qp->rq.wrid)
free(qp->rq.wrid);
--
2.30.0
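The core of the refactored receive-inline path is a scatter copy from one source buffer into the posted ibv_sge list. A simplified standalone version of that loop, with an illustrative stand-in for struct ibv_sge:

#include <stdint.h>
#include <string.h>

/* Illustrative stand-in for struct ibv_sge. */
struct sge {
	uint64_t addr;
	uint32_t length;
	uint32_t lkey;
};

/* Copy data_len bytes from src into the receive sg_list; a non-zero
 * remainder means the receive buffers were too small (local length error). */
static uint32_t scatter_inline_data(const uint8_t *src, uint32_t data_len,
				    struct sge *sg_list, uint32_t sge_num)
{
	for (uint32_t i = 0; i < sge_num && data_len; i++) {
		uint32_t size = sg_list[i].length < data_len ?
				sg_list[i].length : data_len;

		memcpy((void *)(uintptr_t)sg_list[i].addr, src, size);
		data_len -= size;
		src += size;
	}

	return data_len;
}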

@@ -1,46 +0,0 @@
From 22beeec9a0d8272fc4db60275ee4eee890068102 Mon Sep 17 00:00:00 2001
From: Luoyouming <luoyouming@huawei.com>
Date: Tue, 11 Oct 2022 10:21:24 +0800
Subject: [PATCH v4 08/10] libhns: RQ inline support wc_x_poll_cq interface
Support retrieving RQ inline data through the wc_x_poll_cq interface.
Signed-off-by: Luoyouming <luoyouming@huawei.com>
Reviewed-by: Yangyang Li <liyangyang20@huawei.com>
---
providers/hns/hns_roce_u_hw_v2.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index b37ea92..25d8861 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -561,10 +561,15 @@ static void cqe_proc_srq(struct hns_roce_srq *srq, uint32_t wqe_idx,
hns_roce_free_srq_wqe(srq, wqe_idx);
}
-static void cqe_proc_rq(struct hns_roce_wq *wq, struct hns_roce_cq *cq)
+static void cqe_proc_rq(struct hns_roce_qp *hr_qp, struct hns_roce_cq *cq)
{
+ struct hns_roce_wq *wq = &hr_qp->rq;
+
cq->verbs_cq.cq_ex.wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
++wq->tail;
+
+ if (hr_reg_read(cq->cqe, CQE_RQ_INLINE))
+ handle_recv_rq_inl(cq->cqe, hr_qp);
}
static int cqe_proc_wq(struct hns_roce_context *ctx, struct hns_roce_qp *qp,
@@ -584,7 +589,7 @@ static int cqe_proc_wq(struct hns_roce_context *ctx, struct hns_roce_qp *qp,
if (srq)
cqe_proc_srq(srq, wqe_idx, cq);
else
- cqe_proc_rq(&qp->rq, cq);
+ cqe_proc_rq(qp, cq);
}
return 0;
--
2.30.0

@@ -1,33 +0,0 @@
From 14cee9bd8ab06104b9f9a0326b8d17a5bf8ee647 Mon Sep 17 00:00:00 2001
From: Luoyouming <luoyouming@huawei.com>
Date: Tue, 11 Oct 2022 10:50:36 +0800
Subject: [PATCH v4 09/10] Update kernel headers
To commit ?? ("RDMA/hns: Support cqe inline in user space").
Signed-off-by: Luoyouming <luoyouming@huawei.com>
---
kernel-headers/rdma/hns-abi.h | 2 ++
1 file changed, 2 insertions(+)
diff --git a/kernel-headers/rdma/hns-abi.h b/kernel-headers/rdma/hns-abi.h
index c70465d..41738b8 100644
--- a/kernel-headers/rdma/hns-abi.h
+++ b/kernel-headers/rdma/hns-abi.h
@@ -88,11 +88,13 @@ struct hns_roce_ib_create_qp_resp {
enum {
HNS_ROCE_EXSGE_FLAGS = 1 << 0,
HNS_ROCE_RQ_INLINE_FLAGS = 1 << 1,
+ HNS_ROCE_CQE_INLINE_FLAGS = 1 << 2,
};
enum {
HNS_ROCE_RSP_EXSGE_FLAGS = 1 << 0,
HNS_ROCE_RSP_RQ_INLINE_FLAGS = 1 << 1,
+ HNS_ROCE_RSP_CQE_INLINE_FLAGS = 1 << 2,
};
struct hns_roce_ib_alloc_ucontext_resp {
--
2.30.0

@@ -1,263 +0,0 @@
From 71eb90581a338242a26123790e5f24df90327465 Mon Sep 17 00:00:00 2001
From: Luoyouming <luoyouming@huawei.com>
Date: Thu, 11 Aug 2022 20:50:54 +0800
Subject: [PATCH v4 10/10] libhns: Support cqe inline
When the rq or srq receives data less than or equal to 32 bytes in size, the
roce driver supports getting the data from the cqe.
Signed-off-by: Luoyouming <luoyouming@huawei.com>
Reviewed-by: Yangyang Li <liyangyang20@huawei.com>
---
providers/hns/hns_roce_u.c | 3 ++-
providers/hns/hns_roce_u.h | 21 +++++++++--------
providers/hns/hns_roce_u_hw_v2.c | 39 ++++++++++++++++++++++++++++++--
providers/hns/hns_roce_u_hw_v2.h | 4 ++--
providers/hns/hns_roce_u_verbs.c | 25 ++++++++++++++++++--
5 files changed, 75 insertions(+), 17 deletions(-)
diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c
index 6c9aefa..266e73e 100644
--- a/providers/hns/hns_roce_u.c
+++ b/providers/hns/hns_roce_u.c
@@ -113,7 +113,8 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
if (!context)
return NULL;
- cmd.config |= HNS_ROCE_EXSGE_FLAGS | HNS_ROCE_RQ_INLINE_FLAGS;
+ cmd.config |= HNS_ROCE_EXSGE_FLAGS | HNS_ROCE_RQ_INLINE_FLAGS |
+ HNS_ROCE_CQE_INLINE_FLAGS;
if (ibv_cmd_get_context(&context->ibv_ctx, &cmd.ibv_cmd, sizeof(cmd),
&resp.ibv_resp, sizeof(resp)))
goto err_free;
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index 57ebe55..6b64cd0 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -246,10 +246,21 @@ struct hns_roce_idx_que {
unsigned int tail;
};
+struct hns_roce_rinl_wqe {
+ struct ibv_sge *sg_list;
+ unsigned int sge_cnt;
+};
+
+struct hns_roce_rinl_buf {
+ struct hns_roce_rinl_wqe *wqe_list;
+ unsigned int wqe_cnt;
+};
+
struct hns_roce_srq {
struct verbs_srq verbs_srq;
struct hns_roce_idx_que idx_que;
struct hns_roce_buf wqe_buf;
+ struct hns_roce_rinl_buf srq_rinl_buf;
pthread_spinlock_t lock;
unsigned long *wrid;
unsigned int srqn;
@@ -290,16 +301,6 @@ struct hns_roce_sge_ex {
unsigned int sge_shift;
};
-struct hns_roce_rinl_wqe {
- struct ibv_sge *sg_list;
- unsigned int sge_cnt;
-};
-
-struct hns_roce_rinl_buf {
- struct hns_roce_rinl_wqe *wqe_list;
- unsigned int wqe_cnt;
-};
-
struct hns_roce_qp {
struct verbs_qp verbs_qp;
struct hns_roce_buf buf;
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index 25d8861..7063b26 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -444,6 +444,28 @@ static void handle_recv_inl_data(struct hns_roce_v2_cqe *cqe,
}
+static void handle_recv_cqe_inl_from_rq(struct hns_roce_v2_cqe *cqe,
+ struct hns_roce_qp *cur_qp)
+{
+ uint32_t wr_num;
+
+ wr_num = hr_reg_read(cqe, CQE_WQE_IDX) & (cur_qp->rq.wqe_cnt - 1);
+
+ handle_recv_inl_data(cqe, &(cur_qp->rq_rinl_buf), wr_num,
+ (uint8_t *)cqe->payload);
+}
+
+static void handle_recv_cqe_inl_from_srq(struct hns_roce_v2_cqe *cqe,
+ struct hns_roce_srq *srq)
+{
+ uint32_t wr_num;
+
+ wr_num = hr_reg_read(cqe, CQE_WQE_IDX) & (srq->wqe_cnt - 1);
+
+ handle_recv_inl_data(cqe, &(srq->srq_rinl_buf), wr_num,
+ (uint8_t *)cqe->payload);
+}
+
static void handle_recv_rq_inl(struct hns_roce_v2_cqe *cqe,
struct hns_roce_qp *cur_qp)
{
@@ -473,6 +495,9 @@ static void parse_cqe_for_srq(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc,
wqe_idx = hr_reg_read(cqe, CQE_WQE_IDX);
wc->wr_id = srq->wrid[wqe_idx & (srq->wqe_cnt - 1)];
hns_roce_free_srq_wqe(srq, wqe_idx);
+
+ if (hr_reg_read(cqe, CQE_CQE_INLINE))
+ handle_recv_cqe_inl_from_srq(cqe, srq);
}
static int parse_cqe_for_resp(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc,
@@ -487,7 +512,9 @@ static int parse_cqe_for_resp(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc,
if (hr_qp->verbs_qp.qp.qp_type == IBV_QPT_UD)
parse_for_ud_qp(cqe, wc);
- if (hr_reg_read(cqe, CQE_RQ_INLINE))
+ if (hr_reg_read(cqe, CQE_CQE_INLINE))
+ handle_recv_cqe_inl_from_rq(cqe, hr_qp);
+ else if (hr_reg_read(cqe, CQE_RQ_INLINE))
handle_recv_rq_inl(cqe, hr_qp);
return 0;
@@ -559,6 +586,9 @@ static void cqe_proc_srq(struct hns_roce_srq *srq, uint32_t wqe_idx,
{
cq->verbs_cq.cq_ex.wr_id = srq->wrid[wqe_idx & (srq->wqe_cnt - 1)];
hns_roce_free_srq_wqe(srq, wqe_idx);
+
+ if (hr_reg_read(cq->cqe, CQE_CQE_INLINE))
+ handle_recv_cqe_inl_from_srq(cq->cqe, srq);
}
static void cqe_proc_rq(struct hns_roce_qp *hr_qp, struct hns_roce_cq *cq)
@@ -568,7 +598,9 @@ static void cqe_proc_rq(struct hns_roce_qp *hr_qp, struct hns_roce_cq *cq)
cq->verbs_cq.cq_ex.wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
++wq->tail;
- if (hr_reg_read(cq->cqe, CQE_RQ_INLINE))
+ if (hr_reg_read(cq->cqe, CQE_CQE_INLINE))
+ handle_recv_cqe_inl_from_rq(cq->cqe, hr_qp);
+ else if (hr_reg_read(cq->cqe, CQE_RQ_INLINE))
handle_recv_rq_inl(cq->cqe, hr_qp);
}
@@ -1725,6 +1757,9 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
wqe = get_srq_wqe(srq, wqe_idx);
fill_recv_sge_to_wqe(wr, wqe, max_sge, srq->rsv_sge);
+
+ fill_recv_inl_buf(&srq->srq_rinl_buf, wqe_idx, wr);
+
fill_wqe_idx(srq, wqe_idx);
srq->wrid[wqe_idx] = wr->wr_id;
diff --git a/providers/hns/hns_roce_u_hw_v2.h b/providers/hns/hns_roce_u_hw_v2.h
index 098dbdf..d71c695 100644
--- a/providers/hns/hns_roce_u_hw_v2.h
+++ b/providers/hns/hns_roce_u_hw_v2.h
@@ -157,7 +157,7 @@ struct hns_roce_v2_cqe {
__le32 smac;
__le32 byte_28;
__le32 byte_32;
- __le32 rsv[8];
+ __le32 payload[8];
};
#define CQE_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_cqe, h, l)
@@ -170,7 +170,7 @@ struct hns_roce_v2_cqe {
#define CQE_WQE_IDX CQE_FIELD_LOC(31, 16)
#define CQE_RKEY_IMMTDATA CQE_FIELD_LOC(63, 32)
#define CQE_XRC_SRQN CQE_FIELD_LOC(87, 64)
-#define CQE_RSV0 CQE_FIELD_LOC(95, 88)
+#define CQE_CQE_INLINE CQE_FIELD_LOC(89, 88)
#define CQE_LCL_QPN CQE_FIELD_LOC(119, 96)
#define CQE_SUB_STATUS CQE_FIELD_LOC(127, 120)
#define CQE_BYTE_CNT CQE_FIELD_LOC(159, 128)
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index 1d661dd..cff9d1d 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -522,6 +522,8 @@ static int verify_srq_create_attr(struct hns_roce_context *context,
static void set_srq_param(struct ibv_context *context, struct hns_roce_srq *srq,
struct ibv_srq_init_attr_ex *attr)
{
+ struct hns_roce_context *ctx = to_hr_ctx(context);
+
if (to_hr_dev(context->device)->hw_version == HNS_ROCE_HW_VER2)
srq->rsv_sge = 1;
@@ -531,6 +533,10 @@ static void set_srq_param(struct ibv_context *context, struct hns_roce_srq *srq,
srq->max_gs));
attr->attr.max_sge = srq->max_gs;
attr->attr.srq_limit = 0;
+
+ srq->srq_rinl_buf.wqe_cnt = 0;
+ if (ctx->config & HNS_ROCE_RSP_CQE_INLINE_FLAGS)
+ srq->srq_rinl_buf.wqe_cnt = srq->wqe_cnt;
}
static int alloc_srq_idx_que(struct hns_roce_srq *srq)
@@ -570,6 +576,11 @@ static int alloc_srq_wqe_buf(struct hns_roce_srq *srq)
return hns_roce_alloc_buf(&srq->wqe_buf, buf_size, HNS_HW_PAGE_SIZE);
}
+static int alloc_recv_rinl_buf(uint32_t max_sge,
+ struct hns_roce_rinl_buf *rinl_buf);
+
+static void free_recv_rinl_buf(struct hns_roce_rinl_buf *rinl_buf);
+
static int alloc_srq_buf(struct hns_roce_srq *srq)
{
int ret;
@@ -582,14 +593,22 @@ static int alloc_srq_buf(struct hns_roce_srq *srq)
if (ret)
goto err_idx_que;
+ if (srq->srq_rinl_buf.wqe_cnt) {
+ ret = alloc_recv_rinl_buf(srq->max_gs, &srq->srq_rinl_buf);
+ if (ret)
+ goto err_wqe_buf;
+ }
+
srq->wrid = calloc(srq->wqe_cnt, sizeof(*srq->wrid));
if (!srq->wrid) {
ret = -ENOMEM;
- goto err_wqe_buf;
+ goto err_inl_buf;
}
return 0;
+err_inl_buf:
+ free_recv_rinl_buf(&srq->srq_rinl_buf);
err_wqe_buf:
hns_roce_free_buf(&srq->wqe_buf);
err_idx_que:
@@ -603,6 +622,7 @@ static void free_srq_buf(struct hns_roce_srq *srq)
{
free(srq->wrid);
hns_roce_free_buf(&srq->wqe_buf);
+ free_recv_rinl_buf(&srq->srq_rinl_buf);
hns_roce_free_buf(&srq->idx_que.buf);
free(srq->idx_que.bitmap);
}
@@ -1082,7 +1102,8 @@ static void hns_roce_set_qp_params(struct ibv_qp_init_attr_ex *attr,
qp->rq.wqe_cnt = cnt;
qp->rq.shift = hr_ilog32(cnt);
qp->rq_rinl_buf.wqe_cnt = 0;
- if (ctx->config & HNS_ROCE_RSP_RQ_INLINE_FLAGS)
+ if (ctx->config & (HNS_ROCE_RSP_RQ_INLINE_FLAGS |
+ HNS_ROCE_RSP_CQE_INLINE_FLAGS))
qp->rq_rinl_buf.wqe_cnt = cnt;
attr->cap.max_recv_wr = qp->rq.wqe_cnt;
--
2.30.0
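The 32-byte limit mentioned above follows from re-purposing the CQE's eight reserved 32-bit words as a payload area (the rsv to payload rename in hns_roce_u_hw_v2.h). A tiny check of that arithmetic:

#include <stdint.h>

#define CQE_PAYLOAD_WORDS 8	/* __le32 payload[8] in struct hns_roce_v2_cqe */

_Static_assert(CQE_PAYLOAD_WORDS * sizeof(uint32_t) == 32,
	       "cqe inline payload area is 32 bytes");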

@@ -1,38 +0,0 @@
From 12d2a17d404e3d5ba76863f64307ea52a7d15d15 Mon Sep 17 00:00:00 2001
From: Yixing Liu <liuyixing1@huawei.com>
Date: Sat, 29 Oct 2022 10:44:17 +0800
Subject: [PATCH 11/12] Update kernel headers
To commit ?? ("RDMA/hns: Support DSCP of userspace").
Signed-off-by: Yixing Liu <liuyixing1@huawei.com>
---
kernel-headers/rdma/hns-abi.h | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/kernel-headers/rdma/hns-abi.h b/kernel-headers/rdma/hns-abi.h
index 41738b8..542be5e 100644
--- a/kernel-headers/rdma/hns-abi.h
+++ b/kernel-headers/rdma/hns-abi.h
@@ -85,6 +85,18 @@ struct hns_roce_ib_create_qp_resp {
__aligned_u64 dwqe_mmap_key;
};
+struct hns_roce_ib_create_ah_resp {
+ __u8 priority;
+ __u8 tc_mode;
+ __u8 reserved[6];
+};
+
+struct hns_roce_ib_modify_qp_resp {
+ __u8 tc_mode;
+ __u8 priority;
+ __u8 reserved[6];
+};
+
enum {
HNS_ROCE_EXSGE_FLAGS = 1 << 0,
HNS_ROCE_RQ_INLINE_FLAGS = 1 << 1,
--
2.30.0

@@ -1,150 +0,0 @@
From b88e6ae3e144651092bce923123ca20361cdacab Mon Sep 17 00:00:00 2001
From: Yixing Liu <liuyixing1@huawei.com>
Date: Tue, 27 Sep 2022 19:06:00 +0800
Subject: [PATCH 12/12] libhns: Support DSCP
This patch adds the user-mode DSCP function based on the dscp-tc mapping
configured in kernel mode.
Signed-off-by: Yixing Liu <liuyixing1@huawei.com>
---
providers/hns/hns_roce_u.h | 7 +++++++
providers/hns/hns_roce_u_abi.h | 6 ++++++
providers/hns/hns_roce_u_hw_v2.c | 19 +++++++++++++++----
providers/hns/hns_roce_u_verbs.c | 7 +++++--
4 files changed, 33 insertions(+), 6 deletions(-)
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index 6b64cd0..8c1cb1e 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -175,6 +175,11 @@ enum hns_roce_db_type {
HNS_ROCE_DB_TYPE_NUM
};
+enum hns_roce_tc_map_mode {
+ HNS_ROCE_TC_MAP_MODE_PRIO,
+ HNS_ROCE_TC_MAP_MODE_DSCP,
+};
+
struct hns_roce_db_page {
struct hns_roce_db_page *prev, *next;
struct hns_roce_buf buf;
@@ -315,6 +320,8 @@ struct hns_roce_qp {
unsigned int next_sge;
int port_num;
uint8_t sl;
+ uint8_t tc_mode;
+ uint8_t priority;
unsigned int qkey;
enum ibv_mtu path_mtu;
diff --git a/providers/hns/hns_roce_u_abi.h b/providers/hns/hns_roce_u_abi.h
index 2753d30..0519ac7 100644
--- a/providers/hns/hns_roce_u_abi.h
+++ b/providers/hns/hns_roce_u_abi.h
@@ -49,6 +49,9 @@ DECLARE_DRV_CMD(hns_roce_create_cq_ex, IB_USER_VERBS_EX_CMD_CREATE_CQ,
DECLARE_DRV_CMD(hns_roce_alloc_ucontext, IB_USER_VERBS_CMD_GET_CONTEXT,
hns_roce_ib_alloc_ucontext, hns_roce_ib_alloc_ucontext_resp);
+DECLARE_DRV_CMD(hns_roce_create_ah, IB_USER_VERBS_CMD_CREATE_AH, empty,
+ hns_roce_ib_create_ah_resp);
+
DECLARE_DRV_CMD(hns_roce_create_qp, IB_USER_VERBS_CMD_CREATE_QP,
hns_roce_ib_create_qp, hns_roce_ib_create_qp_resp);
@@ -61,4 +64,7 @@ DECLARE_DRV_CMD(hns_roce_create_srq, IB_USER_VERBS_CMD_CREATE_SRQ,
DECLARE_DRV_CMD(hns_roce_create_srq_ex, IB_USER_VERBS_CMD_CREATE_XSRQ,
hns_roce_ib_create_srq, hns_roce_ib_create_srq_resp);
+DECLARE_DRV_CMD(hns_roce_modify_qp_ex, IB_USER_VERBS_EX_CMD_MODIFY_QP,
+ empty, hns_roce_ib_modify_qp_resp);
+
#endif /* _HNS_ROCE_U_ABI_H */
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index a30d461..c652eea 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -1543,10 +1543,11 @@ static void record_qp_attr(struct ibv_qp *qp, struct ibv_qp_attr *attr,
static int hns_roce_u_v2_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
int attr_mask)
{
- int ret;
- struct ibv_modify_qp cmd;
+ struct hns_roce_modify_qp_ex_resp resp_ex = {};
+ struct hns_roce_modify_qp_ex cmd_ex = {};
struct hns_roce_qp *hr_qp = to_hr_qp(qp);
bool flag = false; /* modify qp to error */
+ int ret;
if ((attr_mask & IBV_QP_STATE) && (attr->qp_state == IBV_QPS_ERR)) {
pthread_spin_lock(&hr_qp->sq.lock);
@@ -1554,7 +1555,9 @@ static int hns_roce_u_v2_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
flag = true;
}
- ret = ibv_cmd_modify_qp(qp, attr, attr_mask, &cmd, sizeof(cmd));
+ ret = ibv_cmd_modify_qp_ex(qp, attr, attr_mask, &cmd_ex.ibv_cmd,
+ sizeof(cmd_ex), &resp_ex.ibv_resp,
+ sizeof(resp_ex));
if (flag) {
pthread_spin_unlock(&hr_qp->rq.lock);
@@ -1564,8 +1567,13 @@ static int hns_roce_u_v2_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
if (ret)
return ret;
- if (attr_mask & IBV_QP_STATE)
+ if (attr_mask & IBV_QP_STATE) {
qp->state = attr->qp_state;
+ if (attr->qp_state == IBV_QPS_RTR) {
+ hr_qp->tc_mode = resp_ex.drv_payload.tc_mode;
+ hr_qp->priority = resp_ex.drv_payload.priority;
+ }
+ }
if ((attr_mask & IBV_QP_STATE) && attr->qp_state == IBV_QPS_RESET) {
if (qp->recv_cq)
@@ -1579,6 +1587,9 @@ static int hns_roce_u_v2_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
hns_roce_init_qp_indices(to_hr_qp(qp));
}
+ if (hr_qp->tc_mode == HNS_ROCE_TC_MAP_MODE_DSCP)
+ hr_qp->sl = hr_qp->priority;
+
record_qp_attr(qp, attr, attr_mask);
return ret;
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index cff9d1d..3b7a67d 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -1449,7 +1449,7 @@ static int get_tclass(struct ibv_context *context, struct ibv_ah_attr *attr,
struct ibv_ah *hns_roce_u_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr)
{
struct hns_roce_device *hr_dev = to_hr_dev(pd->context->device);
- struct ib_uverbs_create_ah_resp resp = {};
+ struct hns_roce_create_ah_resp resp = {};
struct hns_roce_ah *ah;
/* HIP08 don't support create ah */
@@ -1477,12 +1477,15 @@ struct ibv_ah *hns_roce_u_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr)
memcpy(ah->av.dgid, attr->grh.dgid.raw, ARRAY_SIZE(ah->av.dgid));
}
- if (ibv_cmd_create_ah(pd, &ah->ibv_ah, attr, &resp, sizeof(resp)))
+ if (ibv_cmd_create_ah(pd, &ah->ibv_ah, attr, &resp.ibv_resp, sizeof(resp)))
goto err;
if (ibv_resolve_eth_l2_from_gid(pd->context, attr, ah->av.mac, NULL))
goto err;
+ if (resp.tc_mode == HNS_ROCE_TC_MAP_MODE_DSCP)
+ ah->av.sl = resp.priority;
+
ah->av.udp_sport = get_ah_udp_sport(attr);
return &ah->ibv_ah;
--
2.30.0
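The effect of DSCP mode on the service level can be summed up in one helper. This is an illustrative sketch rather than code from the patch; the enum mirrors the HNS_ROCE_TC_MAP_MODE_* values added above:

#include <stdint.h>

enum tc_map_mode {
	TC_MAP_MODE_PRIO,
	TC_MAP_MODE_DSCP,
};

/* In DSCP mode the kernel-reported priority overrides the user-supplied SL,
 * for both QPs (modify-QP response) and AHs (create-AH response). */
static uint8_t effective_sl(uint8_t user_sl, uint8_t tc_mode,
			    uint8_t kernel_priority)
{
	return tc_mode == TC_MAP_MODE_DSCP ? kernel_priority : user_sl;
}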

@@ -1,28 +0,0 @@
From 6be317e9e2b894d460c4f3422f349895d475ef8d Mon Sep 17 00:00:00 2001
From: Kirill Martynov <k.martynov@yadro.com>
Date: Mon, 20 Jun 2022 16:29:09 +0300
Subject: cma: Release allocated port array
Fix a memory leak of the allocated port array.
Fixes: 1b9125689fec ("cma: Workaround for rdma_ucm kernel bug")
Signed-off-by: Kirill Martynov <k.martynov@yadro.com>
---
librdmacm/cma.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/librdmacm/cma.c b/librdmacm/cma.c
index 2bde34a..7b924bd 100644
--- a/librdmacm/cma.c
+++ b/librdmacm/cma.c
@@ -304,6 +304,7 @@ static void remove_cma_dev(struct cma_device *cma_dev)
ibv_dealloc_pd(cma_dev->pd);
if (cma_dev->verbs)
ibv_close_device(cma_dev->verbs);
+ free(cma_dev->port);
list_del_from(&cma_dev_list, &cma_dev->entry);
free(cma_dev);
}
--
2.34.1

@@ -1,28 +0,0 @@
From 3704db8f8496ffd967ef8d8840eef2c04b7f4b06 Mon Sep 17 00:00:00 2001
From: Mikhail Sokolovskiy <sokolmish@gmail.com>
Date: Fri, 1 Jul 2022 17:34:24 +0300
Subject: rsockets: Fix allocation size
Memory is allocated for (nfds + 1) elements, but less space is actually
allocated (1 byte for the new element instead of sizeof(struct pollfd)).
This is caused by an operator precedence mistake.
Signed-off-by: Mikhail Sokolovskiy <sokolmish@gmail.com>
---
librdmacm/rsocket.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/librdmacm/rsocket.c b/librdmacm/rsocket.c
index 8c81096..e26a053 100644
--- a/librdmacm/rsocket.c
+++ b/librdmacm/rsocket.c
@@ -3173,7 +3173,7 @@ static struct pollfd *rs_fds_alloc(nfds_t nfds)
else if (rs_pollinit())
return NULL;
- rfds = malloc(sizeof(*rfds) * nfds + 1);
+ rfds = malloc(sizeof(*rfds) * (nfds + 1));
rnfds = rfds ? nfds + 1 : 0;
}
--
2.34.1
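A worked example of the precedence bug: assuming a typical ABI where sizeof(struct pollfd) is 8 and taking nfds = 4, the old expression allocates 33 bytes while the intended allocation is 40 bytes.

#include <poll.h>
#include <stdio.h>

int main(void)
{
	nfds_t nfds = 4;

	/* '*' binds tighter than '+', so this adds one extra byte, not one element. */
	size_t buggy = sizeof(struct pollfd) * nfds + 1;
	/* Parenthesizing gives room for nfds + 1 whole elements. */
	size_t fixed = sizeof(struct pollfd) * (nfds + 1);

	printf("buggy=%zu fixed=%zu\n", buggy, fixed);
	return 0;
}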

@@ -1,95 +0,0 @@
From 9cad02f325f48434343970d54bff5a5e897d9be2 Mon Sep 17 00:00:00 2001
From: Wei Xu <xuwei5@hisilicon.com>
Date: Thu, 11 Aug 2022 02:30:56 +0000
Subject: tests/test_mr.py: Change the argument of DmaBufMR to fix the
TypeError
Replace the argument 'unit' with 'gpu' to fix the following error in DmaBufMRTest:
TypeError: __init__() got an unexpected keyword argument 'unit'
Fixes: ffa97cb59f82 ("tests: Let PyverbsAPITestCase have one default device")
Signed-off-by: Wei Xu <xuwei5@hisilicon.com>
---
tests/test_mr.py | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/tests/test_mr.py b/tests/test_mr.py
index 3ec1fb3..f34b4d0 100644
--- a/tests/test_mr.py
+++ b/tests/test_mr.py
@@ -490,7 +490,7 @@ class DmaBufMRTest(PyverbsAPITestCase):
for f in flags:
len = u.get_mr_length()
for off in [0, len//2]:
- with DmaBufMR(pd, len, f, offset=off, unit=self.gpu,
+ with DmaBufMR(pd, len, f, offset=off, gpu=self.gpu,
gtt=self.gtt) as mr:
pass
@@ -505,7 +505,7 @@ class DmaBufMRTest(PyverbsAPITestCase):
for f in flags:
len = u.get_mr_length()
for off in [0, len//2]:
- with DmaBufMR(pd, len, f, offset=off, unit=self.gpu,
+ with DmaBufMR(pd, len, f, offset=off, gpu=self.gpu,
gtt=self.gtt) as mr:
mr.close()
@@ -520,7 +520,7 @@ class DmaBufMRTest(PyverbsAPITestCase):
for f in flags:
len = u.get_mr_length()
for off in [0, len//2]:
- with DmaBufMR(pd, len, f, offset=off, unit=self.gpu,
+ with DmaBufMR(pd, len, f, offset=off, gpu=self.gpu,
gtt=self.gtt) as mr:
# Pyverbs supports multiple destruction of objects,
# we are not expecting an exception here.
@@ -543,7 +543,7 @@ class DmaBufMRTest(PyverbsAPITestCase):
mr_flags += i.value
try:
DmaBufMR(pd, u.get_mr_length(), mr_flags,
- unit=self.gpu, gtt=self.gtt)
+ gpu=self.gpu, gtt=self.gtt)
except PyverbsRDMAError as err:
assert 'Failed to register a dma-buf MR' in err.args[0]
else:
@@ -562,7 +562,7 @@ class DmaBufMRTest(PyverbsAPITestCase):
for f in flags:
for mr_off in [0, mr_len//2]:
with DmaBufMR(pd, mr_len, f, offset=mr_off,
- unit=self.gpu, gtt=self.gtt) as mr:
+ gpu=self.gpu, gtt=self.gtt) as mr:
write_len = min(random.randint(1, MAX_IO_LEN),
mr_len)
mr.write('a' * write_len, write_len)
@@ -580,7 +580,7 @@ class DmaBufMRTest(PyverbsAPITestCase):
for f in flags:
for mr_off in [0, mr_len//2]:
with DmaBufMR(pd, mr_len, f, offset=mr_off,
- unit=self.gpu, gtt=self.gtt) as mr:
+ gpu=self.gpu, gtt=self.gtt) as mr:
write_len = min(random.randint(1, MAX_IO_LEN),
mr_len)
write_str = 'a' * write_len
@@ -600,7 +600,7 @@ class DmaBufMRTest(PyverbsAPITestCase):
length = u.get_mr_length()
flags = u.get_dmabuf_access_flags(self.ctx)
for f in flags:
- with DmaBufMR(pd, length, f, unit=self.gpu,
+ with DmaBufMR(pd, length, f, gpu=self.gpu,
gtt=self.gtt) as mr:
mr.lkey
@@ -614,7 +614,7 @@ class DmaBufMRTest(PyverbsAPITestCase):
length = u.get_mr_length()
flags = u.get_dmabuf_access_flags(self.ctx)
for f in flags:
- with DmaBufMR(pd, length, f, unit=self.gpu,
+ with DmaBufMR(pd, length, f, gpu=self.gpu,
gtt=self.gtt) as mr:
mr.rkey
--
2.34.1

File diff suppressed because it is too large.

@@ -1,46 +0,0 @@
From 349d850df17e2287bd6b02766e30be79b701cd6e Mon Sep 17 00:00:00 2001
From: Maher Sanalla <msanalla@nvidia.com>
Date: Sun, 19 Jun 2022 13:34:23 +0300
Subject: mlx5: Adjust Crypto BSF size if signature is used
[ Upstream commit b38f3439a983f42dd5ac8f93f0813a969720225f ]
When a Mkey is configured with crypto and signature offload, the crypto
BSF size and signature BSF size should both be set to 128 Bytes.
Currently, when building the crypto BSF, we do not take into account
if signature mode is configured or not, and we set the crypto BSF size to
64 Bytes.
The situation above does not affect the crypto configuration on CX6 HCAs,
but it will obstruct crypto traffic on more recent HCAs such as
CX6Dx and onwards.
Thus, check if signature mode is configured when building the crypto BSF,
and set the BSF size accordingly.
Fixes: b5f0a5875380 ("mlx5: Add crypto setter for MKey")
Signed-off-by: Maher Sanalla <msanalla@nvidia.com>
Reviewed-by: Avihai Horon <avihaih@nvidia.com>
Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
Signed-off-by: Nicolas Morey-Chaisemartin <nmoreychaisemartin@suse.com>
---
providers/mlx5/qp.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/providers/mlx5/qp.c b/providers/mlx5/qp.c
index d0e5a49..f9f3d1a 100644
--- a/providers/mlx5/qp.c
+++ b/providers/mlx5/qp.c
@@ -2414,7 +2414,8 @@ static int mlx5_umr_fill_crypto_bsf(struct mlx5_crypto_bsf *crypto_bsf,
memset(crypto_bsf, 0, sizeof(*crypto_bsf));
- crypto_bsf->bsf_size_type |= MLX5_BSF_SIZE_WITH_INLINE
+ crypto_bsf->bsf_size_type |= (block ? MLX5_BSF_SIZE_SIG_AND_CRYPTO :
+ MLX5_BSF_SIZE_WITH_INLINE)
<< MLX5_BSF_SIZE_SHIFT;
crypto_bsf->bsf_size_type |= MLX5_BSF_TYPE_CRYPTO;
order = get_crypto_order(attr->encrypt_on_tx,
--
2.34.1

@@ -1,34 +0,0 @@
From 202637191a96aa1b8f1ee841d71ac8abdfad82bc Mon Sep 17 00:00:00 2001
From: Muhammad Sammar <muhammads@nvidia.com>
Date: Tue, 21 Jun 2022 12:30:39 +0300
Subject: mlx5: DR, Fix missing comma in matcher builder dump line
[ Upstream commit f50b33a69f12024d0b998d5d5062656a6aee6a92 ]
Add missing comma to matcher builder dump line.
Fixes: 6a1f3b4baa2e ("mlx5: Add support for dr_matcher to the steering dump API")
Reviewed-by: Erez Shitrit <erezsh@nvidia.com>
Signed-off-by: Muhammad Sammar <muhammads@nvidia.com>
Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
Signed-off-by: Nicolas Morey-Chaisemartin <nmoreychaisemartin@suse.com>
---
providers/mlx5/dr_dbg.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/providers/mlx5/dr_dbg.c b/providers/mlx5/dr_dbg.c
index 1e82e25..62e3f36 100644
--- a/providers/mlx5/dr_dbg.c
+++ b/providers/mlx5/dr_dbg.c
@@ -410,7 +410,7 @@ static int dr_dump_matcher_builder(FILE *f, struct dr_ste_build *builder,
bool is_match = builder->htbl_type == DR_STE_HTBL_TYPE_MATCH;
int ret;
- ret = fprintf(f, "%d,0x%" PRIx64 "%d,%d,0x%x,%d\n",
+ ret = fprintf(f, "%d,0x%" PRIx64 ",%d,%d,0x%x,%d\n",
DR_DUMP_REC_TYPE_MATCHER_BUILDER,
matcher_id,
index,
--
2.34.1

@@ -1,58 +0,0 @@
From f6aa0ecbe50a3141d6a3a03f0282df3ec96b76df Mon Sep 17 00:00:00 2001
From: Benjamin Gilbert <bgilbert@redhat.com>
Date: Tue, 16 Aug 2022 23:20:45 -0400
Subject: Install xprtrdma/svcrdma kmods in redhat/suse dracut modules
[ Upstream commit aa40d6dab34d2a465e520ddb13858bd47c7b1c06 ]
The rdma dracut module installs udev rules that can cause
rdma-load-modules@rdma.service to load kernel modules listed in rdma.conf.
That file mentions the xprtrdma and svcrdma modules (both of which are
aliases for rpcrdma in kernel 5.18) but the dracut module doesn't install
them in the initrd. If they're not installed by other means, this causes
warnings in the journal:
systemd-modules-load[...]: Failed to find module 'xprtrdma'
systemd-modules-load[...]: Failed to find module 'svcrdma'
Before systemd 244, it also causes rdma-load-modules@rdma.service to fail
entirely.
Fix by explicitly installing those modules in the initrd.
See also https://bugzilla.redhat.com/show_bug.cgi?id=2117375.
Fixes: 8bb38f6cb1b2 ("redhat: update dracut setting")
Fixes: 775241089e26 ("suse: fix dracut support")
Signed-off-by: Benjamin Gilbert <bgilbert@redhat.com>
Signed-off-by: Nicolas Morey-Chaisemartin <nmoreychaisemartin@suse.com>
---
redhat/rdma.modules-setup.sh | 2 +-
suse/module-setup.sh | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/redhat/rdma.modules-setup.sh b/redhat/rdma.modules-setup.sh
index 1dabb5d..4dab750 100644
--- a/redhat/rdma.modules-setup.sh
+++ b/redhat/rdma.modules-setup.sh
@@ -29,5 +29,5 @@ install() {
installkernel() {
hostonly='' instmods =drivers/infiniband =drivers/net/ethernet/mellanox =drivers/net/ethernet/chelsio =drivers/net/ethernet/cisco =drivers/net/ethernet/emulex =drivers/target
- hostonly='' instmods crc-t10dif crct10dif_common
+ hostonly='' instmods crc-t10dif crct10dif_common xprtrdma svcrdma
}
diff --git a/suse/module-setup.sh b/suse/module-setup.sh
index 26419bf..1e5c517 100644
--- a/suse/module-setup.sh
+++ b/suse/module-setup.sh
@@ -27,5 +27,5 @@ install() {
installkernel() {
hostonly='' instmods =drivers/infiniband =drivers/net/ethernet/mellanox =drivers/net/ethernet/chelsio =drivers/net/ethernet/cisco =drivers/net/ethernet/emulex =drivers/target
- hostonly='' instmods crc-t10dif crct10dif_common
+ hostonly='' instmods crc-t10dif crct10dif_common xprtrdma svcrdma
}
--
2.34.1

@@ -1,85 +0,0 @@
From 94b468fad35b00bd43d28b9e680a4921baaf75c4 Mon Sep 17 00:00:00 2001
From: Sindhu-Devale <sindhu.devale@intel.com>
Date: Thu, 8 Sep 2022 15:44:12 -0400
Subject: providers/irdma: Explicitly set QP modify attributes for reflush
[ Upstream commit 1ffbbce65e60ee031be70a2bfb6ec319306e4378 ]
irdma issues a reflush via a modify QP to ERROR op to report completions for
WR's posted once the QP is in error state.
However, this reflush modify attributes is incorrectly keyed off the last QP
QP attributes by an application which might or might not be a modify to error.
In the later case, a flush WQE is missed.
Explicitly set the attr.qp_state and attr_mask during a reflush modify
to move the QP to error state once its in error state. Remove ibv_qp
attributes from irdma_uqp struct
Fixes: 14a0fc8 ("rdma-core/irdma: Implement device supported verb APIs")
Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
Signed-off-by: Tatyana Nikolova <tatyana.e.nikolova@intel.com>
Signed-off-by: Sindhu-Devale <sindhu.devale@intel.com>
Signed-off-by: Nicolas Morey-Chaisemartin <nmoreychaisemartin@suse.com>
---
providers/irdma/umain.h | 2 --
providers/irdma/uverbs.c | 9 +++------
2 files changed, 3 insertions(+), 8 deletions(-)
diff --git a/providers/irdma/umain.h b/providers/irdma/umain.h
index 9e802fd..109e2f1 100644
--- a/providers/irdma/umain.h
+++ b/providers/irdma/umain.h
@@ -88,7 +88,6 @@ struct irdma_ucq {
struct irdma_uqp {
struct ibv_qp ibv_qp;
- struct ibv_qp_attr attr;
struct irdma_ucq *send_cq;
struct irdma_ucq *recv_cq;
struct verbs_mr vmr;
@@ -103,7 +102,6 @@ struct irdma_uqp {
struct ibv_recv_wr *pend_rx_wr;
struct irdma_qp_uk qp;
enum ibv_qp_type qp_type;
- enum ibv_qp_attr_mask attr_mask;
struct irdma_sge *recv_sges;
};
diff --git a/providers/irdma/uverbs.c b/providers/irdma/uverbs.c
index 040b4ec..c2b326d 100644
--- a/providers/irdma/uverbs.c
+++ b/providers/irdma/uverbs.c
@@ -1416,12 +1416,9 @@ int irdma_umodify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask)
struct irdma_uvcontext *iwctx;
struct irdma_uqp *iwuqp;
-
iwuqp = container_of(qp, struct irdma_uqp, ibv_qp);
iwctx = container_of(qp->context, struct irdma_uvcontext,
ibv_ctx.context);
- iwuqp->attr_mask = attr_mask;
- memcpy(&iwuqp->attr, attr, sizeof(iwuqp->attr));
if (iwuqp->qp.qp_caps & IRDMA_PUSH_MODE &&
attr_mask & IBV_QP_STATE && iwctx->uk_attrs.hw_rev > IRDMA_GEN_1) {
@@ -1464,13 +1461,13 @@ static void irdma_issue_flush(struct ibv_qp *qp, bool sq_flush, bool rq_flush)
{
struct ib_uverbs_ex_modify_qp_resp resp = {};
struct irdma_umodify_qp cmd_ex = {};
- struct irdma_uqp *iwuqp;
+ struct ibv_qp_attr attr = {};
+ attr.qp_state = IBV_QPS_ERR;
cmd_ex.sq_flush = sq_flush;
cmd_ex.rq_flush = rq_flush;
- iwuqp = container_of(qp, struct irdma_uqp, ibv_qp);
- ibv_cmd_modify_qp_ex(qp, &iwuqp->attr, iwuqp->attr_mask,
+ ibv_cmd_modify_qp_ex(qp, &attr, IBV_QP_STATE,
&cmd_ex.ibv_cmd, sizeof(cmd_ex),
&resp, sizeof(resp));
}
--
2.34.1

@@ -1,38 +0,0 @@
From 660ac56e1ee71a177554432e9b9994aea1bdd0d4 Mon Sep 17 00:00:00 2001
From: Sindhu-Devale <sindhu.devale@intel.com>
Date: Thu, 8 Sep 2022 17:22:32 -0400
Subject: providers/irdma: Use s/g array in post send only when its valid
[ Upstream commit 7bc6e3b49cdac9776e740e9d886e3676524996f8 ]
A send-with-invalidate verb call can pass in an uninitialized s/g array with
0 sges, which is then filled into the irdma WQE and causes a HW asynchronous
event.
Fix this by using the s/g array in irdma post send only when it is valid.
Fixes: 3bebdf5 ("rdma-core/irdma: Add user/kernel shared libraries")
Signed-off-by: Tatyana Nikolova tatyana.e.nikolova@intel.com
Signed-off-by: Sindhu-Devale <sindhu.devale@intel.com>
Signed-off-by: Nicolas Morey-Chaisemartin <nmoreychaisemartin@suse.com>
---
providers/irdma/uk.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/providers/irdma/uk.c b/providers/irdma/uk.c
index beda34b..43ea45a 100644
--- a/providers/irdma/uk.c
+++ b/providers/irdma/uk.c
@@ -476,7 +476,8 @@ enum irdma_status_code irdma_uk_send(struct irdma_qp_uk *qp,
FIELD_PREP(IRDMAQPSQ_IMMDATA, info->imm_data));
i = 0;
} else {
- qp->wqe_ops.iw_set_fragment(wqe, 0, op_info->sg_list,
+ qp->wqe_ops.iw_set_fragment(wqe, 0,
+ frag_cnt ? op_info->sg_list : NULL,
qp->swqe_polarity);
i = 1;
}
--
2.34.1

@@ -1,52 +0,0 @@
From b3dd8cf57c07055372f93aba508572274a59f7b5 Mon Sep 17 00:00:00 2001
From: Sindhu-Devale <sindhu.devale@intel.com>
Date: Thu, 8 Sep 2022 17:39:40 -0400
Subject: providers/irdma: Report correct WC errors
[ Upstream commit 7f9761eb541413bf113a6ba841791bd5fd47872e ]
Return specific WC errors for certain types of error events.
In particular, return IBV_WC_REM_INV_REQ_ERR for an invalid-request-related
asynchronous event.
Fixes: 14a0fc8 ("rdma-core/irdma: Implement device supported verb APIs")
Signed-off-by: Tatyana Nikolova <tatyana.e.nikolova@intel.com>
Signed-off-by: Sindhu-Devale <sindhu.devale@intel.com>
Signed-off-by: Nicolas Morey-Chaisemartin <nmoreychaisemartin@suse.com>
---
providers/irdma/user.h | 1 +
providers/irdma/uverbs.c | 2 ++
2 files changed, 3 insertions(+)
diff --git a/providers/irdma/user.h b/providers/irdma/user.h
index 2506d48..f8bbc27 100644
--- a/providers/irdma/user.h
+++ b/providers/irdma/user.h
@@ -106,6 +106,7 @@ enum irdma_flush_opcode {
FLUSH_FATAL_ERR,
FLUSH_RETRY_EXC_ERR,
FLUSH_MW_BIND_ERR,
+ FLUSH_REM_INV_REQ_ERR,
};
enum irdma_cmpl_status {
diff --git a/providers/irdma/uverbs.c b/providers/irdma/uverbs.c
index c2b326d..1b36bca 100644
--- a/providers/irdma/uverbs.c
+++ b/providers/irdma/uverbs.c
@@ -560,6 +560,8 @@ static enum ibv_wc_status irdma_flush_err_to_ib_wc_status(enum irdma_flush_opcod
return IBV_WC_RETRY_EXC_ERR;
case FLUSH_MW_BIND_ERR:
return IBV_WC_MW_BIND_ERR;
+ case FLUSH_REM_INV_REQ_ERR:
+ return IBV_WC_REM_INV_REQ_ERR;
case FLUSH_FATAL_ERR:
default:
return IBV_WC_FATAL_ERR;
--
2.34.1
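For context, a small sketch (hypothetical helper, not part of the patch) of where the new mapping becomes visible to an application polling its CQ:

/*
 * Illustrative sketch only: drain a CQ and report completion status.
 * FLUSH_REM_INV_REQ_ERR now surfaces as IBV_WC_REM_INV_REQ_ERR.
 */
#include <stdio.h>
#include <infiniband/verbs.h>

static void drain_cq_example(struct ibv_cq *cq)
{
	struct ibv_wc wc;

	while (ibv_poll_cq(cq, 1, &wc) > 0) {
		if (wc.status == IBV_WC_REM_INV_REQ_ERR)
			fprintf(stderr, "wr_id %llu: remote invalid request\n",
				(unsigned long long)wc.wr_id);
		else if (wc.status != IBV_WC_SUCCESS)
			fprintf(stderr, "wr_id %llu: %s\n",
				(unsigned long long)wc.wr_id,
				ibv_wc_status_str(wc.status));
	}
}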

View File

@ -1,34 +0,0 @@
From ef27ae99376e5b672c12e856fb72c2d94d8d2cf5 Mon Sep 17 00:00:00 2001
From: Bob Pearson <rpearsonhpe@gmail.com>
Date: Thu, 19 May 2022 10:58:11 -0500
Subject: pyverbs: Increment the correct rkey in test_qpex
[ Upstream commit 0c4d91db686ef4e4364aae2514d22e8462335bd9 ]
The local bind is manipulating the local rkey, not the server rkey. Bind
doesn't check that the high bits are correct so this was missed.
Fixes: 9fca2824b5ec ("tests: Retrieve tests that generates mlx5 CQE errors")
Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Nicolas Morey-Chaisemartin <nmoreychaisemartin@suse.com>
---
tests/test_qpex.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tests/test_qpex.py b/tests/test_qpex.py
index 8f3f338..a4c9991 100644
--- a/tests/test_qpex.py
+++ b/tests/test_qpex.py
@@ -300,7 +300,7 @@ class QpExTestCase(RDMATestCase):
if ex.error_code == errno.EOPNOTSUPP:
raise unittest.SkipTest('Memory Window allocation is not supported')
raise ex
- new_key = inc_rkey(server.mr.rkey)
+ new_key = inc_rkey(mw.rkey)
server.qp.wr_bind_mw(mw, new_key, bind_info)
server.qp.wr_complete()
u.poll_cq(server.cq)
--
2.34.1

View File

@ -1,88 +0,0 @@
From 080f7181a2b4d2316d03c702f65640d6e8b1031e Mon Sep 17 00:00:00 2001
From: Kamal Heib <kamalheib1@gmail.com>
Date: Mon, 19 Sep 2022 11:39:15 -0400
Subject: mckey: Use rdma_create_qp_ex only for loopback prevention
[ Upstream commit 926a1158e33d78573859f5dfea399f7a7edcf11f ]
Since not all providers support rdma_create_qp_ex(), change the code to use
rdma_create_qp_ex() only when loopback prevention is requested.
Fixes: 40806cc22936 ("rdma-core: Add support for multicast loopback prevention to mckey")
Signed-off-by: Kamal Heib <kamalheib1@gmail.com>
Signed-off-by: Nicolas Morey-Chaisemartin <nmoreychaisemartin@suse.com>
---
librdmacm/examples/mckey.c | 48 +++++++++++++++++++++++++-------------
1 file changed, 32 insertions(+), 16 deletions(-)
diff --git a/librdmacm/examples/mckey.c b/librdmacm/examples/mckey.c
index 7015ed2..6df53a2 100644
--- a/librdmacm/examples/mckey.c
+++ b/librdmacm/examples/mckey.c
@@ -134,7 +134,8 @@ static int verify_test_params(struct cmatest_node *node)
static int init_node(struct cmatest_node *node)
{
struct ibv_qp_init_attr_ex init_qp_attr_ex;
- int cqe, ret;
+ struct ibv_qp_init_attr init_qp_attr;
+ int cqe, ret = 0;
node->pd = ibv_alloc_pd(node->cma_id->verbs);
if (!node->pd) {
@@ -151,23 +152,38 @@ static int init_node(struct cmatest_node *node)
goto out;
}
- memset(&init_qp_attr_ex, 0, sizeof(init_qp_attr_ex));
- init_qp_attr_ex.cap.max_send_wr = message_count ? message_count : 1;
- init_qp_attr_ex.cap.max_recv_wr = message_count ? message_count : 1;
- init_qp_attr_ex.cap.max_send_sge = 1;
- init_qp_attr_ex.cap.max_recv_sge = 1;
- init_qp_attr_ex.qp_context = node;
- init_qp_attr_ex.sq_sig_all = 0;
- init_qp_attr_ex.qp_type = IBV_QPT_UD;
- init_qp_attr_ex.send_cq = node->cq;
- init_qp_attr_ex.recv_cq = node->cq;
-
- init_qp_attr_ex.comp_mask = IBV_QP_INIT_ATTR_CREATE_FLAGS|IBV_QP_INIT_ATTR_PD;
- init_qp_attr_ex.pd = node->pd;
- if (!loopback)
+ memset(&init_qp_attr, 0, sizeof init_qp_attr);
+ init_qp_attr.cap.max_send_wr = message_count ? message_count : 1;
+ init_qp_attr.cap.max_recv_wr = message_count ? message_count : 1;
+ init_qp_attr.cap.max_send_sge = 1;
+ init_qp_attr.cap.max_recv_sge = 1;
+ init_qp_attr.qp_context = node;
+ init_qp_attr.sq_sig_all = 0;
+ init_qp_attr.qp_type = IBV_QPT_UD;
+ init_qp_attr.send_cq = node->cq;
+ init_qp_attr.recv_cq = node->cq;
+
+ if (!loopback) {
+ memset(&init_qp_attr_ex, 0, sizeof(init_qp_attr_ex));
+ init_qp_attr_ex.cap.max_send_wr = message_count ? message_count : 1;
+ init_qp_attr_ex.cap.max_recv_wr = message_count ? message_count : 1;
+ init_qp_attr_ex.cap.max_send_sge = 1;
+ init_qp_attr_ex.cap.max_recv_sge = 1;
+ init_qp_attr_ex.qp_context = node;
+ init_qp_attr_ex.sq_sig_all = 0;
+ init_qp_attr_ex.qp_type = IBV_QPT_UD;
+ init_qp_attr_ex.send_cq = node->cq;
+ init_qp_attr_ex.recv_cq = node->cq;
+
+ init_qp_attr_ex.comp_mask = IBV_QP_INIT_ATTR_CREATE_FLAGS|IBV_QP_INIT_ATTR_PD;
+ init_qp_attr_ex.pd = node->pd;
init_qp_attr_ex.create_flags = IBV_QP_CREATE_BLOCK_SELF_MCAST_LB;
- ret = rdma_create_qp_ex(node->cma_id, &init_qp_attr_ex);
+ ret = rdma_create_qp_ex(node->cma_id, &init_qp_attr_ex);
+ } else {
+ ret = rdma_create_qp(node->cma_id, node->pd, &init_qp_attr);
+ }
+
if (ret) {
perror("mckey: unable to create QP");
goto out;
--
2.34.1

View File

@ -1,35 +0,0 @@
From 3e56594215ccce88b89638ec16480d970776e3fb Mon Sep 17 00:00:00 2001
From: Benjamin Drung <bdrung@ubuntu.com>
Date: Tue, 27 Sep 2022 12:01:14 +0200
Subject: Fix spelling mistake of underlying
[ Upstream commit 47f3a9fd706c4a407b7bbea12ffd16edd120883e ]
Fixes: cc6eb6dd7b73 ("pyverbs: Add support for memory window creation")
Signed-off-by: Benjamin Drung <bdrung@ubuntu.com>
Signed-off-by: Nicolas Morey-Chaisemartin <nmoreychaisemartin@suse.com>
---
pyverbs/mr.pyx | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/pyverbs/mr.pyx b/pyverbs/mr.pyx
index 72bb181..e7f5f52 100644
--- a/pyverbs/mr.pyx
+++ b/pyverbs/mr.pyx
@@ -314,10 +314,10 @@ cdef class MW(PyverbsCM):
cpdef close(self):
"""
- Closes the underlaying C MW object.
+ Closes the underlying C MW object.
MW may be deleted directly or by deleting its PD, which leaves the
- Python object without the underlaying MW.
- Need to check that the underlaying MW wasn't dealloced before.
+ Python object without the underlying MW.
+ Need to check that the underlying MW wasn't dealloced before.
:return: None
"""
if self.mw is not NULL:
--
2.34.1

View File

@ -1,33 +0,0 @@
From 1a2cad26263190460ed211329d040cb1da8d7ac2 Mon Sep 17 00:00:00 2001
From: Nicolas Morey-Chaisemartin <nmoreychaisemartin@suse.com>
Date: Thu, 13 Oct 2022 09:49:12 +0200
Subject: rdma-ndd: disable systemd ProtectHostName feature
[ Upstream commit 57637df8dd6b92b4bcd3e04cea476012901526d0 ]
ProtectHostname prevents dynamic hostname changes from being noticed by the
service. This means that on a system with no static hostname, rdma-ndd is
started with the hostname 'localhost' and is not aware of a new hostname
retrieved through a DHCP lease.
Fixes: 384b75b5f624 ("rdma-ndd: systemd hardening")
Signed-off-by: Nicolas Morey-Chaisemartin <nmoreychaisemartin@suse.com>
---
rdma-ndd/rdma-ndd.service.in | 1 -
1 file changed, 1 deletion(-)
diff --git a/rdma-ndd/rdma-ndd.service.in b/rdma-ndd/rdma-ndd.service.in
index e3f8d11..368deec 100644
--- a/rdma-ndd/rdma-ndd.service.in
+++ b/rdma-ndd/rdma-ndd.service.in
@@ -22,7 +22,6 @@ Restart=always
ExecStart=@CMAKE_INSTALL_FULL_SBINDIR@/rdma-ndd --systemd
ProtectSystem=full
ProtectHome=true
-ProtectHostname=true
ProtectKernelLogs=true
# rdma-ndd is automatically wanted by udev when an RDMA device with a node description is present
--
2.34.1

View File

@ -1,33 +0,0 @@
From 0fcebb18e318cb0af02afa055f4357ba3002280b Mon Sep 17 00:00:00 2001
From: Lang Cheng <chenglang@huawei.com>
Date: Fri, 9 Oct 2020 11:14:39 +0800
Subject: libhns: Add RoH device IDs
Add RoH device IDs.
0xA227 is a 100Gb/s RoH device, and it was mistakenly added before.
0xA22C is a 200Gb/s RoH device.
0xA22D is a 400Gb/s RoH device.
Fixes: a0f8a069799e ("libhns: Add new PCI device matching for hip08")
Signed-off-by: Lang Cheng <chenglang@huawei.com>
---
providers/hns/hns_roce_u.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c
index 266e73e..60b3fe1 100644
--- a/providers/hns/hns_roce_u.c
+++ b/providers/hns/hns_roce_u.c
@@ -53,6 +53,8 @@ static const struct verbs_match_ent hca_table[] = {
VERBS_PCI_MATCH(PCI_VENDOR_ID_HUAWEI, 0xA226, &hns_roce_u_hw_v2),
VERBS_PCI_MATCH(PCI_VENDOR_ID_HUAWEI, 0xA227, &hns_roce_u_hw_v2),
VERBS_PCI_MATCH(PCI_VENDOR_ID_HUAWEI, 0xA228, &hns_roce_u_hw_v2),
+ VERBS_PCI_MATCH(PCI_VENDOR_ID_HUAWEI, 0xA22C, &hns_roce_u_hw_v2),
+ VERBS_PCI_MATCH(PCI_VENDOR_ID_HUAWEI, 0xA22D, &hns_roce_u_hw_v2),
VERBS_PCI_MATCH(PCI_VENDOR_ID_HUAWEI, 0xA22F, &hns_roce_u_hw_v2),
{}
};
--
2.30.0

View File

@ -1,28 +0,0 @@
From 4050da82f772478e1ebbcc6bd1971ecfa07399ad Mon Sep 17 00:00:00 2001
From: Guofeng Yue <yueguofeng@hisilicon.com>
Date: Mon, 17 Oct 2022 11:12:15 +0800
Subject: Update kernel headers
To commit ?? ("RDMA/hns: Pass mac type to user driver for RoH mode").
Signed-off-by: Guofeng Yue <yueguofeng@hisilicon.com>
---
kernel-headers/rdma/hns-abi.h | 2 ++
1 file changed, 2 insertions(+)
diff --git a/kernel-headers/rdma/hns-abi.h b/kernel-headers/rdma/hns-abi.h
index 542be5e..9866c51 100644
--- a/kernel-headers/rdma/hns-abi.h
+++ b/kernel-headers/rdma/hns-abi.h
@@ -116,6 +116,8 @@ struct hns_roce_ib_alloc_ucontext_resp {
__u32 reserved;
__u32 config;
__u32 max_inline_data;
+ __u8 mac_type;
+ __u8 rsv1[7];
};
struct hns_roce_ib_alloc_ucontext {
--
2.30.0

View File

@ -1,79 +0,0 @@
From f4626313eea2c78a8fac4b007b08533371ce1501 Mon Sep 17 00:00:00 2001
From: Guofeng Yue <yueguofeng@hisilicon.com>
Date: Tue, 13 Sep 2022 20:09:27 +0800
Subject: libhns: Add the parsing of mac type in RoH mode
After parsing the mac type as RoH mode, the user driver needs to set the
dmac field of the UD WQE to 0xFF. The hardware recognizes this field and
adds recognition of the IP field in RoH mode, which is used for the CM
link setup function in user mode.
Signed-off-by: Yangyang Li <liyangyang20@huawei.com>
Signed-off-by: Guofeng Yue <yueguofeng@hisilicon.com>
---
providers/hns/hns_roce_u.c | 2 ++
providers/hns/hns_roce_u.h | 6 ++++++
providers/hns/hns_roce_u_hw_v2.c | 4 ++++
3 files changed, 12 insertions(+)
diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c
index 60b3fe1..20e3698 100644
--- a/providers/hns/hns_roce_u.c
+++ b/providers/hns/hns_roce_u.c
@@ -121,6 +121,8 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
&resp.ibv_resp, sizeof(resp)))
goto err_free;
+ hr_dev->mac_type = resp.mac_type;
+
if (!resp.cqe_size)
context->cqe_size = HNS_ROCE_CQE_SIZE;
else if (resp.cqe_size <= HNS_ROCE_V3_CQE_SIZE)
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index 8c1cb1e..623be79 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -153,11 +153,17 @@
#define HNS_ROCE_SRQ_TABLE_BITS 8
#define HNS_ROCE_SRQ_TABLE_SIZE BIT(HNS_ROCE_SRQ_TABLE_BITS)
+enum {
+ HNAE3_MAC_ETH,
+ HNAE3_MAC_ROH,
+};
+
struct hns_roce_device {
struct verbs_device ibv_dev;
int page_size;
const struct hns_roce_u_hw *u_hw;
int hw_version;
+ uint8_t mac_type;
};
struct hns_roce_buf {
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index c652eea..b0c3109 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -1068,6 +1068,7 @@ static inline void enable_wqe(struct hns_roce_qp *qp, void *sq_wqe,
static int set_ud_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr,
unsigned int nreq, struct hns_roce_sge_info *sge_info)
{
+ struct hns_roce_device *hr_dev = to_hr_dev(qp->verbs_qp.qp.context->device);
struct hns_roce_ah *ah = to_hr_ah(wr->wr.ud.ah);
struct hns_roce_ud_sq_wqe *ud_sq_wqe = wqe;
int ret = 0;
@@ -1092,6 +1093,9 @@ static int set_ud_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr,
if (ret)
return ret;
+ if (hr_dev->mac_type == HNAE3_MAC_ROH)
+ ud_sq_wqe->dmac[0] = 0xFF;
+
ret = fill_ud_data_seg(ud_sq_wqe, qp, wr, sge_info);
if (ret)
return ret;
--
2.30.0

View File

@ -1,835 +0,0 @@
From 812372fadc96c6c59d460d84cdc72d60014f220d Mon Sep 17 00:00:00 2001
From: Yixing Liu <liuyixing1@huawei.com>
Date: Mon, 27 Jun 2022 14:52:04 +0800
Subject: [PATCH rdma-core] libhns: Add support for the thread domain and the parent domain
Internal locks will be disabled for queues that are created under a parent
domain associated with a thread domain.
driver inclusion
category: feature
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I63NGA
------------------------------------------------------------------
Signed-off-by: Yixing Liu <liuyixing1@huawei.com>
Reviewed-by: Yangyang Li <liyangyang20@huawei.com>
---
providers/hns/hns_roce_u.c | 5 +-
providers/hns/hns_roce_u.h | 75 ++++++++-
providers/hns/hns_roce_u_hw_v2.c | 77 ++++-----
providers/hns/hns_roce_u_verbs.c | 267 ++++++++++++++++++++++++++++---
4 files changed, 357 insertions(+), 67 deletions(-)
diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c
index 266e73e..e3c72bb 100644
--- a/providers/hns/hns_roce_u.c
+++ b/providers/hns/hns_roce_u.c
@@ -67,7 +67,7 @@ static const struct verbs_context_ops hns_common_ops = {
.create_qp = hns_roce_u_create_qp,
.create_qp_ex = hns_roce_u_create_qp_ex,
.dealloc_mw = hns_roce_u_dealloc_mw,
- .dealloc_pd = hns_roce_u_free_pd,
+ .dealloc_pd = hns_roce_u_dealloc_pd,
.dereg_mr = hns_roce_u_dereg_mr,
.destroy_cq = hns_roce_u_destroy_cq,
.modify_cq = hns_roce_u_modify_cq,
@@ -88,6 +88,9 @@ static const struct verbs_context_ops hns_common_ops = {
.close_xrcd = hns_roce_u_close_xrcd,
.open_qp = hns_roce_u_open_qp,
.get_srq_num = hns_roce_u_get_srq_num,
+ .alloc_td = hns_roce_u_alloc_td,
+ .dealloc_td = hns_roce_u_dealloc_td,
+ .alloc_parent_domain = hns_roce_u_alloc_pad,
};
static uint32_t calc_table_shift(uint32_t entry_count, uint32_t size_shift)
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index 8c1cb1e..8181da7 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -188,6 +188,11 @@ struct hns_roce_db_page {
unsigned long *bitmap;
};
+struct hns_roce_spinlock {
+ pthread_spinlock_t lock;
+ int need_lock;
+};
+
struct hns_roce_context {
struct verbs_context ibv_ctx;
void *uar;
@@ -222,15 +227,27 @@ struct hns_roce_context {
unsigned int max_inline_data;
};
+struct hns_roce_td {
+ struct ibv_td ibv_td;
+ atomic_int refcount;
+};
+
struct hns_roce_pd {
struct ibv_pd ibv_pd;
unsigned int pdn;
+ atomic_int refcount;
+ struct hns_roce_pd *protection_domain;
+};
+
+struct hns_roce_pad {
+ struct hns_roce_pd pd;
+ struct hns_roce_td *td;
};
struct hns_roce_cq {
struct verbs_cq verbs_cq;
struct hns_roce_buf buf;
- pthread_spinlock_t lock;
+ struct hns_roce_spinlock hr_lock;
unsigned int cqn;
unsigned int cq_depth;
unsigned int cons_index;
@@ -266,7 +283,7 @@ struct hns_roce_srq {
struct hns_roce_idx_que idx_que;
struct hns_roce_buf wqe_buf;
struct hns_roce_rinl_buf srq_rinl_buf;
- pthread_spinlock_t lock;
+ struct hns_roce_spinlock hr_lock;
unsigned long *wrid;
unsigned int srqn;
unsigned int wqe_cnt;
@@ -279,7 +296,7 @@ struct hns_roce_srq {
struct hns_roce_wq {
unsigned long *wrid;
- pthread_spinlock_t lock;
+ struct hns_roce_spinlock hr_lock;
unsigned int wqe_cnt;
int max_post;
unsigned int head;
@@ -397,9 +414,35 @@ static inline struct hns_roce_context *to_hr_ctx(struct ibv_context *ibv_ctx)
return container_of(ibv_ctx, struct hns_roce_context, ibv_ctx.context);
}
+static inline struct hns_roce_td *to_hr_td(struct ibv_td *ibv_td)
+{
+ return container_of(ibv_td, struct hns_roce_td, ibv_td);
+}
+
+/* to_hr_pd always returns the real hns_roce_pd obj. */
static inline struct hns_roce_pd *to_hr_pd(struct ibv_pd *ibv_pd)
{
- return container_of(ibv_pd, struct hns_roce_pd, ibv_pd);
+ struct hns_roce_pd *pd =
+ container_of(ibv_pd, struct hns_roce_pd, ibv_pd);
+
+ if (pd->protection_domain)
+ return pd->protection_domain;
+
+ return pd;
+}
+
+static inline struct hns_roce_pad *to_hr_pad(struct ibv_pd *ibv_pd)
+{
+ struct hns_roce_pad *pad =
+ ibv_pd ?
+ container_of(ibv_pd, struct hns_roce_pad, pd.ibv_pd) :
+ NULL;
+
+ if (pad && pad->pd.protection_domain)
+ return pad;
+
+ /* Otherwise ibv_pd isn't a parent_domain */
+ return NULL;
}
static inline struct hns_roce_cq *to_hr_cq(struct ibv_cq *ibv_cq)
@@ -422,14 +465,35 @@ static inline struct hns_roce_ah *to_hr_ah(struct ibv_ah *ibv_ah)
return container_of(ibv_ah, struct hns_roce_ah, ibv_ah);
}
+static inline int hns_roce_spin_lock(struct hns_roce_spinlock *hr_lock)
+{
+ if (hr_lock->need_lock)
+ return pthread_spin_lock(&hr_lock->lock);
+
+ return 0;
+}
+
+static inline int hns_roce_spin_unlock(struct hns_roce_spinlock *hr_lock)
+{
+ if (hr_lock->need_lock)
+ return pthread_spin_unlock(&hr_lock->lock);
+
+ return 0;
+}
+
int hns_roce_u_query_device(struct ibv_context *context,
const struct ibv_query_device_ex_input *input,
struct ibv_device_attr_ex *attr, size_t attr_size);
int hns_roce_u_query_port(struct ibv_context *context, uint8_t port,
struct ibv_port_attr *attr);
+struct ibv_td *hns_roce_u_alloc_td(struct ibv_context *context,
+ struct ibv_td_init_attr *attr);
+int hns_roce_u_dealloc_td(struct ibv_td *ibv_td);
+struct ibv_pd *hns_roce_u_alloc_pad(struct ibv_context *context,
+ struct ibv_parent_domain_init_attr *attr);
struct ibv_pd *hns_roce_u_alloc_pd(struct ibv_context *context);
-int hns_roce_u_free_pd(struct ibv_pd *pd);
+int hns_roce_u_dealloc_pd(struct ibv_pd *pd);
struct ibv_mr *hns_roce_u_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
uint64_t hca_va, int access);
@@ -488,6 +552,7 @@ int hns_roce_u_close_xrcd(struct ibv_xrcd *ibv_xrcd);
int hns_roce_alloc_buf(struct hns_roce_buf *buf, unsigned int size,
int page_size);
void hns_roce_free_buf(struct hns_roce_buf *buf);
+void hns_roce_qp_spinlock_destroy(struct hns_roce_qp *qp);
void hns_roce_free_qp_buf(struct hns_roce_qp *qp, struct hns_roce_context *ctx);
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index c652eea..80e836d 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -227,14 +227,14 @@ static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, uint16_t ind)
uint32_t bitmap_num;
int bit_num;
- pthread_spin_lock(&srq->lock);
+ hns_roce_spin_lock(&srq->hr_lock);
bitmap_num = ind / BIT_CNT_PER_LONG;
bit_num = ind % BIT_CNT_PER_LONG;
srq->idx_que.bitmap[bitmap_num] |= (1ULL << bit_num);
srq->idx_que.tail++;
- pthread_spin_unlock(&srq->lock);
+ hns_roce_spin_unlock(&srq->hr_lock);
}
static int get_srq_from_cqe(struct hns_roce_v2_cqe *cqe,
@@ -266,9 +266,9 @@ static int hns_roce_v2_wq_overflow(struct hns_roce_wq *wq, unsigned int nreq,
if (cur + nreq < wq->max_post)
return 0;
- pthread_spin_lock(&cq->lock);
+ hns_roce_spin_lock(&cq->hr_lock);
cur = wq->head - wq->tail;
- pthread_spin_unlock(&cq->lock);
+ hns_roce_spin_unlock(&cq->hr_lock);
return cur + nreq >= wq->max_post;
}
@@ -721,7 +721,7 @@ static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne,
int err = V2_CQ_OK;
int npolled;
- pthread_spin_lock(&cq->lock);
+ hns_roce_spin_lock(&cq->hr_lock);
for (npolled = 0; npolled < ne; ++npolled) {
err = hns_roce_poll_one(ctx, &qp, cq, wc + npolled);
@@ -736,7 +736,7 @@ static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne,
update_cq_db(ctx, cq);
}
- pthread_spin_unlock(&cq->lock);
+ hns_roce_spin_unlock(&cq->hr_lock);
return err == V2_CQ_POLL_ERR ? err : npolled;
}
@@ -1273,7 +1273,7 @@ int hns_roce_u_v2_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr,
return ret;
}
- pthread_spin_lock(&qp->sq.lock);
+ hns_roce_spin_lock(&qp->sq.hr_lock);
sge_info.start_idx = qp->next_sge; /* start index of extend sge */
@@ -1333,7 +1333,7 @@ out:
*(qp->sdb) = qp->sq.head & 0xffff;
}
- pthread_spin_unlock(&qp->sq.lock);
+ hns_roce_spin_unlock(&qp->sq.hr_lock);
if (ibvqp->state == IBV_QPS_ERR) {
attr.qp_state = IBV_QPS_ERR;
@@ -1426,7 +1426,7 @@ static int hns_roce_u_v2_post_recv(struct ibv_qp *ibvqp, struct ibv_recv_wr *wr,
return ret;
}
- pthread_spin_lock(&qp->rq.lock);
+ hns_roce_spin_lock(&qp->rq.hr_lock);
max_sge = qp->rq.max_gs - qp->rq.rsv_sge;
for (nreq = 0; wr; ++nreq, wr = wr->next) {
@@ -1460,7 +1460,7 @@ out:
hns_roce_update_rq_db(ctx, ibvqp->qp_num, qp->rq.head);
}
- pthread_spin_unlock(&qp->rq.lock);
+ hns_roce_spin_unlock(&qp->rq.hr_lock);
if (ibvqp->state == IBV_QPS_ERR) {
attr.qp_state = IBV_QPS_ERR;
@@ -1515,9 +1515,9 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *cq, uint32_t qpn,
static void hns_roce_v2_cq_clean(struct hns_roce_cq *cq, unsigned int qpn,
struct hns_roce_srq *srq)
{
- pthread_spin_lock(&cq->lock);
+ hns_roce_spin_lock(&cq->hr_lock);
__hns_roce_v2_cq_clean(cq, qpn, srq);
- pthread_spin_unlock(&cq->lock);
+ hns_roce_spin_unlock(&cq->hr_lock);
}
static void record_qp_attr(struct ibv_qp *qp, struct ibv_qp_attr *attr,
@@ -1550,8 +1550,8 @@ static int hns_roce_u_v2_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
int ret;
if ((attr_mask & IBV_QP_STATE) && (attr->qp_state == IBV_QPS_ERR)) {
- pthread_spin_lock(&hr_qp->sq.lock);
- pthread_spin_lock(&hr_qp->rq.lock);
+ hns_roce_spin_lock(&hr_qp->sq.hr_lock);
+ hns_roce_spin_lock(&hr_qp->rq.hr_lock);
flag = true;
}
@@ -1560,8 +1560,8 @@ static int hns_roce_u_v2_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
sizeof(resp_ex));
if (flag) {
- pthread_spin_unlock(&hr_qp->rq.lock);
- pthread_spin_unlock(&hr_qp->sq.lock);
+ hns_roce_spin_unlock(&hr_qp->sq.hr_lock);
+ hns_roce_spin_unlock(&hr_qp->rq.hr_lock);
}
if (ret)
@@ -1602,18 +1602,18 @@ static void hns_roce_lock_cqs(struct ibv_qp *qp)
if (send_cq && recv_cq) {
if (send_cq == recv_cq) {
- pthread_spin_lock(&send_cq->lock);
+ hns_roce_spin_lock(&send_cq->hr_lock);
} else if (send_cq->cqn < recv_cq->cqn) {
- pthread_spin_lock(&send_cq->lock);
- pthread_spin_lock(&recv_cq->lock);
+ hns_roce_spin_lock(&send_cq->hr_lock);
+ hns_roce_spin_lock(&recv_cq->hr_lock);
} else {
- pthread_spin_lock(&recv_cq->lock);
- pthread_spin_lock(&send_cq->lock);
+ hns_roce_spin_lock(&recv_cq->hr_lock);
+ hns_roce_spin_lock(&send_cq->hr_lock);
}
} else if (send_cq) {
- pthread_spin_lock(&send_cq->lock);
+ hns_roce_spin_lock(&send_cq->hr_lock);
} else if (recv_cq) {
- pthread_spin_lock(&recv_cq->lock);
+ hns_roce_spin_lock(&recv_cq->hr_lock);
}
}
@@ -1624,18 +1624,18 @@ static void hns_roce_unlock_cqs(struct ibv_qp *qp)
if (send_cq && recv_cq) {
if (send_cq == recv_cq) {
- pthread_spin_unlock(&send_cq->lock);
+ hns_roce_spin_unlock(&send_cq->hr_lock);
} else if (send_cq->cqn < recv_cq->cqn) {
- pthread_spin_unlock(&recv_cq->lock);
- pthread_spin_unlock(&send_cq->lock);
+ hns_roce_spin_unlock(&recv_cq->hr_lock);
+ hns_roce_spin_unlock(&send_cq->hr_lock);
} else {
- pthread_spin_unlock(&send_cq->lock);
- pthread_spin_unlock(&recv_cq->lock);
+ hns_roce_spin_unlock(&send_cq->hr_lock);
+ hns_roce_spin_unlock(&recv_cq->hr_lock);
}
} else if (send_cq) {
- pthread_spin_unlock(&send_cq->lock);
+ hns_roce_spin_unlock(&send_cq->hr_lock);
} else if (recv_cq) {
- pthread_spin_unlock(&recv_cq->lock);
+ hns_roce_spin_unlock(&recv_cq->hr_lock);
}
}
@@ -1750,7 +1750,7 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
int ret = 0;
void *wqe;
- pthread_spin_lock(&srq->lock);
+ hns_roce_spin_lock(&srq->hr_lock);
max_sge = srq->max_gs - srq->rsv_sge;
for (nreq = 0; wr; ++nreq, wr = wr->next) {
@@ -1789,7 +1789,7 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
(__le32 *)&srq_db);
}
- pthread_spin_unlock(&srq->lock);
+ hns_roce_spin_unlock(&srq->hr_lock);
return ret;
}
@@ -1805,11 +1805,11 @@ static int wc_start_poll_cq(struct ibv_cq_ex *current,
if (attr->comp_mask)
return EINVAL;
- pthread_spin_lock(&cq->lock);
+ hns_roce_spin_lock(&cq->hr_lock);
err = hns_roce_poll_one(ctx, &qp, cq, NULL);
if (err != V2_CQ_OK)
- pthread_spin_unlock(&cq->lock);
+ hns_roce_spin_unlock(&cq->hr_lock);
return err;
}
@@ -1843,7 +1843,7 @@ static void wc_end_poll_cq(struct ibv_cq_ex *current)
else
update_cq_db(ctx, cq);
- pthread_spin_unlock(&cq->lock);
+ hns_roce_spin_unlock(&cq->hr_lock);
}
static enum ibv_wc_opcode wc_read_opcode(struct ibv_cq_ex *current)
@@ -2558,7 +2558,7 @@ static void wr_start(struct ibv_qp_ex *ibv_qp)
return;
}
- pthread_spin_lock(&qp->sq.lock);
+ hns_roce_spin_lock(&qp->sq.hr_lock);
qp->sge_info.start_idx = qp->next_sge;
qp->rb_sq_head = qp->sq.head;
qp->err = 0;
@@ -2591,7 +2591,8 @@ static int wr_complete(struct ibv_qp_ex *ibv_qp)
}
out:
- pthread_spin_unlock(&qp->sq.lock);
+ hns_roce_spin_unlock(&qp->sq.hr_lock);
+
if (ibv_qp->qp_base.state == IBV_QPS_ERR) {
attr.qp_state = IBV_QPS_ERR;
hns_roce_u_v2_modify_qp(&ibv_qp->qp_base, &attr, IBV_QP_STATE);
@@ -2606,7 +2607,7 @@ static void wr_abort(struct ibv_qp_ex *ibv_qp)
qp->sq.head = qp->rb_sq_head;
- pthread_spin_unlock(&qp->sq.lock);
+ hns_roce_spin_unlock(&qp->sq.hr_lock);
}
enum {
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index 3b7a67d..f6c7423 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -33,6 +33,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <math.h>
#include <errno.h>
#include <pthread.h>
#include <sys/mman.h>
@@ -42,6 +43,38 @@
#include "hns_roce_u_db.h"
#include "hns_roce_u_hw_v2.h"
+static int hns_roce_whether_need_lock(struct ibv_pd *pd)
+{
+ struct hns_roce_pad *pad;
+ bool need_lock = true;
+
+ pad = to_hr_pad(pd);
+ if (pad && pad->td)
+ need_lock = false;
+
+ return need_lock;
+}
+
+static int hns_roce_spinlock_init(struct hns_roce_spinlock *hr_lock,
+ bool need_lock)
+{
+ hr_lock->need_lock = need_lock;
+
+ if (need_lock)
+ return pthread_spin_init(&hr_lock->lock,
+ PTHREAD_PROCESS_PRIVATE);
+
+ return 0;
+}
+
+static int hns_roce_spinlock_destroy(struct hns_roce_spinlock *hr_lock)
+{
+ if (hr_lock->need_lock)
+ return pthread_spin_destroy(&hr_lock->lock);
+
+ return 0;
+}
+
void hns_roce_init_qp_indices(struct hns_roce_qp *qp)
{
qp->sq.head = 0;
@@ -85,40 +118,153 @@ int hns_roce_u_query_port(struct ibv_context *context, uint8_t port,
return ibv_cmd_query_port(context, port, attr, &cmd, sizeof(cmd));
}
+struct ibv_td *hns_roce_u_alloc_td(struct ibv_context *context,
+ struct ibv_td_init_attr *attr)
+{
+ struct hns_roce_td *td;
+
+ if (attr->comp_mask) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ td = calloc(1, sizeof(*td));
+ if (!td) {
+ errno = ENOMEM;
+ return NULL;
+ }
+
+ td->ibv_td.context = context;
+ atomic_init(&td->refcount, 1);
+
+ return &td->ibv_td;
+}
+
+int hns_roce_u_dealloc_td(struct ibv_td *ibv_td)
+{
+ struct hns_roce_td *td;
+ int ret = 0;
+
+ td = to_hr_td(ibv_td);
+ if (atomic_load(&td->refcount) > 1) {
+ ret = -EBUSY;
+ goto err;
+ }
+
+ free(td);
+
+err:
+ errno = abs(ret);
+ return ret;
+}
+
struct ibv_pd *hns_roce_u_alloc_pd(struct ibv_context *context)
{
+ struct hns_roce_alloc_pd_resp resp = {};
struct ibv_alloc_pd cmd;
struct hns_roce_pd *pd;
- struct hns_roce_alloc_pd_resp resp = {};
-
- pd = malloc(sizeof(*pd));
- if (!pd)
- return NULL;
+ int ret;
- if (ibv_cmd_alloc_pd(context, &pd->ibv_pd, &cmd, sizeof(cmd),
- &resp.ibv_resp, sizeof(resp))) {
- free(pd);
+ pd = calloc(1, sizeof(*pd));
+ if (!pd) {
+ errno = ENOMEM;
return NULL;
}
+ ret = ibv_cmd_alloc_pd(context, &pd->ibv_pd, &cmd, sizeof(cmd),
+ &resp.ibv_resp, sizeof(resp));
+
+ if (ret)
+ goto err;
+
+ atomic_init(&pd->refcount, 1);
pd->pdn = resp.pdn;
return &pd->ibv_pd;
+
+err:
+ free(pd);
+ errno = abs(ret);
+ return NULL;
}
-int hns_roce_u_free_pd(struct ibv_pd *pd)
+struct ibv_pd *hns_roce_u_alloc_pad(struct ibv_context *context,
+ struct ibv_parent_domain_init_attr *attr)
+{
+ struct hns_roce_pad *pad;
+
+ if (ibv_check_alloc_parent_domain(attr))
+ return NULL;
+
+ if (attr->comp_mask) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ pad = calloc(1, sizeof(*pad));
+ if (!pad) {
+ errno = ENOMEM;
+ return NULL;
+ }
+
+ if (attr->td) {
+ pad->td = to_hr_td(attr->td);
+ atomic_fetch_add(&pad->td->refcount, 1);
+ }
+
+ pad->pd.protection_domain = to_hr_pd(attr->pd);
+ atomic_fetch_add(&pad->pd.protection_domain->refcount, 1);
+
+ ibv_initialize_parent_domain(&pad->pd.ibv_pd,
+ &pad->pd.protection_domain->ibv_pd);
+
+ return &pad->pd.ibv_pd;
+}
+
+static void hns_roce_free_pad(struct hns_roce_pad *pad)
+{
+ atomic_fetch_sub(&pad->pd.protection_domain->refcount, 1);
+
+ if (pad->td)
+ atomic_fetch_sub(&pad->td->refcount, 1);
+
+ free(pad);
+}
+
+static int hns_roce_free_pd(struct hns_roce_pd *pd)
{
int ret;
- ret = ibv_cmd_dealloc_pd(pd);
+ if (atomic_load(&pd->refcount) > 1) {
+ ret = -EBUSY;
+ goto err;
+ }
+
+ ret = ibv_cmd_dealloc_pd(&pd->ibv_pd);
if (ret)
- return ret;
+ goto err;
- free(to_hr_pd(pd));
+ free(pd);
+
+err:
+ errno = abs(ret);
return ret;
}
+int hns_roce_u_dealloc_pd(struct ibv_pd *ibv_pd)
+{
+ struct hns_roce_pad *pad = to_hr_pad(ibv_pd);
+ struct hns_roce_pd *pd = to_hr_pd(ibv_pd);
+
+ if (pad) {
+ hns_roce_free_pad(pad);
+ return 0;
+ }
+
+ return hns_roce_free_pd(pd);
+}
+
struct ibv_xrcd *hns_roce_u_open_xrcd(struct ibv_context *context,
struct ibv_xrcd_init_attr *xrcd_init_attr)
{
@@ -275,6 +421,11 @@ int hns_roce_u_dealloc_mw(struct ibv_mw *mw)
return 0;
}
+enum {
+ CREATE_CQ_SUPPORTED_COMP_MASK = IBV_CQ_INIT_ATTR_MASK_FLAGS |
+ IBV_CQ_INIT_ATTR_MASK_PD,
+};
+
enum {
CREATE_CQ_SUPPORTED_WC_FLAGS = IBV_WC_STANDARD_FLAGS |
IBV_WC_EX_WITH_CVLAN,
@@ -286,12 +437,22 @@ static int verify_cq_create_attr(struct ibv_cq_init_attr_ex *attr,
if (!attr->cqe || attr->cqe > context->max_cqe)
return -EINVAL;
- if (attr->comp_mask)
- return -EOPNOTSUPP;
+ if (!check_comp_mask(attr->comp_mask, CREATE_CQ_SUPPORTED_COMP_MASK)) {
+ verbs_err(&context->ibv_ctx, "unsupported cq comps 0x%x\n",
+ attr->comp_mask);
+ return EOPNOTSUPP;
+ }
if (!check_comp_mask(attr->wc_flags, CREATE_CQ_SUPPORTED_WC_FLAGS))
return -EOPNOTSUPP;
+ if (attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_PD) {
+ if (!to_hr_pad(attr->parent_domain)) {
+ verbs_err(&context->ibv_ctx, "failed to check the pad of cq.\n");
+ return EINVAL;
+ }
+ }
+
attr->cqe = max_t(uint32_t, HNS_ROCE_MIN_CQE_NUM,
roundup_pow_of_two(attr->cqe));
@@ -341,7 +502,9 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *context,
struct ibv_cq_init_attr_ex *attr)
{
struct hns_roce_context *hr_ctx = to_hr_ctx(context);
+ struct hns_roce_pad *pad = NULL;
struct hns_roce_cq *cq;
+ int need_lock;
int ret;
ret = verify_cq_create_attr(attr, hr_ctx);
@@ -354,7 +517,14 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *context,
goto err;
}
- ret = pthread_spin_init(&cq->lock, PTHREAD_PROCESS_PRIVATE);
+ if (attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_PD)
+ pad = to_hr_pad(attr->parent_domain);
+
+ need_lock = hns_roce_whether_need_lock(pad ? &pad->pd.ibv_pd : NULL);
+ if (!need_lock)
+ verbs_info(verbs_get_ctx(context), "configure cq as no lock.\n");
+
+ ret = hns_roce_spinlock_init(&cq->hr_lock, need_lock);
if (ret)
goto err_lock;
@@ -385,14 +555,12 @@ err_cmd:
hns_roce_free_db(hr_ctx, cq->db, HNS_ROCE_CQ_TYPE_DB);
err_db:
hns_roce_free_buf(&cq->buf);
-err_lock:
err_buf:
+ hns_roce_spinlock_destroy(&cq->hr_lock);
+err_lock:
free(cq);
err:
- if (ret < 0)
- ret = -ret;
-
- errno = ret;
+ errno = abs(ret);
return NULL;
}
@@ -655,6 +823,7 @@ static struct ibv_srq *create_srq(struct ibv_context *context,
{
struct hns_roce_context *hr_ctx = to_hr_ctx(context);
struct hns_roce_srq *srq;
+ int need_lock;
int ret;
ret = verify_srq_create_attr(hr_ctx, init_attr);
@@ -667,7 +836,11 @@ static struct ibv_srq *create_srq(struct ibv_context *context,
goto err;
}
- if (pthread_spin_init(&srq->lock, PTHREAD_PROCESS_PRIVATE))
+ need_lock = hns_roce_whether_need_lock(init_attr->pd);
+ if (!need_lock)
+ verbs_info(verbs_get_ctx(context), "configure srq as no lock.\n");
+
+ if (hns_roce_spinlock_init(&srq->hr_lock, need_lock))
goto err_free_srq;
set_srq_param(context, srq, init_attr);
@@ -875,6 +1048,48 @@ static int verify_qp_create_attr(struct hns_roce_context *ctx,
return verify_qp_create_cap(ctx, attr);
}
+static int hns_roce_qp_spinlock_init(struct hns_roce_context *ctx,
+ struct ibv_qp_init_attr_ex *attr,
+ struct hns_roce_qp *qp)
+{
+ int sq_need_lock;
+ int rq_need_lock;
+ int ret;
+
+ sq_need_lock = hns_roce_whether_need_lock(attr->pd);
+ if (!sq_need_lock)
+ verbs_warn(&ctx->ibv_ctx, "configure sq as no lock.\n");
+
+ rq_need_lock = hns_roce_whether_need_lock(attr->pd);
+ if (!rq_need_lock)
+ verbs_warn(&ctx->ibv_ctx, "configure rq as no lock.\n");
+
+ ret = hns_roce_spinlock_init(&qp->sq.hr_lock, sq_need_lock);
+ if (ret) {
+ verbs_err(&ctx->ibv_ctx, "failed to init sq spinlock.\n");
+ return ret;
+ }
+
+ ret = hns_roce_spinlock_init(&qp->rq.hr_lock, rq_need_lock);
+ if (ret) {
+ verbs_err(&ctx->ibv_ctx, "failed to init rq spinlock.\n");
+ goto err_rq_lock;
+ }
+
+ return 0;
+
+err_rq_lock:
+ hns_roce_spinlock_destroy(&qp->sq.hr_lock);
+
+ return ret;
+}
+
+void hns_roce_qp_spinlock_destroy(struct hns_roce_qp *qp)
+{
+ hns_roce_spinlock_destroy(&qp->rq.hr_lock);
+ hns_roce_spinlock_destroy(&qp->sq.hr_lock);
+}
+
static int alloc_recv_rinl_buf(uint32_t max_sge,
struct hns_roce_rinl_buf *rinl_buf)
{
@@ -1248,8 +1463,8 @@ static int hns_roce_alloc_qp_buf(struct ibv_qp_init_attr_ex *attr,
{
int ret;
- if (pthread_spin_init(&qp->sq.lock, PTHREAD_PROCESS_PRIVATE) ||
- pthread_spin_init(&qp->rq.lock, PTHREAD_PROCESS_PRIVATE))
+ if (pthread_spin_init(&qp->sq.hr_lock.lock, PTHREAD_PROCESS_PRIVATE) ||
+ pthread_spin_init(&qp->rq.hr_lock.lock, PTHREAD_PROCESS_PRIVATE))
return -ENOMEM;
ret = qp_alloc_wqe(&attr->cap, qp, ctx);
@@ -1294,6 +1509,10 @@ static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx,
hns_roce_set_qp_params(attr, qp, context);
+ ret = hns_roce_qp_spinlock_init(context, attr, qp);
+ if (ret)
+ goto err_spinlock;
+
ret = hns_roce_alloc_qp_buf(attr, qp, context);
if (ret)
goto err_buf;
@@ -1327,6 +1546,8 @@ err_ops:
err_cmd:
hns_roce_free_qp_buf(qp, context);
err_buf:
+ hns_roce_qp_spinlock_destroy(qp);
+err_spinlock:
free(qp);
err:
if (ret < 0)
--
2.30.0
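A usage sketch may help here, since the patch adds the provider plumbing but no caller. The snippet below is an assumption-laden example (the helper name and CQ depth are made up, not part of the patch): it allocates a thread domain, wraps an existing PD and the TD in a parent domain, and creates an extended CQ on that parent domain so the hns provider can elide its internal spinlocks as implemented above.

/*
 * Hedged usage sketch: thread domain + parent domain + CQ bound to the
 * parent domain. Queues created on such a parent domain are configured
 * as "no lock" by the provider.
 */
#include <infiniband/verbs.h>

static struct ibv_cq_ex *create_td_cq_example(struct ibv_context *ctx,
					      struct ibv_pd *pd)
{
	struct ibv_td_init_attr td_attr = {};
	struct ibv_parent_domain_init_attr pad_attr = {};
	struct ibv_cq_init_attr_ex cq_attr = {};
	struct ibv_td *td;
	struct ibv_pd *pad;
	struct ibv_cq_ex *cq;

	/* The thread domain tells the provider the caller serializes access. */
	td = ibv_alloc_td(ctx, &td_attr);
	if (!td)
		return NULL;

	/* Wrap the PD and TD in a parent domain (comp_mask must stay 0 here). */
	pad_attr.pd = pd;
	pad_attr.td = td;
	pad = ibv_alloc_parent_domain(ctx, &pad_attr);
	if (!pad)
		goto err_td;

	/* Create the CQ on the parent domain so its internal lock can be elided. */
	cq_attr.cqe = 64;
	cq_attr.comp_mask = IBV_CQ_INIT_ATTR_MASK_PD;
	cq_attr.parent_domain = pad;
	cq = ibv_create_cq_ex(ctx, &cq_attr);
	if (!cq)
		goto err_pad;

	return cq;

err_pad:
	ibv_dealloc_pd(pad);
err_td:
	ibv_dealloc_td(td);
	return NULL;
}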

View File

@ -1,152 +0,0 @@
From 7d72b40d311875677135289874d4a69e4891b0de Mon Sep 17 00:00:00 2001
From: Chengchang Tang <tangchengchang@huawei.com>
Date: Mon, 28 Nov 2022 21:52:20 +0800
Subject: Update kernel headers
driver inclusion
category: feature
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I63L1M
----------------------------------------------------------
To commit ?? ("RDMA/hns: Fixes concurrent ressetting and post_recv in DCA
mode").
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
Reviewed-by: Yangyang Li <liyangyang20@huawei.com>
---
kernel-headers/rdma/hns-abi.h | 84 ++++++++++++++++++++++++++++++++---
1 file changed, 78 insertions(+), 6 deletions(-)
diff --git a/kernel-headers/rdma/hns-abi.h b/kernel-headers/rdma/hns-abi.h
index 9866c51..6950841 100644
--- a/kernel-headers/rdma/hns-abi.h
+++ b/kernel-headers/rdma/hns-abi.h
@@ -77,7 +77,9 @@ enum hns_roce_qp_cap_flags {
HNS_ROCE_QP_CAP_RQ_RECORD_DB = 1 << 0,
HNS_ROCE_QP_CAP_SQ_RECORD_DB = 1 << 1,
HNS_ROCE_QP_CAP_OWNER_DB = 1 << 2,
+ HNS_ROCE_QP_CAP_DYNAMIC_CTX_ATTACH = 1 << 4,
HNS_ROCE_QP_CAP_DIRECT_WQE = 1 << 5,
+ HNS_ROCE_QP_CAP_DYNAMIC_CTX_DETACH = 1 << 6,
};
struct hns_roce_ib_create_qp_resp {
@@ -95,33 +97,46 @@ struct hns_roce_ib_modify_qp_resp {
__u8 tc_mode;
__u8 priority;
__u8 reserved[6];
+ __u32 dcan;
+ __u32 rsv2;
};
enum {
HNS_ROCE_EXSGE_FLAGS = 1 << 0,
HNS_ROCE_RQ_INLINE_FLAGS = 1 << 1,
HNS_ROCE_CQE_INLINE_FLAGS = 1 << 2,
+ HNS_ROCE_UCTX_CONFIG_DCA = 1 << 3,
};
enum {
HNS_ROCE_RSP_EXSGE_FLAGS = 1 << 0,
HNS_ROCE_RSP_RQ_INLINE_FLAGS = 1 << 1,
HNS_ROCE_RSP_CQE_INLINE_FLAGS = 1 << 2,
+ HNS_ROCE_UCTX_RSP_DCA_FLAGS = HNS_ROCE_UCTX_CONFIG_DCA,
};
struct hns_roce_ib_alloc_ucontext_resp {
__u32 qp_tab_size;
__u32 cqe_size;
- __u32 srq_tab_size;
- __u32 reserved;
- __u32 config;
- __u32 max_inline_data;
- __u8 mac_type;
- __u8 rsv1[7];
+ __u32 srq_tab_size;
+ __u32 reserved;
+ __u32 config;
+ __u32 max_inline_data;
+ __u8 mac_type;
+ __u8 rsv1[7];
+ __u32 dca_qps;
+ __u32 dca_mmap_size;
+ __aligned_u64 dca_mmap_key;
+};
+
+enum hns_roce_uctx_comp_mask {
+ HNS_ROCE_ALLOC_UCTX_COMP_DCA_MAX_QPS = 1 << 0,
};
struct hns_roce_ib_alloc_ucontext {
__u32 config;
+ __u32 comp; /* use hns_roce_uctx_comp_mask */
+ __u32 dca_max_qps;
__u32 reserved;
};
@@ -129,4 +144,61 @@ struct hns_roce_ib_alloc_pd_resp {
__u32 pdn;
};
+#define UVERBS_ID_NS_MASK 0xF000
+#define UVERBS_ID_NS_SHIFT 12
+
+enum hns_ib_objects {
+ HNS_IB_OBJECT_DCA_MEM = (1U << UVERBS_ID_NS_SHIFT),
+};
+
+enum hns_ib_dca_mem_methods {
+ HNS_IB_METHOD_DCA_MEM_REG = (1U << UVERBS_ID_NS_SHIFT),
+ HNS_IB_METHOD_DCA_MEM_DEREG,
+ HNS_IB_METHOD_DCA_MEM_SHRINK,
+ HNS_IB_METHOD_DCA_MEM_ATTACH,
+ HNS_IB_METHOD_DCA_MEM_DETACH,
+ HNS_IB_METHOD_DCA_MEM_QUERY,
+};
+
+enum hns_ib_dca_mem_reg_attrs {
+ HNS_IB_ATTR_DCA_MEM_REG_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+ HNS_IB_ATTR_DCA_MEM_REG_FLAGS,
+ HNS_IB_ATTR_DCA_MEM_REG_LEN,
+ HNS_IB_ATTR_DCA_MEM_REG_ADDR,
+ HNS_IB_ATTR_DCA_MEM_REG_KEY,
+};
+
+enum hns_ib_dca_mem_dereg_attrs {
+ HNS_IB_ATTR_DCA_MEM_DEREG_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+};
+
+enum hns_ib_dca_mem_shrink_attrs {
+ HNS_IB_ATTR_DCA_MEM_SHRINK_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+ HNS_IB_ATTR_DCA_MEM_SHRINK_RESERVED_SIZE,
+ HNS_IB_ATTR_DCA_MEM_SHRINK_OUT_FREE_KEY,
+ HNS_IB_ATTR_DCA_MEM_SHRINK_OUT_FREE_MEMS,
+};
+
+enum hns_ib_dca_mem_attach_attrs {
+ HNS_IB_ATTR_DCA_MEM_ATTACH_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+ HNS_IB_ATTR_DCA_MEM_ATTACH_SQ_OFFSET,
+ HNS_IB_ATTR_DCA_MEM_ATTACH_SGE_OFFSET,
+ HNS_IB_ATTR_DCA_MEM_ATTACH_RQ_OFFSET,
+ HNS_IB_ATTR_DCA_MEM_ATTACH_OUT_ALLOC_FLAGS,
+ HNS_IB_ATTR_DCA_MEM_ATTACH_OUT_ALLOC_PAGES,
+};
+
+enum hns_ib_dca_mem_detach_attrs {
+ HNS_IB_ATTR_DCA_MEM_DETACH_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+ HNS_IB_ATTR_DCA_MEM_DETACH_SQ_INDEX,
+};
+
+enum hns_ib_dca_mem_query_attrs {
+ HNS_IB_ATTR_DCA_MEM_QUERY_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+ HNS_IB_ATTR_DCA_MEM_QUERY_PAGE_INDEX,
+ HNS_IB_ATTR_DCA_MEM_QUERY_OUT_KEY,
+ HNS_IB_ATTR_DCA_MEM_QUERY_OUT_OFFSET,
+ HNS_IB_ATTR_DCA_MEM_QUERY_OUT_PAGE_COUNT,
+};
+
#endif /* HNS_ABI_USER_H */
--
2.30.0

View File

@ -1,342 +0,0 @@
From 58de0f69573e8b76affe401a261f17f1a5cedc01 Mon Sep 17 00:00:00 2001
From: Chengchang Tang <tangchengchang@huawei.com>
Date: Mon, 10 May 2021 17:13:09 +0800
Subject: libhns: Introduce DCA for RC QP
driver inclusion
category: feature
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I63L1M
----------------------------------------------------------
The HIP09 introduces the DCA (Dynamic Context Attachment) feature, which
allows many RC QPs to share WQE buffers in a memory pool. This reduces
memory consumption when many QPs are inactive.
Two functions are defined for adding buffers to and removing buffers from
the memory pool, by calling the ib cmd interface implemented in the hns
kernel-space driver.
If a QP enables the DCA feature, its WQE buffer is attached to the memory
pool when the user starts to post WRs and is detached once all CQEs have
been polled.
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
Reviewed-by: Yangyang Li <liyangyang20@huawei.com>
---
providers/hns/hns_roce_u.c | 59 ++++++++++++-
providers/hns/hns_roce_u.h | 21 +++++
providers/hns/hns_roce_u_buf.c | 147 +++++++++++++++++++++++++++++++++
3 files changed, 223 insertions(+), 4 deletions(-)
diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c
index 3428bda..bd2b251 100644
--- a/providers/hns/hns_roce_u.c
+++ b/providers/hns/hns_roce_u.c
@@ -95,6 +95,53 @@ static const struct verbs_context_ops hns_common_ops = {
.alloc_parent_domain = hns_roce_u_alloc_pad,
};
+static int init_dca_context(struct hns_roce_context *ctx, int page_size)
+{
+ struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
+ int ret;
+
+ if (!(ctx->config & HNS_ROCE_UCTX_RSP_DCA_FLAGS))
+ return 0;
+
+ list_head_init(&dca_ctx->mem_list);
+ ret = pthread_spin_init(&dca_ctx->lock, PTHREAD_PROCESS_PRIVATE);
+ if (ret)
+ return ret;
+
+ dca_ctx->unit_size = page_size * HNS_DCA_DEFAULT_UNIT_PAGES;
+ dca_ctx->max_size = HNS_DCA_MAX_MEM_SIZE;
+ dca_ctx->mem_cnt = 0;
+
+ return 0;
+}
+
+static void uninit_dca_context(struct hns_roce_context *ctx)
+{
+ struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
+
+ if (!(ctx->config & HNS_ROCE_UCTX_RSP_DCA_FLAGS))
+ return;
+
+ pthread_spin_lock(&dca_ctx->lock);
+ hns_roce_cleanup_dca_mem(ctx);
+ pthread_spin_unlock(&dca_ctx->lock);
+
+ pthread_spin_destroy(&dca_ctx->lock);
+}
+
+static int hns_roce_mmap(struct hns_roce_device *hr_dev,
+ struct hns_roce_context *context, int cmd_fd)
+{
+ int page_size = hr_dev->page_size;
+
+ context->uar = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
+ MAP_SHARED, cmd_fd, 0);
+ if (context->uar == MAP_FAILED)
+ return -ENOMEM;
+
+ return 0;
+}
+
static uint32_t calc_table_shift(uint32_t entry_count, uint32_t size_shift)
{
uint32_t count_shift = hr_ilog32(entry_count);
@@ -119,7 +166,7 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
return NULL;
cmd.config |= HNS_ROCE_EXSGE_FLAGS | HNS_ROCE_RQ_INLINE_FLAGS |
- HNS_ROCE_CQE_INLINE_FLAGS;
+ HNS_ROCE_CQE_INLINE_FLAGS | HNS_ROCE_UCTX_CONFIG_DCA;
if (ibv_cmd_get_context(&context->ibv_ctx, &cmd.ibv_cmd, sizeof(cmd),
&resp.ibv_resp, sizeof(resp)))
goto err_free;
@@ -165,11 +212,12 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
context->max_srq_wr = dev_attrs.max_srq_wr;
context->max_srq_sge = dev_attrs.max_srq_sge;
- context->uar = mmap(NULL, hr_dev->page_size, PROT_READ | PROT_WRITE,
- MAP_SHARED, cmd_fd, 0);
- if (context->uar == MAP_FAILED)
+ if (init_dca_context(context, hr_dev->page_size))
goto err_free;
+ if (hns_roce_mmap(hr_dev, context, cmd_fd))
+ goto dca_free;
+
pthread_spin_init(&context->uar_lock, PTHREAD_PROCESS_PRIVATE);
verbs_set_ops(&context->ibv_ctx, &hns_common_ops);
@@ -177,6 +225,8 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
return &context->ibv_ctx;
+dca_free:
+ uninit_dca_context(context);
err_free:
verbs_uninit_context(&context->ibv_ctx);
free(context);
@@ -189,6 +239,7 @@ static void hns_roce_free_context(struct ibv_context *ibctx)
struct hns_roce_context *context = to_hr_ctx(ibctx);
munmap(context->uar, hr_dev->page_size);
+ uninit_dca_context(context);
verbs_uninit_context(&context->ibv_ctx);
free(context);
}
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index 2edb07e..0e25ce5 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -147,6 +147,10 @@
#define hr_reg_read(ptr, field) _hr_reg_read(ptr, field)
+enum {
+ HNS_ROCE_CAP_FLAG_DCA_MODE = BIT(15),
+};
+
#define HNS_ROCE_QP_TABLE_BITS 8
#define HNS_ROCE_QP_TABLE_SIZE BIT(HNS_ROCE_QP_TABLE_BITS)
@@ -199,6 +203,18 @@ struct hns_roce_spinlock {
int need_lock;
};
+#define HNS_DCA_MAX_MEM_SIZE ~0UL
+#define HNS_DCA_DEFAULT_UNIT_PAGES 16
+
+struct hns_roce_dca_ctx {
+ struct list_head mem_list;
+ pthread_spinlock_t lock;
+ int mem_cnt;
+ unsigned int unit_size;
+ uint64_t max_size;
+ uint64_t curr_size;
+};
+
struct hns_roce_context {
struct verbs_context ibv_ctx;
void *uar;
@@ -231,6 +247,8 @@ struct hns_roce_context {
unsigned int cqe_size;
uint32_t config;
unsigned int max_inline_data;
+
+ struct hns_roce_dca_ctx dca_ctx;
};
struct hns_roce_td {
@@ -562,6 +580,9 @@ void hns_roce_qp_spinlock_destroy(struct hns_roce_qp *qp);
void hns_roce_free_qp_buf(struct hns_roce_qp *qp, struct hns_roce_context *ctx);
+void hns_roce_cleanup_dca_mem(struct hns_roce_context *ctx);
+int hns_roce_add_dca_mem(struct hns_roce_context *ctx, uint32_t size);
+
void hns_roce_init_qp_indices(struct hns_roce_qp *qp);
extern const struct hns_roce_u_hw hns_roce_u_hw_v2;
diff --git a/providers/hns/hns_roce_u_buf.c b/providers/hns/hns_roce_u_buf.c
index 471dd9c..02c43ae 100644
--- a/providers/hns/hns_roce_u_buf.c
+++ b/providers/hns/hns_roce_u_buf.c
@@ -60,3 +60,150 @@ void hns_roce_free_buf(struct hns_roce_buf *buf)
munmap(buf->buf, buf->length);
}
+
+struct hns_roce_dca_mem {
+ uint32_t handle;
+ struct list_node entry;
+ struct hns_roce_buf buf;
+ struct hns_roce_context *ctx;
+};
+
+static void free_dca_mem(struct hns_roce_context *ctx,
+ struct hns_roce_dca_mem *mem)
+{
+ hns_roce_free_buf(&mem->buf);
+ free(mem);
+}
+
+static struct hns_roce_dca_mem *alloc_dca_mem(uint32_t size)
+{
+ struct hns_roce_dca_mem *mem = NULL;
+ int ret;
+
+ mem = malloc(sizeof(struct hns_roce_dca_mem));
+ if (!mem) {
+ errno = ENOMEM;
+ return NULL;
+ }
+
+ ret = hns_roce_alloc_buf(&mem->buf, size, HNS_HW_PAGE_SIZE);
+ if (ret) {
+ errno = ENOMEM;
+ free(mem);
+ return NULL;
+ }
+
+ return mem;
+}
+
+static inline uint64_t dca_mem_to_key(struct hns_roce_dca_mem *dca_mem)
+{
+ return (uintptr_t)dca_mem;
+}
+
+static inline void *dca_mem_addr(struct hns_roce_dca_mem *dca_mem, int offset)
+{
+ return dca_mem->buf.buf + offset;
+}
+
+static int register_dca_mem(struct hns_roce_context *ctx, uint64_t key,
+ void *addr, uint32_t size, uint32_t *handle)
+{
+ struct ib_uverbs_attr *attr;
+ int ret;
+
+ DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM,
+ HNS_IB_METHOD_DCA_MEM_REG, 4);
+ fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_REG_LEN, size);
+ fill_attr_in_uint64(cmd, HNS_IB_ATTR_DCA_MEM_REG_ADDR,
+ ioctl_ptr_to_u64(addr));
+ fill_attr_in_uint64(cmd, HNS_IB_ATTR_DCA_MEM_REG_KEY, key);
+ attr = fill_attr_out_obj(cmd, HNS_IB_ATTR_DCA_MEM_REG_HANDLE);
+
+ ret = execute_ioctl(&ctx->ibv_ctx.context, cmd);
+ if (ret) {
+ verbs_err(&ctx->ibv_ctx, "failed to reg DCA mem, ret = %d.\n",
+ ret);
+ return ret;
+ }
+
+ *handle = read_attr_obj(HNS_IB_ATTR_DCA_MEM_REG_HANDLE, attr);
+
+ return 0;
+}
+
+static void deregister_dca_mem(struct hns_roce_context *ctx, uint32_t handle)
+{
+ int ret;
+
+ DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM,
+ HNS_IB_METHOD_DCA_MEM_DEREG, 1);
+ fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_DEREG_HANDLE, handle);
+ ret = execute_ioctl(&ctx->ibv_ctx.context, cmd);
+ if (ret)
+ verbs_warn(&ctx->ibv_ctx,
+ "failed to dereg DCA mem-%u, ret = %d.\n",
+ handle, ret);
+}
+
+void hns_roce_cleanup_dca_mem(struct hns_roce_context *ctx)
+{
+ struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
+ struct hns_roce_dca_mem *mem;
+ struct hns_roce_dca_mem *tmp;
+
+ list_for_each_safe(&dca_ctx->mem_list, mem, tmp, entry)
+ deregister_dca_mem(ctx, mem->handle);
+}
+
+static bool add_dca_mem_enabled(struct hns_roce_dca_ctx *ctx,
+ uint32_t alloc_size)
+{
+ bool enable;
+
+ pthread_spin_lock(&ctx->lock);
+
+ if (ctx->unit_size == 0) /* Pool size can't be increased */
+ enable = false;
+ else if (ctx->max_size == HNS_DCA_MAX_MEM_SIZE) /* Pool size no limit */
+ enable = true;
+ else /* Pool size doesn't exceed max size */
+ enable = (ctx->curr_size + alloc_size) < ctx->max_size;
+
+ pthread_spin_unlock(&ctx->lock);
+
+ return enable;
+}
+
+int hns_roce_add_dca_mem(struct hns_roce_context *ctx, uint32_t size)
+{
+ struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
+ struct hns_roce_dca_mem *mem;
+ int ret;
+
+ if (!add_dca_mem_enabled(&ctx->dca_ctx, size))
+ return -ENOMEM;
+
+ /* Step 1: Alloc DCA mem address */
+ mem = alloc_dca_mem(
+ DIV_ROUND_UP(size, dca_ctx->unit_size) * dca_ctx->unit_size);
+ if (!mem)
+ return -ENOMEM;
+
+ /* Step 2: Register DCA mem uobject to pin user address */
+ ret = register_dca_mem(ctx, dca_mem_to_key(mem), dca_mem_addr(mem, 0),
+ mem->buf.length, &mem->handle);
+ if (ret) {
+ free_dca_mem(ctx, mem);
+ return ret;
+ }
+
+ /* Step 3: Add DCA mem node to pool */
+ pthread_spin_lock(&dca_ctx->lock);
+ list_add_tail(&dca_ctx->mem_list, &mem->entry);
+ dca_ctx->mem_cnt++;
+ dca_ctx->curr_size += mem->buf.length;
+ pthread_spin_unlock(&dca_ctx->lock);
+
+ return 0;
+}
--
2.30.0

View File

@ -1,204 +0,0 @@
From c8d7a2dc811a18ffd314b8764c961234e5f2ec77 Mon Sep 17 00:00:00 2001
From: Chengchang Tang <tangchengchang@huawei.com>
Date: Mon, 10 May 2021 17:13:13 +0800
Subject: libhns: Add support for shrinking DCA memory pool
driver inclusion
category: feature
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I63L1M
----------------------------------------------------------
A QP's WQE buffer may be detached after the QP is modified or a CQE is
polled, and the DCA mem object may then be marked clean because no QP is
using it. So shrink the clean DCA mem out of the memory pool and destroy
its buffer to reduce memory consumption.
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
Reviewed-by: Yangyang Li <liyangyang20@huawei.com>
---
providers/hns/hns_roce_u.h | 2 +
providers/hns/hns_roce_u_buf.c | 103 +++++++++++++++++++++++++++++++
providers/hns/hns_roce_u_hw_v2.c | 7 +++
3 files changed, 112 insertions(+)
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index 0e25ce5..7b5c5c9 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -212,6 +212,7 @@ struct hns_roce_dca_ctx {
int mem_cnt;
unsigned int unit_size;
uint64_t max_size;
+ uint64_t min_size;
uint64_t curr_size;
};
@@ -580,6 +581,7 @@ void hns_roce_qp_spinlock_destroy(struct hns_roce_qp *qp);
void hns_roce_free_qp_buf(struct hns_roce_qp *qp, struct hns_roce_context *ctx);
+void hns_roce_shrink_dca_mem(struct hns_roce_context *ctx);
void hns_roce_cleanup_dca_mem(struct hns_roce_context *ctx);
int hns_roce_add_dca_mem(struct hns_roce_context *ctx, uint32_t size);
diff --git a/providers/hns/hns_roce_u_buf.c b/providers/hns/hns_roce_u_buf.c
index 02c43ae..c0f86e9 100644
--- a/providers/hns/hns_roce_u_buf.c
+++ b/providers/hns/hns_roce_u_buf.c
@@ -101,6 +101,20 @@ static inline uint64_t dca_mem_to_key(struct hns_roce_dca_mem *dca_mem)
return (uintptr_t)dca_mem;
}
+static struct hns_roce_dca_mem *key_to_dca_mem(struct hns_roce_dca_ctx *ctx,
+ uint64_t key)
+{
+ struct hns_roce_dca_mem *mem;
+ struct hns_roce_dca_mem *tmp;
+
+ list_for_each_safe(&ctx->mem_list, mem, tmp, entry) {
+ if (dca_mem_to_key(mem) == key)
+ return mem;
+ }
+
+ return NULL;
+}
+
static inline void *dca_mem_addr(struct hns_roce_dca_mem *dca_mem, int offset)
{
return dca_mem->buf.buf + offset;
@@ -156,6 +170,32 @@ void hns_roce_cleanup_dca_mem(struct hns_roce_context *ctx)
deregister_dca_mem(ctx, mem->handle);
}
+struct hns_dca_mem_shrink_resp {
+ uint32_t free_mems;
+ uint64_t free_key;
+};
+
+static int shrink_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
+ uint64_t size, struct hns_dca_mem_shrink_resp *resp)
+{
+ int ret;
+
+ DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM,
+ HNS_IB_METHOD_DCA_MEM_SHRINK, 4);
+ fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_SHRINK_HANDLE, handle);
+ fill_attr_in_uint64(cmd, HNS_IB_ATTR_DCA_MEM_SHRINK_RESERVED_SIZE, size);
+ fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_SHRINK_OUT_FREE_KEY,
+ &resp->free_key, sizeof(resp->free_key));
+ fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_SHRINK_OUT_FREE_MEMS,
+ &resp->free_mems, sizeof(resp->free_mems));
+
+ ret = execute_ioctl(&ctx->ibv_ctx.context, cmd);
+ if (ret)
+ verbs_err(&ctx->ibv_ctx, "failed to shrink DCA mem, ret = %d.\n",
+ ret);
+
+ return ret;
+}
static bool add_dca_mem_enabled(struct hns_roce_dca_ctx *ctx,
uint32_t alloc_size)
{
@@ -175,6 +215,17 @@ static bool add_dca_mem_enabled(struct hns_roce_dca_ctx *ctx,
return enable;
}
+static bool shrink_dca_mem_enabled(struct hns_roce_dca_ctx *ctx)
+{
+ bool enable;
+
+ pthread_spin_lock(&ctx->lock);
+ enable = ctx->mem_cnt > 0 && ctx->min_size < ctx->max_size;
+ pthread_spin_unlock(&ctx->lock);
+
+ return enable;
+}
+
int hns_roce_add_dca_mem(struct hns_roce_context *ctx, uint32_t size)
{
struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
@@ -207,3 +258,55 @@ int hns_roce_add_dca_mem(struct hns_roce_context *ctx, uint32_t size)
return 0;
}
+
+void hns_roce_shrink_dca_mem(struct hns_roce_context *ctx)
+{
+ struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
+ struct hns_dca_mem_shrink_resp resp = {};
+ struct hns_roce_dca_mem *mem;
+ int dca_mem_cnt;
+ uint32_t handle;
+ int ret;
+
+ pthread_spin_lock(&dca_ctx->lock);
+ dca_mem_cnt = ctx->dca_ctx.mem_cnt;
+ pthread_spin_unlock(&dca_ctx->lock);
+ while (dca_mem_cnt > 0 && shrink_dca_mem_enabled(dca_ctx)) {
+ resp.free_mems = 0;
+ /* Step 1: Use any DCA mem uobject to shrink pool */
+ pthread_spin_lock(&dca_ctx->lock);
+ mem = list_tail(&dca_ctx->mem_list,
+ struct hns_roce_dca_mem, entry);
+ handle = mem ? mem->handle : 0;
+ pthread_spin_unlock(&dca_ctx->lock);
+ if (!mem)
+ break;
+
+ ret = shrink_dca_mem(ctx, handle, dca_ctx->min_size, &resp);
+ if (ret || likely(resp.free_mems < 1))
+ break;
+
+ /* Step 2: Remove shrunk DCA mem node from pool */
+ pthread_spin_lock(&dca_ctx->lock);
+ mem = key_to_dca_mem(dca_ctx, resp.free_key);
+ if (mem) {
+ list_del(&mem->entry);
+ dca_ctx->mem_cnt--;
+ dca_ctx->curr_size -= mem->buf.length;
+ }
+
+ handle = mem ? mem->handle : 0;
+ pthread_spin_unlock(&dca_ctx->lock);
+ if (!mem)
+ break;
+
+ /* Step 3: Destroy DCA mem uobject */
+ deregister_dca_mem(ctx, handle);
+ free_dca_mem(ctx, mem);
+ /* No any free memory after deregister 1 DCA mem */
+ if (resp.free_mems <= 1)
+ break;
+
+ dca_mem_cnt--;
+ }
+}
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index 7b2f2d1..f3a7e6b 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -738,6 +738,10 @@ static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne,
hns_roce_spin_unlock(&cq->hr_lock);
+ /* Try to shrink the DCA mem */
+ if (ctx->dca_ctx.mem_cnt > 0)
+ hns_roce_shrink_dca_mem(ctx);
+
return err == V2_CQ_POLL_ERR ? err : npolled;
}
@@ -1674,6 +1678,9 @@ static int hns_roce_u_v2_destroy_qp(struct ibv_qp *ibqp)
free(qp);
+ if (ctx->dca_ctx.mem_cnt > 0)
+ hns_roce_shrink_dca_mem(ctx);
+
return ret;
}
--
2.30.0

View File

@ -1,618 +0,0 @@
From 835bc1a62dfc3398ef9da23de07348a353f67214 Mon Sep 17 00:00:00 2001
From: Chengchang Tang <tangchengchang@huawei.com>
Date: Mon, 10 May 2021 17:13:17 +0800
Subject: libhns: Add support for attaching QP's WQE buffer
driver inclusion
category: feature
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I63L1M
----------------------------------------------------------
If a uQP works in DCA mode, its WQE buffer is split into many blocks and
stored in a list. The blocks are allocated from the DCA memory pool before
WRs are posted and are dropped when the QP's CI equals its PI after the CQ
is polled.
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
Reviewed-by: Yangyang Li <liyangyang20@huawei.com>
---
providers/hns/hns_roce_u.h | 26 ++++-
providers/hns/hns_roce_u_buf.c | 173 ++++++++++++++++++++++++++++++-
providers/hns/hns_roce_u_hw_v2.c | 151 ++++++++++++++++++++++++---
providers/hns/hns_roce_u_hw_v2.h | 2 +
providers/hns/hns_roce_u_verbs.c | 32 ++++--
5 files changed, 358 insertions(+), 26 deletions(-)
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index 7b5c5c9..44a733f 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -348,11 +348,18 @@ struct hns_roce_sge_ex {
unsigned int sge_shift;
};
+struct hns_roce_dca_buf {
+ void **bufs;
+ unsigned int max_cnt;
+ unsigned int shift;
+};
+
struct hns_roce_qp {
struct verbs_qp verbs_qp;
struct hns_roce_buf buf;
+ struct hns_roce_dca_buf dca_wqe;
int max_inline_data;
- int buf_size;
+ unsigned int buf_size;
unsigned int sq_signal_bits;
struct hns_roce_wq sq;
struct hns_roce_wq rq;
@@ -401,11 +408,22 @@ struct hns_roce_u_hw {
struct verbs_context_ops hw_ops;
};
+struct hns_roce_dca_attach_attr {
+ uint32_t sq_offset;
+ uint32_t sge_offset;
+ uint32_t rq_offset;
+};
+
+struct hns_roce_dca_detach_attr {
+ uint32_t sq_index;
+};
+
/*
* The entries's buffer should be aligned to a multiple of the hardware's
* minimum page size.
*/
#define hr_hw_page_align(x) align(x, HNS_HW_PAGE_SIZE)
+#define hr_hw_page_count(x) (hr_hw_page_align(x) / HNS_HW_PAGE_SIZE)
static inline unsigned int to_hr_hem_entries_size(int count, int buf_shift)
{
@@ -581,9 +599,13 @@ void hns_roce_qp_spinlock_destroy(struct hns_roce_qp *qp);
void hns_roce_free_qp_buf(struct hns_roce_qp *qp, struct hns_roce_context *ctx);
+int hns_roce_attach_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
+ struct hns_roce_dca_attach_attr *attr,
+ uint32_t size, struct hns_roce_dca_buf *buf);
+void hns_roce_detach_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
+ struct hns_roce_dca_detach_attr *attr);
void hns_roce_shrink_dca_mem(struct hns_roce_context *ctx);
void hns_roce_cleanup_dca_mem(struct hns_roce_context *ctx);
-int hns_roce_add_dca_mem(struct hns_roce_context *ctx, uint32_t size);
void hns_roce_init_qp_indices(struct hns_roce_qp *qp);
diff --git a/providers/hns/hns_roce_u_buf.c b/providers/hns/hns_roce_u_buf.c
index c0f86e9..3d41b89 100644
--- a/providers/hns/hns_roce_u_buf.c
+++ b/providers/hns/hns_roce_u_buf.c
@@ -196,6 +196,88 @@ static int shrink_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
return ret;
}
+
+struct hns_dca_mem_query_resp {
+ uint64_t key;
+ uint32_t offset;
+ uint32_t page_count;
+};
+
+static int query_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
+ uint32_t index, struct hns_dca_mem_query_resp *resp)
+{
+ int ret;
+
+ DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM,
+ HNS_IB_METHOD_DCA_MEM_QUERY, 5);
+ fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_QUERY_HANDLE, handle);
+ fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_QUERY_PAGE_INDEX, index);
+ fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_QUERY_OUT_KEY,
+ &resp->key, sizeof(resp->key));
+ fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_QUERY_OUT_OFFSET,
+ &resp->offset, sizeof(resp->offset));
+ fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_QUERY_OUT_PAGE_COUNT,
+ &resp->page_count, sizeof(resp->page_count));
+ ret = execute_ioctl(&ctx->ibv_ctx.context, cmd);
+ if (ret)
+ verbs_err(&ctx->ibv_ctx,
+ "failed to query DCA mem-%u, ret = %d.\n",
+ handle, ret);
+
+ return ret;
+}
+
+void hns_roce_detach_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
+ struct hns_roce_dca_detach_attr *attr)
+{
+ int ret;
+
+ DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM,
+ HNS_IB_METHOD_DCA_MEM_DETACH, 4);
+ fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_DETACH_HANDLE, handle);
+ fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_DETACH_SQ_INDEX,
+ attr->sq_index);
+ ret = execute_ioctl(&ctx->ibv_ctx.context, cmd);
+ if (ret)
+ verbs_warn(&ctx->ibv_ctx,
+ "failed to detach DCA mem-%u, ret = %d.\n",
+ handle, ret);
+}
+
+struct hns_dca_mem_attach_resp {
+#define HNS_DCA_ATTACH_OUT_FLAGS_NEW_BUFFER BIT(0)
+ uint32_t alloc_flags;
+ uint32_t alloc_pages;
+};
+
+static int attach_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
+ struct hns_roce_dca_attach_attr *attr,
+ struct hns_dca_mem_attach_resp *resp)
+{
+ int ret;
+
+ DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM,
+ HNS_IB_METHOD_DCA_MEM_ATTACH, 6);
+ fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_HANDLE, handle);
+ fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_SQ_OFFSET,
+ attr->sq_offset);
+ fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_SGE_OFFSET,
+ attr->sge_offset);
+ fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_RQ_OFFSET,
+ attr->rq_offset);
+ fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_OUT_ALLOC_FLAGS,
+ &resp->alloc_flags, sizeof(resp->alloc_flags));
+ fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_OUT_ALLOC_PAGES,
+ &resp->alloc_pages, sizeof(resp->alloc_pages));
+ ret = execute_ioctl(&ctx->ibv_ctx.context, cmd);
+ if (ret)
+ verbs_err(&ctx->ibv_ctx,
+ "failed to attach DCA mem-%u, ret = %d.\n",
+ handle, ret);
+
+ return ret;
+}
+
static bool add_dca_mem_enabled(struct hns_roce_dca_ctx *ctx,
uint32_t alloc_size)
{
@@ -226,7 +308,7 @@ static bool shrink_dca_mem_enabled(struct hns_roce_dca_ctx *ctx)
return enable;
}
-int hns_roce_add_dca_mem(struct hns_roce_context *ctx, uint32_t size)
+static int add_dca_mem(struct hns_roce_context *ctx, uint32_t size)
{
struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
struct hns_roce_dca_mem *mem;
@@ -310,3 +392,92 @@ void hns_roce_shrink_dca_mem(struct hns_roce_context *ctx)
dca_mem_cnt--;
}
}
+
+static void config_dca_pages(void *addr, struct hns_roce_dca_buf *buf,
+ uint32_t page_index, int page_count)
+{
+ void **pages = &buf->bufs[page_index];
+ int page_size = 1 << buf->shift;
+ int i;
+
+ for (i = 0; i < page_count; i++) {
+ pages[i] = addr;
+ addr += page_size;
+ }
+}
+
+static int setup_dca_buf(struct hns_roce_context *ctx, uint32_t handle,
+ struct hns_roce_dca_buf *buf, uint32_t page_count)
+{
+ struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
+ struct hns_dca_mem_query_resp resp = {};
+ struct hns_roce_dca_mem *mem;
+ uint32_t idx = 0;
+ int ret;
+
+ while (idx < page_count && idx < buf->max_cnt) {
+ resp.page_count = 0;
+ ret = query_dca_mem(ctx, handle, idx, &resp);
+ if (ret)
+ return -ENOMEM;
+ if (resp.page_count < 1)
+ break;
+
+ pthread_spin_lock(&dca_ctx->lock);
+ mem = key_to_dca_mem(dca_ctx, resp.key);
+ if (mem && resp.offset < mem->buf.length) {
+ config_dca_pages(dca_mem_addr(mem, resp.offset),
+ buf, idx, resp.page_count);
+ } else {
+ pthread_spin_unlock(&dca_ctx->lock);
+ break;
+ }
+ pthread_spin_unlock(&dca_ctx->lock);
+
+ idx += resp.page_count;
+ }
+
+ return (idx >= page_count) ? 0 : -ENOMEM;
+}
+
+#define DCA_EXPAND_MEM_TRY_TIMES 3
+int hns_roce_attach_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
+ struct hns_roce_dca_attach_attr *attr,
+ uint32_t size, struct hns_roce_dca_buf *buf)
+{
+ uint32_t buf_pages = size >> buf->shift;
+ struct hns_dca_mem_attach_resp resp = {};
+ bool is_new_buf = true;
+ int try_times = 0;
+ int ret = 0;
+
+ do {
+ resp.alloc_pages = 0;
+ ret = attach_dca_mem(ctx, handle, attr, &resp);
+ if (ret)
+ break;
+
+ if (resp.alloc_pages >= buf_pages) {
+ is_new_buf = !!(resp.alloc_flags &
+ HNS_DCA_ATTACH_OUT_FLAGS_NEW_BUFFER);
+ break;
+ }
+
+ ret = add_dca_mem(ctx, size);
+ if (ret)
+ break;
+ } while (try_times++ < DCA_EXPAND_MEM_TRY_TIMES);
+
+ if (ret || resp.alloc_pages < buf_pages) {
+ verbs_err(&ctx->ibv_ctx,
+ "failed to attach, size %u count %u != %u, ret = %d.\n",
+ size, buf_pages, resp.alloc_pages, ret);
+ return -ENOMEM;
+ }
+
+ /* No need config user address if DCA config not changed */
+ if (!is_new_buf && buf->bufs[0])
+ return 0;
+
+ return setup_dca_buf(ctx, handle, buf, buf_pages);
+}
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index f3a7e6b..7e3ad92 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -197,19 +197,35 @@ static struct hns_roce_v2_cqe *next_cqe_sw_v2(struct hns_roce_cq *cq)
return get_sw_cqe_v2(cq, cq->cons_index);
}
+static inline bool check_qp_dca_enable(struct hns_roce_qp *qp)
+{
+ return !!qp->dca_wqe.bufs;
+}
+
+static inline void *get_wqe(struct hns_roce_qp *qp, unsigned int offset)
+{
+ if (likely(qp->buf.buf))
+ return qp->buf.buf + offset;
+ else if (unlikely(check_qp_dca_enable(qp)))
+ return qp->dca_wqe.bufs[offset >> qp->dca_wqe.shift] +
+ (offset & ((1 << qp->dca_wqe.shift) - 1));
+ else
+ return NULL;
+}
+
static void *get_recv_wqe_v2(struct hns_roce_qp *qp, unsigned int n)
{
- return qp->buf.buf + qp->rq.offset + (n << qp->rq.wqe_shift);
+ return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift));
}
static void *get_send_wqe(struct hns_roce_qp *qp, unsigned int n)
{
- return qp->buf.buf + qp->sq.offset + (n << qp->sq.wqe_shift);
+ return get_wqe(qp, qp->sq.offset + (n << qp->sq.wqe_shift));
}
static void *get_send_sge_ex(struct hns_roce_qp *qp, unsigned int n)
{
- return qp->buf.buf + qp->ex_sge.offset + (n << qp->ex_sge.sge_shift);
+ return get_wqe(qp, qp->ex_sge.offset + (n << qp->ex_sge.sge_shift));
}
static void *get_srq_wqe(struct hns_roce_srq *srq, unsigned int n)
@@ -569,6 +585,73 @@ static void parse_cqe_for_req(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc,
wc->opcode = wc_send_op_map[opcode];
}
+static bool check_dca_attach_enable(struct hns_roce_qp *qp)
+{
+ return check_qp_dca_enable(qp) &&
+ (qp->flags & HNS_ROCE_QP_CAP_DYNAMIC_CTX_ATTACH);
+}
+
+static bool check_dca_detach_enable(struct hns_roce_qp *qp)
+{
+ return check_qp_dca_enable(qp) &&
+ (qp->flags & HNS_ROCE_QP_CAP_DYNAMIC_CTX_DETACH);
+}
+
+static int dca_attach_qp_buf(struct hns_roce_context *ctx,
+ struct hns_roce_qp *qp)
+{
+ struct hns_roce_dca_attach_attr attr = {};
+ uint32_t idx;
+ int ret;
+
+ hns_roce_spin_lock(&qp->sq.hr_lock);
+ hns_roce_spin_lock(&qp->rq.hr_lock);
+
+ if (qp->sq.wqe_cnt > 0) {
+ idx = qp->sq.head & (qp->sq.wqe_cnt - 1);
+ attr.sq_offset = idx << qp->sq.wqe_shift;
+ }
+
+ if (qp->ex_sge.sge_cnt > 0) {
+ idx = qp->next_sge & (qp->ex_sge.sge_cnt - 1);
+ attr.sge_offset = idx << qp->ex_sge.sge_shift;
+ }
+
+ if (qp->rq.wqe_cnt > 0) {
+ idx = qp->rq.head & (qp->rq.wqe_cnt - 1);
+ attr.rq_offset = idx << qp->rq.wqe_shift;
+ }
+
+
+ ret = hns_roce_attach_dca_mem(ctx, qp->verbs_qp.qp.handle, &attr,
+ qp->buf_size, &qp->dca_wqe);
+
+ hns_roce_spin_unlock(&qp->rq.hr_lock);
+ hns_roce_spin_unlock(&qp->sq.hr_lock);
+
+ return ret;
+}
+
+static void dca_detach_qp_buf(struct hns_roce_context *ctx,
+ struct hns_roce_qp *qp)
+{
+ struct hns_roce_dca_detach_attr attr;
+ bool is_empty;
+
+ hns_roce_spin_lock(&qp->sq.hr_lock);
+ hns_roce_spin_lock(&qp->rq.hr_lock);
+
+ is_empty = qp->sq.head == qp->sq.tail && qp->rq.head == qp->rq.tail;
+ if (is_empty && qp->sq.wqe_cnt > 0)
+ attr.sq_index = qp->sq.head & (qp->sq.wqe_cnt - 1);
+
+ hns_roce_spin_unlock(&qp->rq.hr_lock);
+ hns_roce_spin_unlock(&qp->sq.hr_lock);
+
+ if (is_empty)
+ hns_roce_detach_dca_mem(ctx, qp->verbs_qp.qp.handle, &attr);
+}
+
static void cqe_proc_sq(struct hns_roce_qp *hr_qp, uint32_t wqe_idx,
struct hns_roce_cq *cq)
{
@@ -725,6 +808,9 @@ static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne,
for (npolled = 0; npolled < ne; ++npolled) {
err = hns_roce_poll_one(ctx, &qp, cq, wc + npolled);
+ if (qp && check_dca_detach_enable(qp))
+ dca_detach_qp_buf(ctx, qp);
+
if (err != V2_CQ_OK)
break;
}
@@ -768,19 +854,30 @@ static int hns_roce_u_v2_arm_cq(struct ibv_cq *ibvcq, int solicited)
return 0;
}
-static int check_qp_send(struct ibv_qp *qp, struct hns_roce_context *ctx)
+static int check_qp_send(struct hns_roce_qp *qp, struct hns_roce_context *ctx)
{
- if (unlikely(qp->qp_type != IBV_QPT_RC &&
- qp->qp_type != IBV_QPT_UD) &&
- qp->qp_type != IBV_QPT_XRC_SEND)
+ struct ibv_qp *ibvqp = &qp->verbs_qp.qp;
+ int ret = 0;
+
+ if (unlikely(ibvqp->qp_type != IBV_QPT_RC &&
+ ibvqp->qp_type != IBV_QPT_UD) &&
+ ibvqp->qp_type != IBV_QPT_XRC_SEND)
return -EINVAL;
- if (unlikely(qp->state == IBV_QPS_RESET ||
- qp->state == IBV_QPS_INIT ||
- qp->state == IBV_QPS_RTR))
+ if (unlikely(ibvqp->state == IBV_QPS_RESET ||
+ ibvqp->state == IBV_QPS_INIT ||
+ ibvqp->state == IBV_QPS_RTR))
return -EINVAL;
- return 0;
+ if (check_dca_attach_enable(qp)) {
+ ret = dca_attach_qp_buf(ctx, qp);
+ if (ret)
+ verbs_err_datapath(&ctx->ibv_ctx,
+ "failed to attach QP-%u send, ret = %d.\n",
+ qp->verbs_qp.qp.qp_num, ret);
+ }
+
+ return ret;
}
static void set_rc_sge(struct hns_roce_v2_wqe_data_seg *dseg,
@@ -1148,6 +1245,13 @@ static int set_rc_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr,
return 0;
}
+static inline void fill_rc_dca_fields(uint32_t qp_num,
+ struct hns_roce_rc_sq_wqe *wqe)
+{
+ hr_reg_write(wqe, RCWQE_SQPN_L, qp_num);
+ hr_reg_write(wqe, RCWQE_SQPN_H, qp_num >> RCWQE_SQPN_L_WIDTH);
+}
+
static void set_bind_mw_seg(struct hns_roce_rc_sq_wqe *wqe,
const struct ibv_send_wr *wr)
{
@@ -1259,6 +1363,9 @@ static int set_rc_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr,
return ret;
wqe_valid:
+ if (check_qp_dca_enable(qp))
+ fill_rc_dca_fields(qp->verbs_qp.qp.qp_num, rc_sq_wqe);
+
enable_wqe(qp, rc_sq_wqe, qp->sq.head + nreq);
return 0;
@@ -1275,7 +1382,7 @@ int hns_roce_u_v2_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr,
struct ibv_qp_attr attr;
int ret;
- ret = check_qp_send(ibvqp, ctx);
+ ret = check_qp_send(qp, ctx);
if (unlikely(ret)) {
*bad_wr = wr;
return ret;
@@ -1352,15 +1459,20 @@ out:
return ret;
}
-static int check_qp_recv(struct ibv_qp *qp, struct hns_roce_context *ctx)
+static int check_qp_recv(struct hns_roce_qp *qp, struct hns_roce_context *ctx)
{
- if (unlikely(qp->qp_type != IBV_QPT_RC &&
- qp->qp_type != IBV_QPT_UD))
+ struct ibv_qp *ibvqp = &qp->verbs_qp.qp;
+
+ if (unlikely(ibvqp->qp_type != IBV_QPT_RC &&
+ ibvqp->qp_type != IBV_QPT_UD))
return -EINVAL;
- if (qp->state == IBV_QPS_RESET || qp->srq)
+ if (ibvqp->state == IBV_QPS_RESET || ibvqp->srq)
return -EINVAL;
+ if (check_dca_attach_enable(qp))
+ return dca_attach_qp_buf(ctx, qp);
+
return 0;
}
@@ -1428,7 +1540,7 @@ static int hns_roce_u_v2_post_recv(struct ibv_qp *ibvqp, struct ibv_recv_wr *wr,
struct ibv_qp_attr attr;
int ret;
- ret = check_qp_recv(ibvqp, ctx);
+ ret = check_qp_recv(qp, ctx);
if (unlikely(ret)) {
*bad_wr = wr;
return ret;
@@ -1551,6 +1663,7 @@ static void record_qp_attr(struct ibv_qp *qp, struct ibv_qp_attr *attr,
static int hns_roce_u_v2_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
int attr_mask)
{
+ struct hns_roce_context *ctx = to_hr_ctx(qp->context);
struct hns_roce_modify_qp_ex_resp resp_ex = {};
struct hns_roce_modify_qp_ex cmd_ex = {};
struct hns_roce_qp *hr_qp = to_hr_qp(qp);
@@ -1598,6 +1711,10 @@ static int hns_roce_u_v2_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
if (hr_qp->tc_mode == HNS_ROCE_TC_MAP_MODE_DSCP)
hr_qp->sl = hr_qp->priority;
+ /* Try to shrink the DCA mem */
+ if (ctx->dca_ctx.mem_cnt > 0)
+ hns_roce_shrink_dca_mem(ctx);
+
record_qp_attr(qp, attr, attr_mask);
return ret;
diff --git a/providers/hns/hns_roce_u_hw_v2.h b/providers/hns/hns_roce_u_hw_v2.h
index d71c695..a22995d 100644
--- a/providers/hns/hns_roce_u_hw_v2.h
+++ b/providers/hns/hns_roce_u_hw_v2.h
@@ -239,6 +239,8 @@ struct hns_roce_rc_sq_wqe {
#define RCWQE_MW_RR_EN RCWQE_FIELD_LOC(259, 259)
#define RCWQE_MW_RW_EN RCWQE_FIELD_LOC(260, 260)
+#define RCWQE_SQPN_L_WIDTH 2
+
struct hns_roce_v2_wqe_data_seg {
__le32 len;
__le32 lkey;
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index f6c7423..749b01b 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -1165,6 +1165,14 @@ static int calc_qp_buff_size(struct hns_roce_device *hr_dev,
return 0;
}
+static inline bool check_qp_support_dca(bool pool_en, enum ibv_qp_type qp_type)
+{
+ if (pool_en && (qp_type == IBV_QPT_RC || qp_type == IBV_QPT_XRC_SEND))
+ return true;
+
+ return false;
+}
+
static void qp_free_wqe(struct hns_roce_qp *qp)
{
free_recv_rinl_buf(&qp->rq_rinl_buf);
@@ -1176,8 +1184,8 @@ static void qp_free_wqe(struct hns_roce_qp *qp)
hns_roce_free_buf(&qp->buf);
}
-static int qp_alloc_wqe(struct ibv_qp_cap *cap, struct hns_roce_qp *qp,
- struct hns_roce_context *ctx)
+static int qp_alloc_wqe(struct ibv_qp_init_attr_ex *attr,
+ struct hns_roce_qp *qp, struct hns_roce_context *ctx)
{
struct hns_roce_device *hr_dev = to_hr_dev(ctx->ibv_ctx.context.device);
@@ -1195,12 +1203,24 @@ static int qp_alloc_wqe(struct ibv_qp_cap *cap, struct hns_roce_qp *qp,
}
if (qp->rq_rinl_buf.wqe_cnt) {
- if (alloc_recv_rinl_buf(cap->max_recv_sge, &qp->rq_rinl_buf))
+ if (alloc_recv_rinl_buf(attr->cap.max_recv_sge,
+ &qp->rq_rinl_buf))
goto err_alloc;
}
- if (hns_roce_alloc_buf(&qp->buf, qp->buf_size, HNS_HW_PAGE_SIZE))
- goto err_alloc;
+ if (check_qp_support_dca(ctx->dca_ctx.max_size != 0, attr->qp_type)) {
+ /* when DCA is enabled, use a buffer list to store page addr */
+ qp->buf.buf = NULL;
+ qp->dca_wqe.max_cnt = hr_hw_page_count(qp->buf_size);
+ qp->dca_wqe.shift = HNS_HW_PAGE_SHIFT;
+ qp->dca_wqe.bufs = calloc(qp->dca_wqe.max_cnt, sizeof(void *));
+ if (!qp->dca_wqe.bufs)
+ goto err_alloc;
+ } else {
+ if (hns_roce_alloc_buf(&qp->buf, qp->buf_size,
+ HNS_HW_PAGE_SIZE))
+ goto err_alloc;
+ }
return 0;
@@ -1467,7 +1487,7 @@ static int hns_roce_alloc_qp_buf(struct ibv_qp_init_attr_ex *attr,
pthread_spin_init(&qp->rq.hr_lock.lock, PTHREAD_PROCESS_PRIVATE))
return -ENOMEM;
- ret = qp_alloc_wqe(&attr->cap, qp, ctx);
+ ret = qp_alloc_wqe(attr, qp, ctx);
if (ret)
return ret;
--
2.30.0
@ -1,167 +0,0 @@
From a5e62921afc2fcc152e8b0584f2d04d1a4db4f10 Mon Sep 17 00:00:00 2001
From: Chengchang Tang <tangchengchang@huawei.com>
Date: Tue, 29 Jun 2021 20:06:47 +0800
Subject: libhns: Use shared memory to sync DCA status
driver inclusion
category: feature
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I63L1M
----------------------------------------------------------
The user DCA logic needs to check the QP's attach state before filling the
WQE buffer, based on the response of the uverbs method
'HNS_IB_METHOD_DCA_MEM_ATTACH', but this wastes too much time on system
calls. Use a table shared between the user driver and the kernel driver to
sync the DCA status instead.
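A rough stand-alone sketch of the fast path this enables (names simplified;
the real table is set up by mmap_dca() below): the attach state is read from
the mmap()ed status bitmap with one atomic load instead of a system call.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

typedef _Atomic(uint64_t) atomic_bitmap_t;

/* Test one status bit in the shared table; 64 bits are packed per word. */
static bool dca_bit_is_set(atomic_bitmap_t *bitmap, uint32_t nr)
{
	return !!(atomic_load(bitmap + nr / 64) & (1ULL << (nr % 64)));
}

/* Fast path: if the kernel already marked this QP as attached, skip the
 * HNS_IB_METHOD_DCA_MEM_ATTACH system call entirely.
 */
static bool qp_is_attached(atomic_bitmap_t *buf_status, uint32_t dcan,
			   uint32_t max_qps)
{
	return buf_status && dcan < max_qps && dca_bit_is_set(buf_status, dcan);
}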
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
Reviewed-by: Yangyang Li <liyangyang20@huawei.com>
---
providers/hns/hns_roce_u.c | 51 +++++++++++++++++++++++++++++++++++---
providers/hns/hns_roce_u.h | 10 ++++++++
2 files changed, 57 insertions(+), 4 deletions(-)
diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c
index bd2b251..fe30cda 100644
--- a/providers/hns/hns_roce_u.c
+++ b/providers/hns/hns_roce_u.c
@@ -95,9 +95,33 @@ static const struct verbs_context_ops hns_common_ops = {
.alloc_parent_domain = hns_roce_u_alloc_pad,
};
-static int init_dca_context(struct hns_roce_context *ctx, int page_size)
+static int mmap_dca(struct hns_roce_context *ctx, int cmd_fd,
+ int page_size, size_t size, uint64_t mmap_key)
{
struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
+ void *addr;
+
+ addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, cmd_fd,
+ mmap_key);
+ if (addr == MAP_FAILED) {
+ verbs_err(&ctx->ibv_ctx, "failed to mmap() dca prime qp.\n");
+ return -EINVAL;
+ }
+
+ dca_ctx->buf_status = addr;
+ dca_ctx->sync_status = addr + size / 2;
+
+ return 0;
+}
+
+static int init_dca_context(struct hns_roce_context *ctx, int cmd_fd,
+ struct hns_roce_alloc_ucontext_resp *resp,
+ int page_size)
+{
+ struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
+ uint64_t mmap_key = resp->dca_mmap_key;
+ int mmap_size = resp->dca_mmap_size;
+ int max_qps = resp->dca_qps;
int ret;
if (!(ctx->config & HNS_ROCE_UCTX_RSP_DCA_FLAGS))
@@ -112,6 +136,16 @@ static int init_dca_context(struct hns_roce_context *ctx, int page_size)
dca_ctx->max_size = HNS_DCA_MAX_MEM_SIZE;
dca_ctx->mem_cnt = 0;
+ if (mmap_key) {
+ const unsigned int bits_per_qp = 2 * HNS_DCA_BITS_PER_STATUS;
+
+ if (!mmap_dca(ctx, cmd_fd, page_size, mmap_size, mmap_key)) {
+ dca_ctx->status_size = mmap_size;
+ dca_ctx->max_qps = min_t(int, max_qps,
+ mmap_size * 8 / bits_per_qp);
+ }
+ }
+
return 0;
}
@@ -125,6 +159,8 @@ static void uninit_dca_context(struct hns_roce_context *ctx)
pthread_spin_lock(&dca_ctx->lock);
hns_roce_cleanup_dca_mem(ctx);
pthread_spin_unlock(&dca_ctx->lock);
+ if (dca_ctx->buf_status)
+ munmap(dca_ctx->buf_status, dca_ctx->status_size);
pthread_spin_destroy(&dca_ctx->lock);
}
@@ -149,6 +185,14 @@ static uint32_t calc_table_shift(uint32_t entry_count, uint32_t size_shift)
return count_shift > size_shift ? count_shift - size_shift : 0;
}
+static void ucontext_set_cmd(struct hns_roce_alloc_ucontext *cmd, int page_size)
+{
+ cmd->config |= HNS_ROCE_EXSGE_FLAGS | HNS_ROCE_RQ_INLINE_FLAGS |
+ HNS_ROCE_CQE_INLINE_FLAGS | HNS_ROCE_UCTX_CONFIG_DCA;
+ cmd->comp = HNS_ROCE_ALLOC_UCTX_COMP_DCA_MAX_QPS;
+ cmd->dca_max_qps = page_size * 8 / 2 * HNS_DCA_BITS_PER_STATUS;
+}
+
static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
int cmd_fd,
void *private_data)
@@ -165,8 +209,7 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
if (!context)
return NULL;
- cmd.config |= HNS_ROCE_EXSGE_FLAGS | HNS_ROCE_RQ_INLINE_FLAGS |
- HNS_ROCE_CQE_INLINE_FLAGS | HNS_ROCE_UCTX_CONFIG_DCA;
+ ucontext_set_cmd(&cmd, hr_dev->page_size);
if (ibv_cmd_get_context(&context->ibv_ctx, &cmd.ibv_cmd, sizeof(cmd),
&resp.ibv_resp, sizeof(resp)))
goto err_free;
@@ -212,7 +255,7 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
context->max_srq_wr = dev_attrs.max_srq_wr;
context->max_srq_sge = dev_attrs.max_srq_sge;
- if (init_dca_context(context, hr_dev->page_size))
+ if (init_dca_context(context, cmd_fd, &resp, hr_dev->page_size))
goto err_free;
if (hns_roce_mmap(hr_dev, context, cmd_fd))
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index 44a733f..a8f811e 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -35,6 +35,7 @@
#include <stddef.h>
#include <endian.h>
+#include <stdatomic.h>
#include <util/compiler.h>
#include <infiniband/driver.h>
@@ -44,6 +45,7 @@
#include <ccan/array_size.h>
#include <util/bitmap.h>
#include <ccan/container_of.h>
+#include <ccan/minmax.h>
#include <linux/if_ether.h>
#include "hns_roce_u_abi.h"
@@ -52,6 +54,8 @@
#define PFX "hns: "
+typedef _Atomic(uint64_t) atomic_bitmap_t;
+
/* The minimum page size is 4K for hardware */
#define HNS_HW_PAGE_SHIFT 12
#define HNS_HW_PAGE_SIZE (1 << HNS_HW_PAGE_SHIFT)
@@ -214,6 +218,12 @@ struct hns_roce_dca_ctx {
uint64_t max_size;
uint64_t min_size;
uint64_t curr_size;
+
+#define HNS_DCA_BITS_PER_STATUS 1
+ unsigned int max_qps;
+ unsigned int status_size;
+ atomic_bitmap_t *buf_status;
+ atomic_bitmap_t *sync_status;
};
struct hns_roce_context {
--
2.30.0
@ -1,222 +0,0 @@
From 13d4b60fcd0880fae54b1af627eeb7297d7b086d Mon Sep 17 00:00:00 2001
From: Chengchang Tang <tangchengchang@huawei.com>
Date: Tue, 29 Jun 2021 21:01:27 +0800
Subject: libhns: Sync DCA status by shared memory
driver inclusion
category: feature
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I63L1M
----------------------------------------------------------
Use the DCA num from the resp of modify_qp() to index the DCA status bits
in the shared memory. If the num is valid, the user DCA logic can get each
QP's DCA status by testing its bit in the shared memory; otherwise it
invokes the uverbs method 'HNS_IB_METHOD_DCA_MEM_ATTACH' to check the DCA
status.
Each QP has 2 bits in the shared memory: 1 bit locks the DCA status against
concurrent changes by the kernel driver or the user driver, and the other
bit indicates the DCA attach status.
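The lock bit behaves like a bounded try-lock. A stand-alone sketch of that
part of the protocol (simplified from hns_roce_dca_start_post() and
hns_roce_dca_stop_post() below, not the exact provider code) might look like
this:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

typedef _Atomic(uint64_t) atomic_bitmap_t;

#define MAX_DCA_TRY_LOCK_TIMES 10

/* Try to take the per-QP sync bit; give up after a bounded number of
 * attempts so the caller can fall back to a forced attach.
 */
static bool dca_try_lock(atomic_bitmap_t *sync_status, uint32_t bit)
{
	atomic_bitmap_t *word = sync_status + bit / 64;
	uint64_t mask = 1ULL << (bit % 64);
	int tries = 0;

	while (atomic_fetch_or(word, mask) & mask)
		if (tries++ > MAX_DCA_TRY_LOCK_TIMES)
			return false;

	return true;
}

/* Release the sync bit once posting is done. */
static void dca_unlock(atomic_bitmap_t *sync_status, uint32_t bit)
{
	atomic_fetch_and(sync_status + bit / 64, ~(1ULL << (bit % 64)));
}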
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
Reviewed-by: Yangyang Li <liyangyang20@huawei.com>
---
providers/hns/hns_roce_u.h | 31 +++++++++++++++++++++++
providers/hns/hns_roce_u_buf.c | 42 ++++++++++++++++++++++++++++++++
providers/hns/hns_roce_u_hw_v2.c | 20 ++++++++++++++-
3 files changed, 92 insertions(+), 1 deletion(-)
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index a8f811e..91b0c8f 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -362,6 +362,7 @@ struct hns_roce_dca_buf {
void **bufs;
unsigned int max_cnt;
unsigned int shift;
+ unsigned int dcan;
};
struct hns_roce_qp {
@@ -422,6 +423,7 @@ struct hns_roce_dca_attach_attr {
uint32_t sq_offset;
uint32_t sge_offset;
uint32_t rq_offset;
+ bool force;
};
struct hns_roce_dca_detach_attr {
@@ -534,6 +536,32 @@ static inline int hns_roce_spin_unlock(struct hns_roce_spinlock *hr_lock)
return 0;
}
+#define HNS_ROCE_BIT_MASK(nr) (1UL << ((nr) % 64))
+#define HNS_ROCE_BIT_WORD(nr) ((nr) / 64)
+
+static inline bool atomic_test_bit(atomic_bitmap_t *p, uint32_t nr)
+{
+ p += HNS_ROCE_BIT_WORD(nr);
+ return !!(atomic_load(p) & HNS_ROCE_BIT_MASK(nr));
+}
+
+static inline bool test_and_set_bit_lock(atomic_bitmap_t *p, uint32_t nr)
+{
+ uint64_t mask = HNS_ROCE_BIT_MASK(nr);
+
+ p += HNS_ROCE_BIT_WORD(nr);
+ if (atomic_load(p) & mask)
+ return true;
+
+ return (atomic_fetch_or(p, mask) & mask) != 0;
+}
+
+static inline void clear_bit_unlock(atomic_bitmap_t *p, uint32_t nr)
+{
+ p += HNS_ROCE_BIT_WORD(nr);
+ atomic_fetch_and(p, ~HNS_ROCE_BIT_MASK(nr));
+}
+
int hns_roce_u_query_device(struct ibv_context *context,
const struct ibv_query_device_ex_input *input,
struct ibv_device_attr_ex *attr, size_t attr_size);
@@ -614,6 +642,9 @@ int hns_roce_attach_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
uint32_t size, struct hns_roce_dca_buf *buf);
void hns_roce_detach_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
struct hns_roce_dca_detach_attr *attr);
+bool hns_roce_dca_start_post(struct hns_roce_dca_ctx *ctx, uint32_t dcan);
+void hns_roce_dca_stop_post(struct hns_roce_dca_ctx *ctx, uint32_t dcan);
+
void hns_roce_shrink_dca_mem(struct hns_roce_context *ctx);
void hns_roce_cleanup_dca_mem(struct hns_roce_context *ctx);
diff --git a/providers/hns/hns_roce_u_buf.c b/providers/hns/hns_roce_u_buf.c
index 3d41b89..08c0fbc 100644
--- a/providers/hns/hns_roce_u_buf.c
+++ b/providers/hns/hns_roce_u_buf.c
@@ -440,6 +440,45 @@ static int setup_dca_buf(struct hns_roce_context *ctx, uint32_t handle,
return (idx >= page_count) ? 0 : -ENOMEM;
}
+#define DCAN_TO_SYNC_BIT(n) ((n) * HNS_DCA_BITS_PER_STATUS)
+#define DCAN_TO_STAT_BIT(n) DCAN_TO_SYNC_BIT(n)
+
+#define MAX_DCA_TRY_LOCK_TIMES 10
+bool hns_roce_dca_start_post(struct hns_roce_dca_ctx *ctx, uint32_t dcan)
+{
+ atomic_bitmap_t *st = ctx->sync_status;
+ int try_times = 0;
+
+ if (!st || dcan >= ctx->max_qps)
+ return true;
+
+ while (test_and_set_bit_lock(st, DCAN_TO_SYNC_BIT(dcan)))
+ if (try_times++ > MAX_DCA_TRY_LOCK_TIMES)
+ return false;
+
+ return true;
+}
+
+void hns_roce_dca_stop_post(struct hns_roce_dca_ctx *ctx, uint32_t dcan)
+{
+ atomic_bitmap_t *st = ctx->sync_status;
+
+ if (!st || dcan >= ctx->max_qps)
+ return;
+
+ clear_bit_unlock(st, DCAN_TO_SYNC_BIT(dcan));
+}
+
+static bool check_dca_is_attached(struct hns_roce_dca_ctx *ctx, uint32_t dcan)
+{
+ atomic_bitmap_t *st = ctx->buf_status;
+
+ if (!st || dcan >= ctx->max_qps)
+ return false;
+
+ return atomic_test_bit(st, DCAN_TO_STAT_BIT(dcan));
+}
+
#define DCA_EXPAND_MEM_TRY_TIMES 3
int hns_roce_attach_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
struct hns_roce_dca_attach_attr *attr,
@@ -451,6 +490,9 @@ int hns_roce_attach_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
int try_times = 0;
int ret = 0;
+ if (!attr->force && check_dca_is_attached(&ctx->dca_ctx, buf->dcan))
+ return 0;
+
do {
resp.alloc_pages = 0;
ret = attach_dca_mem(ctx, handle, attr, &resp);
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index 7e3ad92..028d20c 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -601,6 +601,7 @@ static int dca_attach_qp_buf(struct hns_roce_context *ctx,
struct hns_roce_qp *qp)
{
struct hns_roce_dca_attach_attr attr = {};
+ bool enable_detach;
uint32_t idx;
int ret;
@@ -622,9 +623,16 @@ static int dca_attach_qp_buf(struct hns_roce_context *ctx,
attr.rq_offset = idx << qp->rq.wqe_shift;
}
+ enable_detach = check_dca_detach_enable(qp);
+ if (enable_detach &&
+ !hns_roce_dca_start_post(&ctx->dca_ctx, qp->dca_wqe.dcan))
+ /* Force attach if failed to sync dca status */
+ attr.force = true;
ret = hns_roce_attach_dca_mem(ctx, qp->verbs_qp.qp.handle, &attr,
- qp->buf_size, &qp->dca_wqe);
+ qp->buf_size, &qp->dca_wqe);
+ if (ret && enable_detach)
+ hns_roce_dca_stop_post(&ctx->dca_ctx, qp->dca_wqe.dcan);
hns_roce_spin_unlock(&qp->rq.hr_lock);
hns_roce_spin_unlock(&qp->sq.hr_lock);
@@ -1450,6 +1458,9 @@ out:
hns_roce_spin_unlock(&qp->sq.hr_lock);
+ if (check_dca_detach_enable(qp))
+ hns_roce_dca_stop_post(&ctx->dca_ctx, qp->dca_wqe.dcan);
+
if (ibvqp->state == IBV_QPS_ERR) {
attr.qp_state = IBV_QPS_ERR;
@@ -1582,6 +1593,9 @@ out:
hns_roce_spin_unlock(&qp->rq.hr_lock);
+ if (check_dca_detach_enable(qp))
+ hns_roce_dca_stop_post(&ctx->dca_ctx, qp->dca_wqe.dcan);
+
if (ibvqp->state == IBV_QPS_ERR) {
attr.qp_state = IBV_QPS_ERR;
hns_roce_u_v2_modify_qp(ibvqp, &attr, IBV_QP_STATE);
@@ -1693,6 +1707,7 @@ static int hns_roce_u_v2_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
if (attr->qp_state == IBV_QPS_RTR) {
hr_qp->tc_mode = resp_ex.drv_payload.tc_mode;
hr_qp->priority = resp_ex.drv_payload.priority;
+ hr_qp->dca_wqe.dcan = resp_ex.drv_payload.dcan;
}
}
@@ -2721,6 +2736,9 @@ static int wr_complete(struct ibv_qp_ex *ibv_qp)
out:
hns_roce_spin_unlock(&qp->sq.hr_lock);
+ if (check_dca_detach_enable(qp))
+ hns_roce_dca_stop_post(&ctx->dca_ctx, qp->dca_wqe.dcan);
+
if (ibv_qp->qp_base.state == IBV_QPS_ERR) {
attr.qp_state = IBV_QPS_ERR;
hns_roce_u_v2_modify_qp(&ibv_qp->qp_base, &attr, IBV_QP_STATE);
--
2.30.0
@ -1,766 +0,0 @@
From 6aa5efb3059c66d3d0f49804551b38c5ed827ec1 Mon Sep 17 00:00:00 2001
From: Chengchang Tang <tangchengchang@huawei.com>
Date: Mon, 10 May 2021 17:13:49 +0800
Subject: libhns: Add direct verbs support to config DCA
driver inclusion
category: feature
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I63L1M
----------------------------------------------------------
Add two direct verbs to configure DCA:
1. hnsdv_open_device() is used to configure the DCA memory pool.
2. hnsdv_create_qp() is used to create a DCA QP.
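An illustrative usage sketch based on the hnsdv.h header added by this patch
(error unwinding trimmed and queue sizes arbitrary):

#include <infiniband/verbs.h>
#include <infiniband/hnsdv.h>

static struct ibv_qp *create_dca_qp(struct ibv_device *device)
{
	struct hnsdv_context_attr ctx_attr = {
		.flags = HNSDV_CONTEXT_FLAGS_DCA,	/* enable the DCA memory pool */
	};
	struct hnsdv_qp_init_attr hns_attr = {
		.comp_mask = HNSDV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS,
		.create_flags = HNSDV_QP_CREATE_ENABLE_DCA_MODE,
	};
	struct ibv_qp_init_attr_ex attr = {};
	struct ibv_context *ctx;
	struct ibv_pd *pd;
	struct ibv_cq *cq;

	if (!hnsdv_is_supported(device))
		return NULL;

	ctx = hnsdv_open_device(device, &ctx_attr);
	if (!ctx)
		return NULL;

	pd = ibv_alloc_pd(ctx);
	cq = ibv_create_cq(ctx, 64, NULL, NULL, 0);
	if (!pd || !cq)
		return NULL;	/* cleanup omitted for brevity */

	attr.qp_type = IBV_QPT_RC;	/* DCA works for RC and XRC send QPs */
	attr.comp_mask = IBV_QP_INIT_ATTR_PD;
	attr.pd = pd;
	attr.send_cq = cq;
	attr.recv_cq = cq;
	attr.cap.max_send_wr = 64;
	attr.cap.max_recv_wr = 64;
	attr.cap.max_send_sge = 1;
	attr.cap.max_recv_sge = 1;

	return hnsdv_create_qp(ctx, &attr, &hns_attr);
}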
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
Reviewed-by: Yangyang Li <liyangyang20@huawei.com>
---
debian/control | 2 +-
debian/ibverbs-providers.install | 1 +
debian/ibverbs-providers.lintian-overrides | 4 +-
debian/ibverbs-providers.symbols | 6 ++
debian/libibverbs-dev.install | 4 +
providers/hns/CMakeLists.txt | 9 ++-
providers/hns/hns_roce_u.c | 92 +++++++++++++++++++---
providers/hns/hns_roce_u.h | 2 +
providers/hns/hns_roce_u_abi.h | 1 +
providers/hns/hns_roce_u_buf.c | 3 +
providers/hns/hns_roce_u_hw_v2.c | 33 +++++++-
providers/hns/hns_roce_u_verbs.c | 58 ++++++++++++--
providers/hns/hnsdv.h | 65 +++++++++++++++
providers/hns/libhns.map | 9 +++
redhat/rdma-core.spec | 5 +-
suse/rdma-core.spec | 21 ++++-
16 files changed, 289 insertions(+), 26 deletions(-)
create mode 100644 providers/hns/hnsdv.h
create mode 100644 providers/hns/libhns.map
diff --git a/debian/control b/debian/control
index 7485ad3..22eb6cd 100644
--- a/debian/control
+++ b/debian/control
@@ -94,7 +94,7 @@ Description: User space provider drivers for libibverbs
- cxgb4: Chelsio T4 iWARP HCAs
- efa: Amazon Elastic Fabric Adapter
- hfi1verbs: Intel Omni-Path HFI
- - hns: HiSilicon Hip06 SoC
+ - hns: HiSilicon Hip08+ SoC
- ipathverbs: QLogic InfiniPath HCAs
- irdma: Intel Ethernet Connection RDMA
- mlx4: Mellanox ConnectX-3 InfiniBand HCAs
diff --git a/debian/ibverbs-providers.install b/debian/ibverbs-providers.install
index 4f971fb..c6ecbbc 100644
--- a/debian/ibverbs-providers.install
+++ b/debian/ibverbs-providers.install
@@ -1,5 +1,6 @@
etc/libibverbs.d/
usr/lib/*/libefa.so.*
usr/lib/*/libibverbs/lib*-rdmav*.so
+usr/lib/*/libhns.so.*
usr/lib/*/libmlx4.so.*
usr/lib/*/libmlx5.so.*
diff --git a/debian/ibverbs-providers.lintian-overrides b/debian/ibverbs-providers.lintian-overrides
index 8a44d54..f6afb70 100644
--- a/debian/ibverbs-providers.lintian-overrides
+++ b/debian/ibverbs-providers.lintian-overrides
@@ -1,2 +1,2 @@
-# libefa, libmlx4 and libmlx5 are ibverbs provider that provides more functions.
-ibverbs-providers: package-name-doesnt-match-sonames libefa1 libmlx4-1 libmlx5-1
+# libefa, libhns, libmlx4 and libmlx5 are ibverbs provider that provides more functions.
+ibverbs-providers: package-name-doesnt-match-sonames libefa1 libhns-1 libmlx4-1 libmlx5-1
diff --git a/debian/ibverbs-providers.symbols b/debian/ibverbs-providers.symbols
index 2c6b330..1844369 100644
--- a/debian/ibverbs-providers.symbols
+++ b/debian/ibverbs-providers.symbols
@@ -162,3 +162,9 @@ libefa.so.1 ibverbs-providers #MINVER#
efadv_create_qp_ex@EFA_1.1 26
efadv_query_device@EFA_1.1 26
efadv_query_ah@EFA_1.1 26
+libhns.so.1 ibverbs-providers #MINVER#
+* Build-Depends-Package: libibverbs-dev
+ HNS_1.0@HNS_1.0 36
+ hnsdv_is_supported@HNS_1.0 36
+ hnsdv_open_device@HNS_1.0 36
+ hnsdv_create_qp@HNS_1.0 36
diff --git a/debian/libibverbs-dev.install b/debian/libibverbs-dev.install
index bc8caa5..7d6e6a2 100644
--- a/debian/libibverbs-dev.install
+++ b/debian/libibverbs-dev.install
@@ -1,5 +1,6 @@
usr/include/infiniband/arch.h
usr/include/infiniband/efadv.h
+usr/include/infiniband/hnsdv.h
usr/include/infiniband/ib_user_ioctl_verbs.h
usr/include/infiniband/mlx4dv.h
usr/include/infiniband/mlx5_api.h
@@ -14,6 +15,8 @@ usr/include/infiniband/verbs_api.h
usr/lib/*/lib*-rdmav*.a
usr/lib/*/libefa.a
usr/lib/*/libefa.so
+usr/lib/*/libhns.a
+usr/lib/*/libhns.so
usr/lib/*/libibverbs*.so
usr/lib/*/libibverbs.a
usr/lib/*/libmlx4.a
@@ -21,6 +24,7 @@ usr/lib/*/libmlx4.so
usr/lib/*/libmlx5.a
usr/lib/*/libmlx5.so
usr/lib/*/pkgconfig/libefa.pc
+usr/lib/*/pkgconfig/libhns.pc
usr/lib/*/pkgconfig/libibverbs.pc
usr/lib/*/pkgconfig/libmlx4.pc
usr/lib/*/pkgconfig/libmlx5.pc
diff --git a/providers/hns/CMakeLists.txt b/providers/hns/CMakeLists.txt
index 7aaca75..160e1ff 100644
--- a/providers/hns/CMakeLists.txt
+++ b/providers/hns/CMakeLists.txt
@@ -1,7 +1,14 @@
-rdma_provider(hns
+rdma_shared_provider(hns libhns.map
+ 1 1.0.${PACKAGE_VERSION}
hns_roce_u.c
hns_roce_u_buf.c
hns_roce_u_db.c
hns_roce_u_hw_v2.c
hns_roce_u_verbs.c
)
+
+publish_headers(infiniband
+ hnsdv.h
+)
+
+rdma_pkg_config("hns" "libibverbs" "${CMAKE_THREAD_LIBS_INIT}")
diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c
index fe30cda..0cf6d4b 100644
--- a/providers/hns/hns_roce_u.c
+++ b/providers/hns/hns_roce_u.c
@@ -114,8 +114,60 @@ static int mmap_dca(struct hns_roce_context *ctx, int cmd_fd,
return 0;
}
+bool hnsdv_is_supported(struct ibv_device *device)
+{
+ return is_hns_dev(device);
+}
+
+struct ibv_context *hnsdv_open_device(struct ibv_device *device,
+ struct hnsdv_context_attr *attr)
+{
+ if (!is_hns_dev(device)) {
+ errno = EOPNOTSUPP;
+ return NULL;
+ }
+
+ return verbs_open_device(device, attr);
+}
+
+static void set_dca_pool_param(struct hns_roce_context *ctx,
+ struct hnsdv_context_attr *attr, int page_size)
+{
+ struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
+
+ if (attr->comp_mask & HNSDV_CONTEXT_MASK_DCA_UNIT_SIZE)
+ dca_ctx->unit_size = align(attr->dca_unit_size, page_size);
+ else
+ dca_ctx->unit_size = page_size * HNS_DCA_DEFAULT_UNIT_PAGES;
+
+ /* The memory pool cannot be expanded, only init the DCA context. */
+ if (dca_ctx->unit_size == 0)
+ return;
+
+ /* If not set, the memory pool can be expanded unlimitedly. */
+ if (attr->comp_mask & HNSDV_CONTEXT_MASK_DCA_MAX_SIZE)
+ dca_ctx->max_size = DIV_ROUND_UP(attr->dca_max_size,
+ dca_ctx->unit_size) *
+ dca_ctx->unit_size;
+ else
+ dca_ctx->max_size = HNS_DCA_MAX_MEM_SIZE;
+
+ /* If not set, the memory pool cannot be shrunk. */
+ if (attr->comp_mask & HNSDV_CONTEXT_MASK_DCA_MIN_SIZE)
+ dca_ctx->min_size = DIV_ROUND_UP(attr->dca_min_size,
+ dca_ctx->unit_size) *
+ dca_ctx->unit_size;
+ else
+ dca_ctx->min_size = HNS_DCA_MAX_MEM_SIZE;
+
+ verbs_debug(&ctx->ibv_ctx,
+ "Support DCA, unit %d, max %ld, min %ld Bytes.\n",
+ dca_ctx->unit_size, dca_ctx->max_size, dca_ctx->min_size);
+}
+
static int init_dca_context(struct hns_roce_context *ctx, int cmd_fd,
struct hns_roce_alloc_ucontext_resp *resp,
+ struct hnsdv_context_attr *attr,
int page_size)
{
struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
@@ -127,14 +179,18 @@ static int init_dca_context(struct hns_roce_context *ctx, int cmd_fd,
if (!(ctx->config & HNS_ROCE_UCTX_RSP_DCA_FLAGS))
return 0;
+ dca_ctx->unit_size = 0;
+ dca_ctx->mem_cnt = 0;
+
list_head_init(&dca_ctx->mem_list);
ret = pthread_spin_init(&dca_ctx->lock, PTHREAD_PROCESS_PRIVATE);
if (ret)
return ret;
- dca_ctx->unit_size = page_size * HNS_DCA_DEFAULT_UNIT_PAGES;
- dca_ctx->max_size = HNS_DCA_MAX_MEM_SIZE;
- dca_ctx->mem_cnt = 0;
+ if (!attr || !(attr->flags & HNSDV_CONTEXT_FLAGS_DCA))
+ return 0;
+
+ set_dca_pool_param(ctx, attr, page_size);
if (mmap_key) {
const unsigned int bits_per_qp = 2 * HNS_DCA_BITS_PER_STATUS;
@@ -185,18 +241,28 @@ static uint32_t calc_table_shift(uint32_t entry_count, uint32_t size_shift)
return count_shift > size_shift ? count_shift - size_shift : 0;
}
-static void ucontext_set_cmd(struct hns_roce_alloc_ucontext *cmd, int page_size)
+static void ucontext_set_cmd(struct hns_roce_alloc_ucontext *cmd,
+ struct hnsdv_context_attr *attr)
{
cmd->config |= HNS_ROCE_EXSGE_FLAGS | HNS_ROCE_RQ_INLINE_FLAGS |
- HNS_ROCE_CQE_INLINE_FLAGS | HNS_ROCE_UCTX_CONFIG_DCA;
- cmd->comp = HNS_ROCE_ALLOC_UCTX_COMP_DCA_MAX_QPS;
- cmd->dca_max_qps = page_size * 8 / 2 * HNS_DCA_BITS_PER_STATUS;
+ HNS_ROCE_CQE_INLINE_FLAGS;
+
+ if (!attr || !(attr->flags & HNSDV_CONTEXT_FLAGS_DCA))
+ return;
+
+ cmd->config |= HNS_ROCE_UCTX_CONFIG_DCA;
+
+ if (attr->comp_mask & HNSDV_CONTEXT_MASK_DCA_PRIME_QPS) {
+ cmd->comp |= HNS_ROCE_ALLOC_UCTX_COMP_DCA_MAX_QPS;
+ cmd->dca_max_qps = attr->dca_prime_qps;
+ }
}
static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
int cmd_fd,
void *private_data)
{
+ struct hnsdv_context_attr *ctx_attr = private_data;
struct hns_roce_device *hr_dev = to_hr_dev(ibdev);
struct hns_roce_alloc_ucontext_resp resp = {};
struct hns_roce_alloc_ucontext cmd = {};
@@ -209,7 +275,7 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
if (!context)
return NULL;
- ucontext_set_cmd(&cmd, hr_dev->page_size);
+ ucontext_set_cmd(&cmd, ctx_attr);
if (ibv_cmd_get_context(&context->ibv_ctx, &cmd.ibv_cmd, sizeof(cmd),
&resp.ibv_resp, sizeof(resp)))
goto err_free;
@@ -255,7 +321,8 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
context->max_srq_wr = dev_attrs.max_srq_wr;
context->max_srq_sge = dev_attrs.max_srq_sge;
- if (init_dca_context(context, cmd_fd, &resp, hr_dev->page_size))
+ if (init_dca_context(context, cmd_fd,
+ &resp, ctx_attr, hr_dev->page_size))
goto err_free;
if (hns_roce_mmap(hr_dev, context, cmd_fd))
@@ -317,4 +384,11 @@ static const struct verbs_device_ops hns_roce_dev_ops = {
.uninit_device = hns_uninit_device,
.alloc_context = hns_roce_alloc_context,
};
+
+bool is_hns_dev(struct ibv_device *device)
+{
+ struct verbs_device *verbs_device = verbs_get_device(device);
+
+ return verbs_device->ops == &hns_roce_dev_ops;
+}
PROVIDER_DRIVER(hns, hns_roce_dev_ops);
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index 91b0c8f..71c35c5 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -562,6 +562,8 @@ static inline void clear_bit_unlock(atomic_bitmap_t *p, uint32_t nr)
atomic_fetch_and(p, ~HNS_ROCE_BIT_MASK(nr));
}
+bool is_hns_dev(struct ibv_device *device);
+
int hns_roce_u_query_device(struct ibv_context *context,
const struct ibv_query_device_ex_input *input,
struct ibv_device_attr_ex *attr, size_t attr_size);
diff --git a/providers/hns/hns_roce_u_abi.h b/providers/hns/hns_roce_u_abi.h
index 0519ac7..1eaf62d 100644
--- a/providers/hns/hns_roce_u_abi.h
+++ b/providers/hns/hns_roce_u_abi.h
@@ -36,6 +36,7 @@
#include <infiniband/kern-abi.h>
#include <rdma/hns-abi.h>
#include <kernel-abi/hns-abi.h>
+#include "hnsdv.h"
DECLARE_DRV_CMD(hns_roce_alloc_pd, IB_USER_VERBS_CMD_ALLOC_PD,
empty, hns_roce_ib_alloc_pd_resp);
diff --git a/providers/hns/hns_roce_u_buf.c b/providers/hns/hns_roce_u_buf.c
index 08c0fbc..780683e 100644
--- a/providers/hns/hns_roce_u_buf.c
+++ b/providers/hns/hns_roce_u_buf.c
@@ -56,6 +56,9 @@ int hns_roce_alloc_buf(struct hns_roce_buf *buf, unsigned int size,
void hns_roce_free_buf(struct hns_roce_buf *buf)
{
+ if (!buf->buf)
+ return;
+
ibv_dofork_range(buf->buf, buf->length);
munmap(buf->buf, buf->length);
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index 028d20c..7661863 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -1473,6 +1473,7 @@ out:
static int check_qp_recv(struct hns_roce_qp *qp, struct hns_roce_context *ctx)
{
struct ibv_qp *ibvqp = &qp->verbs_qp.qp;
+ int ret = 0;
if (unlikely(ibvqp->qp_type != IBV_QPT_RC &&
ibvqp->qp_type != IBV_QPT_UD))
@@ -1481,10 +1482,15 @@ static int check_qp_recv(struct hns_roce_qp *qp, struct hns_roce_context *ctx)
if (ibvqp->state == IBV_QPS_RESET || ibvqp->srq)
return -EINVAL;
- if (check_dca_attach_enable(qp))
- return dca_attach_qp_buf(ctx, qp);
+ if (check_dca_attach_enable(qp)) {
+ ret = dca_attach_qp_buf(ctx, qp);
+ if (ret)
+ verbs_err_datapath(&ctx->ibv_ctx,
+ "failed to attach QP-%u recv, ret = %d.\n",
+ qp->verbs_qp.qp.qp_num, ret);
+ }
- return 0;
+ return ret;
}
static void fill_recv_sge_to_wqe(struct ibv_recv_wr *wr, void *wqe,
@@ -1951,6 +1957,9 @@ static int wc_start_poll_cq(struct ibv_cq_ex *current,
hns_roce_spin_lock(&cq->hr_lock);
err = hns_roce_poll_one(ctx, &qp, cq, NULL);
+ if (qp && check_dca_detach_enable(qp))
+ dca_detach_qp_buf(ctx, qp);
+
if (err != V2_CQ_OK)
hns_roce_spin_unlock(&cq->hr_lock);
@@ -1965,6 +1974,8 @@ static int wc_next_poll_cq(struct ibv_cq_ex *current)
int err;
err = hns_roce_poll_one(ctx, &qp, cq, NULL);
+ if (qp && check_dca_detach_enable(qp))
+ dca_detach_qp_buf(ctx, qp);
if (err != V2_CQ_OK)
return err;
@@ -2159,6 +2170,9 @@ init_rc_wqe(struct hns_roce_qp *qp, uint64_t wr_id, unsigned int opcode)
hr_reg_clear(wqe, RCWQE_INLINE);
hr_reg_clear(wqe, RCWQE_SO);
+ if (check_qp_dca_enable(qp))
+ fill_rc_dca_fields(qp->verbs_qp.qp.qp_num, wqe);
+
qp->sq.wrid[wqe_idx] = wr_id;
qp->cur_wqe = wqe;
qp->sq.head++;
@@ -2691,8 +2705,10 @@ static void wr_set_inline_data_list_ud(struct ibv_qp_ex *ibv_qp, size_t num_buf,
static void wr_start(struct ibv_qp_ex *ibv_qp)
{
+ struct hns_roce_context *ctx = to_hr_ctx(ibv_qp->qp_base.context);
struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base);
enum ibv_qp_state state = ibv_qp->qp_base.state;
+ int ret;
if (state == IBV_QPS_RESET ||
state == IBV_QPS_INIT ||
@@ -2701,6 +2717,17 @@ static void wr_start(struct ibv_qp_ex *ibv_qp)
return;
}
+ if (check_qp_dca_enable(qp)) {
+ ret = dca_attach_qp_buf(ctx, qp);
+ if (ret) {
+ verbs_err_datapath(&ctx->ibv_ctx,
+ "failed to attach QP-%u send, ret = %d.\n",
+ qp->verbs_qp.qp.qp_num, ret);
+ qp->err = ret;
+ return;
+ }
+ }
+
hns_roce_spin_lock(&qp->sq.hr_lock);
qp->sge_info.start_idx = qp->next_sge;
qp->rb_sq_head = qp->sq.head;
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index 749b01b..282ab74 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -961,6 +961,15 @@ enum {
IBV_QP_INIT_ATTR_SEND_OPS_FLAGS,
};
+enum {
+ SEND_OPS_FLAG_MASK =
+ IBV_QP_EX_WITH_RDMA_WRITE | IBV_QP_EX_WITH_RDMA_WRITE_WITH_IMM |
+ IBV_QP_EX_WITH_SEND | IBV_QP_EX_WITH_SEND_WITH_IMM |
+ IBV_QP_EX_WITH_RDMA_READ | IBV_QP_EX_WITH_ATOMIC_CMP_AND_SWP |
+ IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD | IBV_QP_EX_WITH_LOCAL_INV |
+ IBV_QP_EX_WITH_SEND_WITH_INV,
+};
+
static int check_qp_create_mask(struct hns_roce_context *ctx,
struct ibv_qp_init_attr_ex *attr)
{
@@ -969,6 +978,10 @@ static int check_qp_create_mask(struct hns_roce_context *ctx,
if (!check_comp_mask(attr->comp_mask, CREATE_QP_SUP_COMP_MASK))
return -EOPNOTSUPP;
+ if (attr->comp_mask & IBV_QP_INIT_ATTR_SEND_OPS_FLAGS &&
+ !check_comp_mask(attr->send_ops_flags, SEND_OPS_FLAG_MASK))
+ return -EOPNOTSUPP;
+
switch (attr->qp_type) {
case IBV_QPT_UD:
if (hr_dev->hw_version == HNS_ROCE_HW_VER2)
@@ -1165,9 +1178,21 @@ static int calc_qp_buff_size(struct hns_roce_device *hr_dev,
return 0;
}
-static inline bool check_qp_support_dca(bool pool_en, enum ibv_qp_type qp_type)
+static inline bool check_qp_support_dca(struct hns_roce_dca_ctx *dca_ctx,
+ struct ibv_qp_init_attr_ex *attr,
+ struct hnsdv_qp_init_attr *hns_attr)
{
- if (pool_en && (qp_type == IBV_QPT_RC || qp_type == IBV_QPT_XRC_SEND))
+ /* DCA pool disable */
+ if (!dca_ctx->unit_size)
+ return false;
+
+ /* Unsupport type */
+ if (attr->qp_type != IBV_QPT_RC && attr->qp_type != IBV_QPT_XRC_SEND)
+ return false;
+
+ if (hns_attr &&
+ (hns_attr->comp_mask & HNSDV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS) &&
+ (hns_attr->create_flags & HNSDV_QP_CREATE_ENABLE_DCA_MODE))
return true;
return false;
@@ -1185,6 +1210,7 @@ static void qp_free_wqe(struct hns_roce_qp *qp)
}
static int qp_alloc_wqe(struct ibv_qp_init_attr_ex *attr,
+ struct hnsdv_qp_init_attr *hns_attr,
struct hns_roce_qp *qp, struct hns_roce_context *ctx)
{
struct hns_roce_device *hr_dev = to_hr_dev(ctx->ibv_ctx.context.device);
@@ -1208,7 +1234,8 @@ static int qp_alloc_wqe(struct ibv_qp_init_attr_ex *attr,
goto err_alloc;
}
- if (check_qp_support_dca(ctx->dca_ctx.max_size != 0, attr->qp_type)) {
+ if (check_qp_support_dca(&ctx->dca_ctx, attr, hns_attr) &&
+ ctx->dca_ctx.max_size > 0) {
/* when DCA is enabled, use a buffer list to store page addr */
qp->buf.buf = NULL;
qp->dca_wqe.max_cnt = hr_hw_page_count(qp->buf_size);
@@ -1216,6 +1243,7 @@ static int qp_alloc_wqe(struct ibv_qp_init_attr_ex *attr,
qp->dca_wqe.bufs = calloc(qp->dca_wqe.max_cnt, sizeof(void *));
if (!qp->dca_wqe.bufs)
goto err_alloc;
+ verbs_debug(&ctx->ibv_ctx, "alloc DCA buf.\n");
} else {
if (hns_roce_alloc_buf(&qp->buf, qp->buf_size,
HNS_HW_PAGE_SIZE))
@@ -1478,6 +1506,7 @@ void hns_roce_free_qp_buf(struct hns_roce_qp *qp, struct hns_roce_context *ctx)
}
static int hns_roce_alloc_qp_buf(struct ibv_qp_init_attr_ex *attr,
+ struct hnsdv_qp_init_attr *hns_attr,
struct hns_roce_qp *qp,
struct hns_roce_context *ctx)
{
@@ -1487,7 +1516,7 @@ static int hns_roce_alloc_qp_buf(struct ibv_qp_init_attr_ex *attr,
pthread_spin_init(&qp->rq.hr_lock.lock, PTHREAD_PROCESS_PRIVATE))
return -ENOMEM;
- ret = qp_alloc_wqe(attr, qp, ctx);
+ ret = qp_alloc_wqe(attr, hns_attr, qp, ctx);
if (ret)
return ret;
@@ -1510,7 +1539,8 @@ static int mmap_dwqe(struct ibv_context *ibv_ctx, struct hns_roce_qp *qp,
}
static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx,
- struct ibv_qp_init_attr_ex *attr)
+ struct ibv_qp_init_attr_ex *attr,
+ struct hnsdv_qp_init_attr *hns_attr)
{
struct hns_roce_context *context = to_hr_ctx(ibv_ctx);
struct hns_roce_qp *qp;
@@ -1533,7 +1563,7 @@ static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx,
if (ret)
goto err_spinlock;
- ret = hns_roce_alloc_qp_buf(attr, qp, context);
+ ret = hns_roce_alloc_qp_buf(attr, hns_attr, qp, context);
if (ret)
goto err_buf;
@@ -1587,7 +1617,7 @@ struct ibv_qp *hns_roce_u_create_qp(struct ibv_pd *pd,
attrx.comp_mask = IBV_QP_INIT_ATTR_PD;
attrx.pd = pd;
- qp = create_qp(pd->context, &attrx);
+ qp = create_qp(pd->context, &attrx, NULL);
if (qp)
memcpy(attr, &attrx, sizeof(*attr));
@@ -1597,7 +1627,19 @@ struct ibv_qp *hns_roce_u_create_qp(struct ibv_pd *pd,
struct ibv_qp *hns_roce_u_create_qp_ex(struct ibv_context *context,
struct ibv_qp_init_attr_ex *attr)
{
- return create_qp(context, attr);
+ return create_qp(context, attr, NULL);
+}
+
+struct ibv_qp *hnsdv_create_qp(struct ibv_context *context,
+ struct ibv_qp_init_attr_ex *qp_attr,
+ struct hnsdv_qp_init_attr *hns_attr)
+{
+ if (!is_hns_dev(context->device)) {
+ errno = EOPNOTSUPP;
+ return NULL;
+ }
+
+ return create_qp(context, qp_attr, hns_attr);
}
struct ibv_qp *hns_roce_u_open_qp(struct ibv_context *context,
diff --git a/providers/hns/hnsdv.h b/providers/hns/hnsdv.h
new file mode 100644
index 0000000..cfe1611
--- /dev/null
+++ b/providers/hns/hnsdv.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
+/*
+ * Copyright (c) 2021 HiSilicon Limited.
+ */
+
+#ifndef __HNSDV_H__
+#define __HNSDV_H__
+
+#include <stdio.h>
+#include <stdbool.h>
+
+#include <sys/types.h>
+
+#include <infiniband/verbs.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+enum hnsdv_context_attr_flags {
+ HNSDV_CONTEXT_FLAGS_DCA = 1 << 0,
+};
+
+enum hnsdv_context_comp_mask {
+ HNSDV_CONTEXT_MASK_DCA_PRIME_QPS = 1 << 0,
+ HNSDV_CONTEXT_MASK_DCA_UNIT_SIZE = 1 << 1,
+ HNSDV_CONTEXT_MASK_DCA_MAX_SIZE = 1 << 2,
+ HNSDV_CONTEXT_MASK_DCA_MIN_SIZE = 1 << 3,
+};
+
+struct hnsdv_context_attr {
+ uint64_t flags; /* Use enum hnsdv_context_attr_flags */
+ uint64_t comp_mask; /* Use enum hnsdv_context_comp_mask */
+ uint32_t dca_prime_qps;
+ uint32_t dca_unit_size;
+ uint64_t dca_max_size;
+ uint64_t dca_min_size;
+};
+
+bool hnsdv_is_supported(struct ibv_device *device);
+struct ibv_context *hnsdv_open_device(struct ibv_device *device,
+ struct hnsdv_context_attr *attr);
+
+enum hnsdv_qp_create_flags {
+ HNSDV_QP_CREATE_ENABLE_DCA_MODE = 1 << 0,
+};
+
+enum hnsdv_qp_init_attr_mask {
+ HNSDV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS = 1 << 0,
+};
+
+struct hnsdv_qp_init_attr {
+ uint64_t comp_mask; /* Use enum hnsdv_qp_init_attr_mask */
+ uint32_t create_flags; /* Use enum hnsdv_qp_create_flags */
+};
+
+struct ibv_qp *hnsdv_create_qp(struct ibv_context *context,
+ struct ibv_qp_init_attr_ex *qp_attr,
+ struct hnsdv_qp_init_attr *hns_qp_attr);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __HNSDV_H__ */
diff --git a/providers/hns/libhns.map b/providers/hns/libhns.map
new file mode 100644
index 0000000..aed491c
--- /dev/null
+++ b/providers/hns/libhns.map
@@ -0,0 +1,9 @@
+/* Export symbols should be added below according to
+ Documentation/versioning.md document. */
+HNS_1.0 {
+ global:
+ hnsdv_is_supported;
+ hnsdv_open_device;
+ hnsdv_create_qp;
+ local: *;
+};
diff --git a/redhat/rdma-core.spec b/redhat/rdma-core.spec
index f1b196a..321578c 100644
--- a/redhat/rdma-core.spec
+++ b/redhat/rdma-core.spec
@@ -150,6 +150,8 @@ Provides: libefa = %{version}-%{release}
Obsoletes: libefa < %{version}-%{release}
Provides: libhfi1 = %{version}-%{release}
Obsoletes: libhfi1 < %{version}-%{release}
+Provides: libhns = %{version}-%{release}
+Obsoletes: libhns < %{version}-%{release}
Provides: libipathverbs = %{version}-%{release}
Obsoletes: libipathverbs < %{version}-%{release}
Provides: libirdma = %{version}-%{release}
@@ -177,7 +179,7 @@ Device-specific plug-in ibverbs userspace drivers are included:
- libcxgb4: Chelsio T4 iWARP HCA
- libefa: Amazon Elastic Fabric Adapter
- libhfi1: Intel Omni-Path HFI
-- libhns: HiSilicon Hip06 SoC
+- libhns: HiSilicon Hip08+ SoC
- libipathverbs: QLogic InfiniPath HCA
- libirdma: Intel Ethernet Connection RDMA
- libmlx4: Mellanox ConnectX-3 InfiniBand HCA
@@ -562,6 +564,7 @@ fi
%dir %{_sysconfdir}/libibverbs.d
%dir %{_libdir}/libibverbs
%{_libdir}/libefa.so.*
+%{_libdir}/libhns.so.*
%{_libdir}/libibverbs*.so.*
%{_libdir}/libibverbs/*.so
%{_libdir}/libmlx5.so.*
diff --git a/suse/rdma-core.spec b/suse/rdma-core.spec
index bd1faec..ce19db1 100644
--- a/suse/rdma-core.spec
+++ b/suse/rdma-core.spec
@@ -35,6 +35,7 @@ License: BSD-2-Clause OR GPL-2.0-only
Group: Productivity/Networking/Other
%define efa_so_major 1
+%define hns_so_major 1
%define verbs_so_major 1
%define rdmacm_so_major 1
%define umad_so_major 3
@@ -44,6 +45,7 @@ Group: Productivity/Networking/Other
%define mad_major 5
%define efa_lname libefa%{efa_so_major}
+%define hns_lname libhns%{hns_so_major}
%define verbs_lname libibverbs%{verbs_so_major}
%define rdmacm_lname librdmacm%{rdmacm_so_major}
%define umad_lname libibumad%{umad_so_major}
@@ -157,6 +159,7 @@ Requires: %{umad_lname} = %{version}-%{release}
Requires: %{verbs_lname} = %{version}-%{release}
%if 0%{?dma_coherent}
Requires: %{efa_lname} = %{version}-%{release}
+Requires: %{hns_lname} = %{version}-%{release}
Requires: %{mlx4_lname} = %{version}-%{release}
Requires: %{mlx5_lname} = %{version}-%{release}
%endif
@@ -197,6 +200,7 @@ Requires: %{name}%{?_isa} = %{version}-%{release}
Obsoletes: libcxgb4-rdmav2 < %{version}-%{release}
Obsoletes: libefa-rdmav2 < %{version}-%{release}
Obsoletes: libhfi1verbs-rdmav2 < %{version}-%{release}
+Obsoletes: libhns-rdmav2 < %{version}-%{release}
Obsoletes: libipathverbs-rdmav2 < %{version}-%{release}
Obsoletes: libmlx4-rdmav2 < %{version}-%{release}
Obsoletes: libmlx5-rdmav2 < %{version}-%{release}
@@ -205,6 +209,7 @@ Obsoletes: libocrdma-rdmav2 < %{version}-%{release}
Obsoletes: librxe-rdmav2 < %{version}-%{release}
%if 0%{?dma_coherent}
Requires: %{efa_lname} = %{version}-%{release}
+Requires: %{hns_lname} = %{version}-%{release}
Requires: %{mlx4_lname} = %{version}-%{release}
Requires: %{mlx5_lname} = %{version}-%{release}
%endif
@@ -223,7 +228,7 @@ Device-specific plug-in ibverbs userspace drivers are included:
- libcxgb4: Chelsio T4 iWARP HCA
- libefa: Amazon Elastic Fabric Adapter
- libhfi1: Intel Omni-Path HFI
-- libhns: HiSilicon Hip06 SoC
+- libhns: HiSilicon Hip08+ SoC
- libipathverbs: QLogic InfiniPath HCA
- libirdma: Intel Ethernet Connection RDMA
- libmlx4: Mellanox ConnectX-3 InfiniBand HCA
@@ -250,6 +255,13 @@ Group: System/Libraries
%description -n %efa_lname
This package contains the efa runtime library.
+%package -n %hns_lname
+Summary: HNS runtime library
+Group: System/Libraries
+
+%description -n %hns_lname
+This package contains the hns runtime library.
+
%package -n %mlx4_lname
Summary: MLX4 runtime library
Group: System/Libraries
@@ -493,6 +505,9 @@ rm -rf %{buildroot}/%{_sbindir}/srp_daemon.sh
%post -n %efa_lname -p /sbin/ldconfig
%postun -n %efa_lname -p /sbin/ldconfig
+%post -n %hns_lname -p /sbin/ldconfig
+%postun -n %hns_lname -p /sbin/ldconfig
+
%post -n %mlx4_lname -p /sbin/ldconfig
%postun -n %mlx4_lname -p /sbin/ldconfig
@@ -689,6 +704,10 @@ done
%defattr(-,root,root)
%{_libdir}/libefa*.so.*
+%files -n %hns_lname
+%defattr(-,root,root)
+%{_libdir}/libhns*.so.*
+
%files -n %mlx4_lname
%defattr(-,root,root)
%{_libdir}/libmlx4*.so.*
--
2.30.0
@ -1,28 +0,0 @@
From 8a5429161e6932d4031ec705b695973d67729c71 Mon Sep 17 00:00:00 2001
From: Yixing Liu <liuyixing1@huawei.com>
Date: Wed, 14 Dec 2022 16:37:26 +0800
Subject: [PATCH rdma-core 1/2] Update kernel headers
To commit ?? ("RDMA/hns: Kernel notify usr space to stop ring db").
Signed-off-by: Yixing Liu <liuyixing1@huawei.com>
Reviewed-by: Yangyang Li <liyangyang20@huawei.com>
---
kernel-headers/rdma/hns-abi.h | 1 +
1 file changed, 1 insertion(+)
diff --git a/kernel-headers/rdma/hns-abi.h b/kernel-headers/rdma/hns-abi.h
index 6950841..5988a62 100644
--- a/kernel-headers/rdma/hns-abi.h
+++ b/kernel-headers/rdma/hns-abi.h
@@ -127,6 +127,7 @@ struct hns_roce_ib_alloc_ucontext_resp {
__u32 dca_qps;
__u32 dca_mmap_size;
__aligned_u64 dca_mmap_key;
+ __aligned_u64 reset_mmap_key;
};
enum hns_roce_uctx_comp_mask {
--
2.30.0
@ -1,195 +0,0 @@
From c3ee7375c80c7a8f0a943679566c87f17f87aa17 Mon Sep 17 00:00:00 2001
From: Guofeng Yue <yueguofeng@hisilicon.com>
Date: Mon, 9 May 2022 16:03:38 +0800
Subject: [PATCH rdma-core 2/2] libhns: Add reset stop flow mechanism
driver inclusion
category: bugfix
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I65WI7
------------------------------------------------------------------
Add an interface to user space for receiving the kernel reset state. After
reading the reset flag, user space stops ringing the doorbell (db).
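A simplified sketch of the mechanism (the real code is hns_roce_write64()
in the diff below; struct and function names here are placeholders): the
kernel exposes a read-only state page via mmap(), and every doorbell write
first checks its is_reset flag.

#include <endian.h>
#include <stdint.h>
#include <util/mmio.h>	/* rdma-core helper providing mmio_write64_le() */

struct reset_state {		/* simplified stand-in for the mmap()ed page */
	uint32_t is_reset;
};

static void ring_db(struct reset_state *state, void *db_reg, uint64_t val)
{
	/* After a reset the hardware context is gone, so drop the doorbell. */
	if (state && state->is_reset)
		return;

	mmio_write64_le(db_reg, htole64(val));
}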
Signed-off-by: Yixing Liu <liuyixing1@huawei.com>
Signed-off-by: Guofeng Yue <yueguofeng@hisilicon.com>
Reviewed-by: Yangyang Li <liyangyang20@huawei.com>
---
providers/hns/hns_roce_u.c | 30 ++++++++++++++++++++++++++++--
providers/hns/hns_roce_u.h | 5 +++++
providers/hns/hns_roce_u_db.h | 8 +++++++-
providers/hns/hns_roce_u_hw_v2.c | 19 ++++++++++++++-----
4 files changed, 54 insertions(+), 8 deletions(-)
diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c
index 0cf6d4b..3d29838 100644
--- a/providers/hns/hns_roce_u.c
+++ b/providers/hns/hns_roce_u.c
@@ -221,6 +221,24 @@ static void uninit_dca_context(struct hns_roce_context *ctx)
pthread_spin_destroy(&dca_ctx->lock);
}
+static int init_reset_context(struct hns_roce_context *ctx, int cmd_fd,
+ struct hns_roce_alloc_ucontext_resp *resp,
+ int page_size)
+{
+ uint64_t reset_mmap_key = resp->reset_mmap_key;
+
+ /* The reset mmap key is 0, which means it is not supported. */
+ if (reset_mmap_key == 0)
+ return 0;
+
+ ctx->reset_state = mmap(NULL, page_size, PROT_READ, MAP_SHARED,
+ cmd_fd, reset_mmap_key);
+ if (ctx->reset_state == MAP_FAILED)
+ return -ENOMEM;
+
+ return 0;
+}
+
static int hns_roce_mmap(struct hns_roce_device *hr_dev,
struct hns_roce_context *context, int cmd_fd)
{
@@ -325,8 +343,11 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
&resp, ctx_attr, hr_dev->page_size))
goto err_free;
+ if (init_reset_context(context, cmd_fd, &resp, hr_dev->page_size))
+ goto reset_free;
+
if (hns_roce_mmap(hr_dev, context, cmd_fd))
- goto dca_free;
+ goto uar_free;
pthread_spin_init(&context->uar_lock, PTHREAD_PROCESS_PRIVATE);
@@ -335,7 +356,10 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
return &context->ibv_ctx;
-dca_free:
+uar_free:
+ if (context->reset_state)
+ munmap(context->reset_state, hr_dev->page_size);
+reset_free:
uninit_dca_context(context);
err_free:
verbs_uninit_context(&context->ibv_ctx);
@@ -349,6 +373,8 @@ static void hns_roce_free_context(struct ibv_context *ibctx)
struct hns_roce_context *context = to_hr_ctx(ibctx);
munmap(context->uar, hr_dev->page_size);
+ if (context->reset_state)
+ munmap(context->reset_state, hr_dev->page_size);
uninit_dca_context(context);
verbs_uninit_context(&context->ibv_ctx);
free(context);
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index 71c35c5..76c7adb 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -226,9 +226,14 @@ struct hns_roce_dca_ctx {
atomic_bitmap_t *sync_status;
};
+struct hns_roce_v2_reset_state {
+ uint32_t is_reset;
+};
+
struct hns_roce_context {
struct verbs_context ibv_ctx;
void *uar;
+ void *reset_state;
pthread_spinlock_t uar_lock;
struct {
diff --git a/providers/hns/hns_roce_u_db.h b/providers/hns/hns_roce_u_db.h
index 8c47a53..de288de 100644
--- a/providers/hns/hns_roce_u_db.h
+++ b/providers/hns/hns_roce_u_db.h
@@ -40,8 +40,14 @@
#define HNS_ROCE_WORD_NUM 2
-static inline void hns_roce_write64(void *dest, __le32 val[HNS_ROCE_WORD_NUM])
+static inline void hns_roce_write64(struct hns_roce_context *ctx, void *dest,
+ __le32 val[HNS_ROCE_WORD_NUM])
{
+ struct hns_roce_v2_reset_state *state = ctx->reset_state;
+
+ if (state && state->is_reset)
+ return;
+
mmio_write64_le(dest, *(__le64 *)val);
}
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index 7661863..d0067d3 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -298,7 +298,8 @@ static void hns_roce_update_rq_db(struct hns_roce_context *ctx,
hr_reg_write(&rq_db, DB_CMD, HNS_ROCE_V2_RQ_DB);
hr_reg_write(&rq_db, DB_PI, rq_head);
- hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&rq_db);
+ hns_roce_write64(ctx, ctx->uar + ROCEE_VF_DB_CFG0_OFFSET,
+ (__le32 *)&rq_db);
}
static void hns_roce_update_sq_db(struct hns_roce_context *ctx,
@@ -312,7 +313,7 @@ static void hns_roce_update_sq_db(struct hns_roce_context *ctx,
hr_reg_write(&sq_db, DB_PI, qp->sq.head);
hr_reg_write(&sq_db, DB_SL, qp->sl);
- hns_roce_write64(qp->sq.db_reg, (__le32 *)&sq_db);
+ hns_roce_write64(ctx, qp->sq.db_reg, (__le32 *)&sq_db);
}
static void hns_roce_write512(uint64_t *dest, uint64_t *val)
@@ -323,6 +324,12 @@ static void hns_roce_write512(uint64_t *dest, uint64_t *val)
static void hns_roce_write_dwqe(struct hns_roce_qp *qp, void *wqe)
{
struct hns_roce_rc_sq_wqe *rc_sq_wqe = wqe;
+ struct ibv_qp *ibvqp = &qp->verbs_qp.qp;
+ struct hns_roce_context *ctx = to_hr_ctx(ibvqp->context);
+ struct hns_roce_v2_reset_state *state = ctx->reset_state;
+
+ if (state && state->is_reset)
+ return;
/* All kinds of DirectWQE have the same header field layout */
hr_reg_enable(rc_sq_wqe, RCWQE_FLAG);
@@ -342,7 +349,8 @@ static void update_cq_db(struct hns_roce_context *ctx, struct hns_roce_cq *cq)
hr_reg_write(&cq_db, DB_CQ_CI, cq->cons_index);
hr_reg_write(&cq_db, DB_CQ_CMD_SN, 1);
- hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&cq_db);
+ hns_roce_write64(ctx, ctx->uar + ROCEE_VF_DB_CFG0_OFFSET,
+ (__le32 *)&cq_db);
}
static struct hns_roce_qp *hns_roce_v2_find_qp(struct hns_roce_context *ctx,
@@ -857,7 +865,8 @@ static int hns_roce_u_v2_arm_cq(struct ibv_cq *ibvcq, int solicited)
hr_reg_write(&cq_db, DB_CQ_CMD_SN, cq->arm_sn);
hr_reg_write(&cq_db, DB_CQ_NOTIFY, solicited_flag);
- hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&cq_db);
+ hns_roce_write64(ctx, ctx->uar + ROCEE_VF_DB_CFG0_OFFSET,
+ (__le32 *)&cq_db);
return 0;
}
@@ -1934,7 +1943,7 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
update_srq_db(&srq_db, srq);
- hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET,
+ hns_roce_write64(ctx, ctx->uar + ROCEE_VF_DB_CFG0_OFFSET,
(__le32 *)&srq_db);
}
--
2.30.0

View File

@ -1,60 +0,0 @@
From 39c7b8eaeb3a6c855a49885b6b6de877268d36a7 Mon Sep 17 00:00:00 2001
From: Yixing Liu <liuyixing1@huawei.com>
Date: Wed, 12 Apr 2023 17:01:08 +0800
Subject: [PATCH 1/2] Update kernel headers
To commit ?? ("RDMA/hns: Support congestion control algorithm
configuration at QP granularity").
Signed-off-by: Yixing Liu <liuyixing1@huawei.com>
Reviewed-by: Yangyang Li <liyangyang20@huawei.com>
---
kernel-headers/rdma/hns-abi.h | 18 +++++++++++++++++-
1 file changed, 17 insertions(+), 1 deletion(-)
diff --git a/kernel-headers/rdma/hns-abi.h b/kernel-headers/rdma/hns-abi.h
index 5988a62..bd19927 100644
--- a/kernel-headers/rdma/hns-abi.h
+++ b/kernel-headers/rdma/hns-abi.h
@@ -63,6 +63,18 @@ struct hns_roce_ib_create_srq_resp {
__u32 reserved;
};
+enum hns_roce_create_qp_comp_mask {
+ HNS_ROCE_CREATE_QP_MASK_CREATE_FLAGS = 1 << 0,
+ HNS_ROCE_CREATE_QP_MASK_CONGEST_TYPE = 1 << 1,
+};
+
+enum hns_roce_congest_type_flags {
+ HNS_ROCE_CREATE_QP_FLAGS_DCQCN = 1 << 0,
+ HNS_ROCE_CREATE_QP_FLAGS_LDCP = 1 << 1,
+ HNS_ROCE_CREATE_QP_FLAGS_HC3 = 1 << 2,
+ HNS_ROCE_CREATE_QP_FLAGS_DIP = 1 << 3,
+};
+
struct hns_roce_ib_create_qp {
__aligned_u64 buf_addr;
__aligned_u64 db_addr;
@@ -71,6 +83,9 @@ struct hns_roce_ib_create_qp {
__u8 sq_no_prefetch;
__u8 reserved[5];
__aligned_u64 sdb_addr;
+ __aligned_u64 comp_mask;
+ __aligned_u64 create_flags;
+ __aligned_u64 congest_type_flags;
};
enum hns_roce_qp_cap_flags {
@@ -123,7 +138,8 @@ struct hns_roce_ib_alloc_ucontext_resp {
__u32 config;
__u32 max_inline_data;
__u8 mac_type;
- __u8 rsv1[7];
+ __u8 congest_type;
+ __u8 rsv1[6];
__u32 dca_qps;
__u32 dca_mmap_size;
__aligned_u64 dca_mmap_key;
--
2.25.1

View File

@ -1,307 +0,0 @@
From 99e1e64edab954ce1895d83a3d6f4317bc12c444 Mon Sep 17 00:00:00 2001
From: Yixing Liu <liuyixing1@huawei.com>
Date: Wed, 12 Apr 2023 17:01:09 +0800
Subject: [PATCH] libhns: Support congestion control algorithm configuration
driver inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I6N1G4
---------------------------------------------------------------
Add direct verbs support for configuring the congestion control
algorithm at QP granularity. The userspace driver provides the
interface through which users select an algorithm, while the kernel
driver fills in the resources of the different algorithms and reports
the supported algorithm types to userspace.
A direct verbs interface is also provided so that users can query the
supported congestion control algorithm types.
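A minimal usage sketch of the new direct verbs, assuming an already
opened device context "ctx" and a filled-in struct ibv_qp_init_attr_ex
"attr" (both hypothetical names used only for illustration):

    struct hnsdv_context dv_attr = {};
    struct hnsdv_qp_init_attr hns_attr = {};
    struct ibv_qp *qp;

    /* Ask the device which congestion control algorithms it supports. */
    if (!hnsdv_query_device(ctx, &dv_attr) &&
        (dv_attr.comp_mask & HNSDV_CONTEXT_MASK_CONGEST_TYPE) &&
        (dv_attr.congest_type & HNSDV_QP_CREATE_ENABLE_DCQCN)) {
        /* Select DCQCN for this QP. */
        hns_attr.comp_mask = HNSDV_QP_INIT_ATTR_MASK_QP_CONGEST_TYPE;
        hns_attr.congest_type = HNSDV_QP_CREATE_ENABLE_DCQCN;
    }

    qp = hnsdv_create_qp(ctx, &attr, &hns_attr);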
Signed-off-by: Yixing Liu <liuyixing1@huawei.com>
Reviewed-by: Yangyang Li <liyangyang20@huawei.com>
---
providers/hns/hns_roce_u.c | 1 +
providers/hns/hns_roce_u.h | 6 ++
providers/hns/hns_roce_u_verbs.c | 107 +++++++++++++++++++++++++++++--
providers/hns/hnsdv.h | 22 +++++++
providers/hns/libhns.map | 1 +
5 files changed, 131 insertions(+), 6 deletions(-)
diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c
index 3d29838..87f9ed8 100644
--- a/providers/hns/hns_roce_u.c
+++ b/providers/hns/hns_roce_u.c
@@ -299,6 +299,7 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
goto err_free;
hr_dev->mac_type = resp.mac_type;
+ hr_dev->congest_type = resp.congest_type;
if (!resp.cqe_size)
context->cqe_size = HNS_ROCE_CQE_SIZE;
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index 76c7adb..41e9599 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -172,6 +172,7 @@ struct hns_roce_device {
const struct hns_roce_u_hw *u_hw;
int hw_version;
uint8_t mac_type;
+ uint8_t congest_type;
};
struct hns_roce_buf {
@@ -230,6 +231,11 @@ struct hns_roce_v2_reset_state {
uint32_t is_reset;
};
+struct hns_roce_cmd_flag {
+ uint32_t create_flags;
+ uint32_t congest_type_flags;
+};
+
struct hns_roce_context {
struct verbs_context ibv_ctx;
void *uar;
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index 282ab74..499735c 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -89,10 +89,10 @@ int hns_roce_u_query_device(struct ibv_context *context,
struct ibv_device_attr_ex *attr, size_t attr_size)
{
struct ib_uverbs_ex_query_device_resp resp;
+ unsigned int major, minor, sub_minor;
size_t resp_size = sizeof(resp);
- int ret;
uint64_t raw_fw_ver;
- unsigned int major, minor, sub_minor;
+ int ret;
ret = ibv_cmd_query_device_any(context, input, attr, attr_size, &resp,
&resp_size);
@@ -110,6 +110,27 @@ int hns_roce_u_query_device(struct ibv_context *context,
return 0;
}
+int hnsdv_query_device(struct ibv_context *context,
+ struct hnsdv_context *attrs_out)
+{
+ struct hns_roce_device *hr_dev = to_hr_dev(context->device);
+
+ if (!hr_dev) {
+ verbs_err(verbs_get_ctx(context), "not a HNS RoCE device!\n");
+ return EOPNOTSUPP;
+ }
+
+ if (!attrs_out)
+ return EINVAL;
+
+ memset(attrs_out, 0, sizeof(*attrs_out));
+
+ attrs_out->comp_mask |= HNSDV_CONTEXT_MASK_CONGEST_TYPE;
+ attrs_out->congest_type = hr_dev->congest_type;
+
+ return 0;
+}
+
int hns_roce_u_query_port(struct ibv_context *context, uint8_t port,
struct ibv_port_attr *attr)
{
@@ -956,6 +977,67 @@ int hns_roce_u_destroy_srq(struct ibv_srq *ibv_srq)
return 0;
}
+enum {
+ HNSDV_QP_SUP_COMP_MASK = HNSDV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS |
+ HNSDV_QP_INIT_ATTR_MASK_QP_CONGEST_TYPE,
+};
+
+static int check_qp_congest_type(struct hns_roce_context *ctx,
+ struct hnsdv_qp_init_attr *hns_attr,
+ struct hns_roce_cmd_flag *cmd_flag)
+{
+ struct hns_roce_device *hr_dev = to_hr_dev(ctx->ibv_ctx.context.device);
+
+ if (!check_comp_mask(hns_attr->congest_type, hr_dev->congest_type)) {
+ verbs_err(&ctx->ibv_ctx, "unsupported congest type 0x%x.\n",
+ hns_attr->congest_type);
+ return -EOPNOTSUPP;
+ }
+
+ switch (hns_attr->congest_type) {
+ case HNSDV_QP_CREATE_ENABLE_DCQCN:
+ cmd_flag->congest_type_flags |= HNS_ROCE_CREATE_QP_FLAGS_DCQCN;
+ break;
+ case HNSDV_QP_CREATE_ENABLE_LDCP:
+ cmd_flag->congest_type_flags |= HNS_ROCE_CREATE_QP_FLAGS_LDCP;
+ break;
+ case HNSDV_QP_CREATE_ENABLE_HC3:
+ cmd_flag->congest_type_flags |= HNS_ROCE_CREATE_QP_FLAGS_HC3;
+ break;
+ case HNSDV_QP_CREATE_ENABLE_DIP:
+ cmd_flag->congest_type_flags |= HNS_ROCE_CREATE_QP_FLAGS_DIP;
+ break;
+ default:
+ verbs_err(&ctx->ibv_ctx,
+ "unsupported congestion control algorithm configuration.\n");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int check_hnsdv_qp_attr(struct hns_roce_context *ctx,
+ struct hnsdv_qp_init_attr *hns_attr,
+ struct hns_roce_cmd_flag *cmd_flag)
+{
+ int ret;
+
+ if (!hns_attr)
+ return 0;
+
+ if (!check_comp_mask(hns_attr->comp_mask, HNSDV_QP_SUP_COMP_MASK)) {
+ verbs_err(&ctx->ibv_ctx, "invalid hnsdv comp_mask 0x%x.\n",
+ hns_attr->comp_mask);
+ return -EINVAL;
+ }
+
+ ret = check_qp_congest_type(ctx, hns_attr, cmd_flag);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
enum {
CREATE_QP_SUP_COMP_MASK = IBV_QP_INIT_ATTR_PD | IBV_QP_INIT_ATTR_XRCD |
IBV_QP_INIT_ATTR_SEND_OPS_FLAGS,
@@ -1050,7 +1132,9 @@ static int verify_qp_create_cap(struct hns_roce_context *ctx,
}
static int verify_qp_create_attr(struct hns_roce_context *ctx,
- struct ibv_qp_init_attr_ex *attr)
+ struct ibv_qp_init_attr_ex *attr,
+ struct hnsdv_qp_init_attr *hns_attr,
+ struct hns_roce_cmd_flag *cmd_flag)
{
int ret;
@@ -1058,6 +1142,10 @@ static int verify_qp_create_attr(struct hns_roce_context *ctx,
if (ret)
return ret;
+ ret = check_hnsdv_qp_attr(ctx, hns_attr, cmd_flag);
+ if (ret)
+ return ret;
+
return verify_qp_create_cap(ctx, attr);
}
@@ -1452,7 +1540,8 @@ static int hns_roce_store_qp(struct hns_roce_context *ctx,
static int qp_exec_create_cmd(struct ibv_qp_init_attr_ex *attr,
struct hns_roce_qp *qp,
struct hns_roce_context *ctx,
- uint64_t *dwqe_mmap_key)
+ uint64_t *dwqe_mmap_key,
+ struct hns_roce_cmd_flag *cmd_flag)
{
struct hns_roce_create_qp_ex_resp resp_ex = {};
struct hns_roce_create_qp_ex cmd_ex = {};
@@ -1464,6 +1553,11 @@ static int qp_exec_create_cmd(struct ibv_qp_init_attr_ex *attr,
cmd_ex.log_sq_stride = qp->sq.wqe_shift;
cmd_ex.log_sq_bb_count = hr_ilog32(qp->sq.wqe_cnt);
+ if (cmd_flag->congest_type_flags) {
+ cmd_ex.comp_mask |= HNS_ROCE_CREATE_QP_MASK_CONGEST_TYPE;
+ cmd_ex.congest_type_flags = cmd_flag->congest_type_flags;
+ }
+
ret = ibv_cmd_create_qp_ex2(&ctx->ibv_ctx.context, &qp->verbs_qp, attr,
&cmd_ex.ibv_cmd, sizeof(cmd_ex),
&resp_ex.ibv_resp, sizeof(resp_ex));
@@ -1543,11 +1637,12 @@ static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx,
struct hnsdv_qp_init_attr *hns_attr)
{
struct hns_roce_context *context = to_hr_ctx(ibv_ctx);
+ struct hns_roce_cmd_flag cmd_flag = {};
struct hns_roce_qp *qp;
uint64_t dwqe_mmap_key;
int ret;
- ret = verify_qp_create_attr(context, attr);
+ ret = verify_qp_create_attr(context, attr, hns_attr, &cmd_flag);
if (ret)
goto err;
@@ -1567,7 +1662,7 @@ static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx,
if (ret)
goto err_buf;
- ret = qp_exec_create_cmd(attr, qp, context, &dwqe_mmap_key);
+ ret = qp_exec_create_cmd(attr, qp, context, &dwqe_mmap_key, &cmd_flag);
if (ret)
goto err_cmd;
diff --git a/providers/hns/hnsdv.h b/providers/hns/hnsdv.h
index cfe1611..e15b428 100644
--- a/providers/hns/hnsdv.h
+++ b/providers/hns/hnsdv.h
@@ -45,19 +45,41 @@ enum hnsdv_qp_create_flags {
HNSDV_QP_CREATE_ENABLE_DCA_MODE = 1 << 0,
};
+enum hnsdv_qp_congest_ctrl_type {
+ HNSDV_QP_CREATE_ENABLE_DCQCN = 1 << 0,
+ HNSDV_QP_CREATE_ENABLE_LDCP = 1 << 1,
+ HNSDV_QP_CREATE_ENABLE_HC3 = 1 << 2,
+ HNSDV_QP_CREATE_ENABLE_DIP = 1 << 3,
+};
+
enum hnsdv_qp_init_attr_mask {
HNSDV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS = 1 << 0,
+ HNSDV_QP_INIT_ATTR_MASK_QP_CONGEST_TYPE = 1 << 1,
};
struct hnsdv_qp_init_attr {
uint64_t comp_mask; /* Use enum hnsdv_qp_init_attr_mask */
uint32_t create_flags; /* Use enum hnsdv_qp_create_flags */
+ uint8_t congest_type; /* Use enum hnsdv_qp_congest_ctrl_type */
};
struct ibv_qp *hnsdv_create_qp(struct ibv_context *context,
struct ibv_qp_init_attr_ex *qp_attr,
struct hnsdv_qp_init_attr *hns_qp_attr);
+enum hnsdv_query_context_comp_mask {
+ HNSDV_CONTEXT_MASK_CONGEST_TYPE = 1 << 0,
+};
+
+struct hnsdv_context {
+ uint64_t comp_mask; /* use enum hnsdv_query_context_comp_mask */
+ uint64_t flags;
+ uint8_t congest_type; /* Use enum hnsdv_qp_congest_ctrl_type */
+};
+
+int hnsdv_query_device(struct ibv_context *ctx_in,
+ struct hnsdv_context *attrs_out);
+
#ifdef __cplusplus
}
#endif
diff --git a/providers/hns/libhns.map b/providers/hns/libhns.map
index aed491c..ebf28eb 100644
--- a/providers/hns/libhns.map
+++ b/providers/hns/libhns.map
@@ -5,5 +5,6 @@ HNS_1.0 {
hnsdv_is_supported;
hnsdv_open_device;
hnsdv_create_qp;
+ hnsdv_query_device;
local: *;
};
--
2.25.1

View File

@ -1,28 +0,0 @@
From 0c132b9216282269974bf5d21f877413cc222950 Mon Sep 17 00:00:00 2001
From: Yixing Liu <liuyixing1@huawei.com>
Date: Mon, 17 Apr 2023 09:48:09 +0800
Subject: [PATCH 1/2] Update kernel headers
To commit ?? ("RDMA/hns: Add SVE DIRECT WQE flag to support libhns").
Signed-off-by: Yixing Liu <liuyixing1@huawei.com>
Reviewed-by: Yangyang Li <liyangyang20@huawei.com>
---
kernel-headers/rdma/hns-abi.h | 1 +
1 file changed, 1 insertion(+)
diff --git a/kernel-headers/rdma/hns-abi.h b/kernel-headers/rdma/hns-abi.h
index bd19927..cab941f 100644
--- a/kernel-headers/rdma/hns-abi.h
+++ b/kernel-headers/rdma/hns-abi.h
@@ -92,6 +92,7 @@ enum hns_roce_qp_cap_flags {
HNS_ROCE_QP_CAP_RQ_RECORD_DB = 1 << 0,
HNS_ROCE_QP_CAP_SQ_RECORD_DB = 1 << 1,
HNS_ROCE_QP_CAP_OWNER_DB = 1 << 2,
+ HNS_ROCE_QP_CAP_SVE_DIRECT_WQE = 1 << 3,
HNS_ROCE_QP_CAP_DYNAMIC_CTX_ATTACH = 1 << 4,
HNS_ROCE_QP_CAP_DIRECT_WQE = 1 << 5,
HNS_ROCE_QP_CAP_DYNAMIC_CTX_DETACH = 1 << 6,
--
2.25.1

View File

@ -1,125 +0,0 @@
From 6f08530cae5de66fabfae4cb29729a18b0e86365 Mon Sep 17 00:00:00 2001
From: Yixing Liu <liuyixing1@huawei.com>
Date: Mon, 17 Apr 2023 09:48:10 +0800
Subject: [PATCH 2/2] libhns: Add support for SVE Direct WQE
driver inclusion
category: bugfix
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I6VLLM
---------------------------------------------------------------
Some Kunpeng SoCs do not support DWQE through NEON instructions.
On these SoCs the IO path still works, but performance deteriorates.
They do, however, support DWQE through SVE instructions, so this
patch adds SVE DWQE support to preserve their performance. Note
that in this scenario DWQE is only accelerated by SVE's ldr and
str instructions; other load and store instructions also cause
performance degradation.
Signed-off-by: Yixing Liu <liuyixing1@huawei.com>
Reviewed-by: Yangyang Li <liyangyang20@huawei.com>
---
CMakeLists.txt | 1 +
buildlib/RDMA_EnableCStd.cmake | 17 +++++++++++++++++
providers/hns/CMakeLists.txt | 5 +++++
providers/hns/hns_roce_u_hw_v2.c | 21 ++++++++++++++++++++-
4 files changed, 43 insertions(+), 1 deletion(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 787c8be..bc4437b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -399,6 +399,7 @@ if (NOT HAVE_SPARSE)
endif()
RDMA_Check_SSE(HAVE_TARGET_SSE)
+RDMA_Check_SVE(HAVE_TARGET_SVE)
# Enable development support features
# Prune unneeded shared libraries during linking
diff --git a/buildlib/RDMA_EnableCStd.cmake b/buildlib/RDMA_EnableCStd.cmake
index 3c42824..2b56f42 100644
--- a/buildlib/RDMA_EnableCStd.cmake
+++ b/buildlib/RDMA_EnableCStd.cmake
@@ -127,3 +127,20 @@ int main(int argc, char *argv[])
endif()
set(${TO_VAR} "${HAVE_TARGET_SSE}" PARENT_SCOPE)
endFunction()
+
+function(RDMA_Check_SVE TO_VAR)
+ set(SVE_CHECK_PROGRAM "
+int main(int argc, char *argv[])
+{
+ return 0;
+}
+")
+
+ RDMA_Check_C_Compiles(HAVE_TARGET_SVE "${SVE_CHECK_PROGRAM}" "-march=armv8.2-a+sve")
+ if(NOT HAVE_TARGET_SVE)
+ message("SVE is not supported")
+ else()
+ set(SVE_FLAGS "-march=armv8.2-a+sve" PARENT_SCOPE)
+ endif()
+ set(${TO_VAR} "${HAVE_TARGET_SVE}" PARENT_SCOPE)
+endFunction()
\ No newline at end of file
diff --git a/providers/hns/CMakeLists.txt b/providers/hns/CMakeLists.txt
index 160e1ff..ef031a8 100644
--- a/providers/hns/CMakeLists.txt
+++ b/providers/hns/CMakeLists.txt
@@ -11,4 +11,9 @@ publish_headers(infiniband
hnsdv.h
)
+if (HAVE_TARGET_SVE)
+ add_definitions("-DHNS_SVE")
+ set_source_files_properties(hns_roce_u_hw_v2.c PROPERTIES COMPILE_FLAGS "${SVE_FLAGS}")
+endif()
+
rdma_pkg_config("hns" "libibverbs" "${CMAKE_THREAD_LIBS_INIT}")
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index d0067d3..a49b50d 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -321,6 +321,22 @@ static void hns_roce_write512(uint64_t *dest, uint64_t *val)
mmio_memcpy_x64(dest, val, sizeof(struct hns_roce_rc_sq_wqe));
}
+#if defined(HNS_SVE)
+static void hns_roce_sve_write512(uint64_t *dest, uint64_t *val)
+{
+ asm volatile(
+ "ldr z0, [%0]\n"
+ "str z0, [%1]\n"
+ ::"r" (val), "r"(dest):"cc", "memory"
+ );
+}
+#else
+static void hns_roce_sve_write512(uint64_t *dest, uint64_t *val)
+{
+ return;
+}
+#endif
+
static void hns_roce_write_dwqe(struct hns_roce_qp *qp, void *wqe)
{
struct hns_roce_rc_sq_wqe *rc_sq_wqe = wqe;
@@ -337,7 +353,10 @@ static void hns_roce_write_dwqe(struct hns_roce_qp *qp, void *wqe)
hr_reg_write(rc_sq_wqe, RCWQE_DB_SL_H, qp->sl >> HNS_ROCE_SL_SHIFT);
hr_reg_write(rc_sq_wqe, RCWQE_WQE_IDX, qp->sq.head);
- hns_roce_write512(qp->sq.db_reg, wqe);
+ if (qp->flags & HNS_ROCE_QP_CAP_SVE_DIRECT_WQE)
+ hns_roce_sve_write512(qp->sq.db_reg, wqe);
+ else
+ hns_roce_write512(qp->sq.db_reg, wqe);
}
static void update_cq_db(struct hns_roce_context *ctx, struct hns_roce_cq *cq)
--
2.25.1

View File

@ -1,91 +0,0 @@
From b5127a009336e0e6947433148c6c7422c277bce7 Mon Sep 17 00:00:00 2001
From: Luoyouming <luoyouming@huawei.com>
Date: Sat, 6 May 2023 18:06:38 +0800
Subject: [PATCH 1/3] libhns: Fix the sge num problem of atomic op
mainline inclusion
commit b4793235
category: bugfix
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I72EWP
CVE: NA
----------------------------------------------------------------------
The hns hardware requires wr->num_sge to be 1 when performing atomic
operations. The code did not check this condition; this patch adds the
constraint.
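For reference, a work request that satisfies this constraint looks as
follows. This is only a sketch; "qp", "mr", "local_buf", "remote_addr"
and "remote_rkey" are assumed to exist in the application:

    struct ibv_sge sge = {
        .addr   = (uintptr_t)local_buf,   /* 8-byte buffer for the old value */
        .length = 8,
        .lkey   = mr->lkey,
    };
    struct ibv_send_wr wr = {
        .wr_id      = 1,
        .sg_list    = &sge,
        .num_sge    = 1,                  /* must be exactly 1 for atomics */
        .opcode     = IBV_WR_ATOMIC_FETCH_AND_ADD,
        .send_flags = IBV_SEND_SIGNALED,
    };
    struct ibv_send_wr *bad_wr;

    wr.wr.atomic.remote_addr = remote_addr;
    wr.wr.atomic.rkey        = remote_rkey;
    wr.wr.atomic.compare_add = 1;         /* value to add */

    int ret = ibv_post_send(qp, &wr, &bad_wr);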
Fixes: 3507f87f7760 ("libhns: Optimize set_sge process")
Fixes: 36446a56eea5 ("libhns: Extended QP supports the new post send mechanism")
Signed-off-by: Luoyouming <luoyouming@huawei.com>
Signed-off-by: Zhou Juan <nnuzj07170227@163.com>
---
providers/hns/hns_roce_u_hw_v2.c | 23 ++++++++++++++++++-----
1 file changed, 18 insertions(+), 5 deletions(-)
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index a49b50d..5533cdb 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -106,6 +106,9 @@ static int set_atomic_seg(struct hns_roce_qp *qp, struct ibv_send_wr *wr,
void *buf[ATOMIC_BUF_NUM_MAX];
unsigned int buf_sge_num;
+ /* There is only one sge in atomic wr, and data_len is the data length
+ * in the first sge
+ */
if (is_std_atomic(data_len)) {
if (wr->opcode == IBV_WR_ATOMIC_CMP_AND_SWP) {
aseg->fetchadd_swap_data = htole64(wr->wr.atomic.swap);
@@ -923,16 +926,19 @@ static void set_rc_sge(struct hns_roce_v2_wqe_data_seg *dseg,
uint32_t mask = qp->ex_sge.sge_cnt - 1;
uint32_t index = sge_info->start_idx;
struct ibv_sge *sge = wr->sg_list;
+ int total_sge = wr->num_sge;
+ bool flag = false;
uint32_t len = 0;
uint32_t cnt = 0;
- int flag;
int i;
- flag = (wr->send_flags & IBV_SEND_INLINE &&
- wr->opcode != IBV_WR_ATOMIC_FETCH_AND_ADD &&
- wr->opcode != IBV_WR_ATOMIC_CMP_AND_SWP);
+ if (wr->opcode == IBV_WR_ATOMIC_FETCH_AND_ADD ||
+ wr->opcode == IBV_WR_ATOMIC_CMP_AND_SWP)
+ total_sge = 1;
+ else
+ flag = !!(wr->send_flags & IBV_SEND_INLINE);
- for (i = 0; i < wr->num_sge; i++, sge++) {
+ for (i = 0; i < total_sge; i++, sge++) {
if (unlikely(!sge->length))
continue;
@@ -2267,6 +2273,7 @@ static void wr_set_sge_list_rc(struct ibv_qp_ex *ibv_qp, size_t num_sge,
struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base);
struct hns_roce_rc_sq_wqe *wqe = qp->cur_wqe;
struct hns_roce_v2_wqe_data_seg *dseg;
+ uint32_t opcode;
if (!wqe)
return;
@@ -2276,9 +2283,15 @@ static void wr_set_sge_list_rc(struct ibv_qp_ex *ibv_qp, size_t num_sge,
return;
}
+
hr_reg_write(wqe, RCWQE_MSG_START_SGE_IDX,
qp->sge_info.start_idx & (qp->ex_sge.sge_cnt - 1));
+ opcode = hr_reg_read(wqe, RCWQE_OPCODE);
+ if (opcode == HNS_ROCE_WQE_OP_ATOMIC_COM_AND_SWAP ||
+ opcode == HNS_ROCE_WQE_OP_ATOMIC_FETCH_AND_ADD)
+ num_sge = 1;
+
dseg = (void *)(wqe + 1);
set_sgl_rc(dseg, qp, sg_list, num_sge);
--
2.25.1

View File

@ -1,55 +0,0 @@
From 2653621c332c79ba591d76a442061bd13ad23030 Mon Sep 17 00:00:00 2001
From: Luoyouming <luoyouming@huawei.com>
Date: Sat, 6 May 2023 18:06:39 +0800
Subject: [PATCH 2/3] libhns: Fix sge tail_len overflow
mainline inclusion
commit cd9c9ea5
category: bugfix
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I72F0C
CVE: NA
----------------------------------------------------------------------
In the SQ inline scenario, when num_sge in post_send is not 1, the sge
array is traversed in the for loop without wrapping around, so the copy
goes out of bounds.
fill_ext_sge_inl_data() calculates the remaining length of the array by
subtracting the current address from the tail address. If that length
is not sufficient, the remaining data should be copied after wrapping
back to the start of the array. However, in the code sge_cnt & sge_mask
always equals 0, so the first address of the array is mistakenly taken
as its tail address. As a result, tail_len is either 0 or wraps around
when it is calculated. After wrapping to a very large number, the
driver makes an incorrect judgment and copies all the data in one pass;
once the data length exceeds the truly remaining length, the array is
written out of bounds.
This patch changes tail_bound_addr (the tail pointer) to the actual
tail address of the sge array.
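A sketch of the address arithmetic with hypothetical numbers, using the
driver's own helpers (an extended-SGE queue of 64 entries is assumed):

    unsigned int sge_cnt  = 64;               /* qp->ex_sge.sge_cnt */
    unsigned int sge_mask = sge_cnt - 1;      /* 63 */

    /* Before the fix: 64 & 63 == 0, so the "tail" points at the first
     * SGE and tail_len = tail - dst is 0 or wraps to a huge value. */
    tail_bound_addr = get_send_sge_ex(qp, sge_cnt & sge_mask);

    /* After the fix: the tail is one entry past the last SGE, so every
     * copy is bounded correctly before wrapping back to the start. */
    tail_bound_addr = get_send_sge_ex(qp, sge_cnt);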
Fixes: 2ced2bc4d1d4 ("libhns: Fix out-of-bounds write when filling inline data into extended sge space")
Signed-off-by: Luoyouming <luoyouming@huawei.com>
Signed-off-by: Zhou Juan <nnuzj07170227@163.com>
---
providers/hns/hns_roce_u_hw_v2.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index 5533cdb..3d46f35 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -1028,7 +1028,7 @@ static int fill_ext_sge_inl_data(struct hns_roce_qp *qp,
return EINVAL;
dst_addr = get_send_sge_ex(qp, sge_info->start_idx & sge_mask);
- tail_bound_addr = get_send_sge_ex(qp, qp->ex_sge.sge_cnt & sge_mask);
+ tail_bound_addr = get_send_sge_ex(qp, qp->ex_sge.sge_cnt);
for (i = 0; i < num_buf; i++) {
tail_len = (uintptr_t)tail_bound_addr - (uintptr_t)dst_addr;
--
2.25.1

View File

@ -1,153 +0,0 @@
From b3cea3522d575fdb60b6f426e43d45cec3deb847 Mon Sep 17 00:00:00 2001
From: Yangyang Li <liyangyang20@huawei.com>
Date: Sat, 6 May 2023 18:06:40 +0800
Subject: [PATCH 3/3] libhns: Disable local invalidate operation
mainline inclusion
commit d8eec872
category: bugfix
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I72F0U
CVE: NA
----------------------------------------------------------------------
Currently the local invalidate operation doesn't work properly,
so disable it for the time being.
HIP08 and HIP09 hardware does not support this feature, so
delete the associated code.
Fixes: a9ae7e9bfb5d ("libhns: Add local invalidate MR support for hip08")
Signed-off-by: Yangyang Li <liyangyang20@huawei.com>
Signed-off-by: Zhou Juan <nnuzj07170227@163.com>
---
providers/hns/hns_roce_u_hw_v2.c | 30 +-----------------------------
providers/hns/hns_roce_u_hw_v2.h | 2 --
2 files changed, 1 insertion(+), 31 deletions(-)
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index 3d46f35..b929bbf 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -50,7 +50,6 @@ static const uint32_t hns_roce_opcode[] = {
HR_IBV_OPC_MAP(RDMA_READ, RDMA_READ),
HR_IBV_OPC_MAP(ATOMIC_CMP_AND_SWP, ATOMIC_COM_AND_SWAP),
HR_IBV_OPC_MAP(ATOMIC_FETCH_AND_ADD, ATOMIC_FETCH_AND_ADD),
- HR_IBV_OPC_MAP(LOCAL_INV, LOCAL_INV),
HR_IBV_OPC_MAP(BIND_MW, BIND_MW_TYPE),
HR_IBV_OPC_MAP(SEND_WITH_INV, SEND_WITH_INV),
};
@@ -429,7 +428,6 @@ static const unsigned int wc_send_op_map[] = {
[HNS_ROCE_SQ_OP_RDMA_READ] = IBV_WC_RDMA_READ,
[HNS_ROCE_SQ_OP_ATOMIC_COMP_AND_SWAP] = IBV_WC_COMP_SWAP,
[HNS_ROCE_SQ_OP_ATOMIC_FETCH_AND_ADD] = IBV_WC_FETCH_ADD,
- [HNS_ROCE_SQ_OP_LOCAL_INV] = IBV_WC_LOCAL_INV,
[HNS_ROCE_SQ_OP_BIND_MW] = IBV_WC_BIND_MW,
};
@@ -597,9 +595,6 @@ static void parse_cqe_for_req(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc,
case HNS_ROCE_SQ_OP_RDMA_WRITE_WITH_IMM:
wc->wc_flags = IBV_WC_WITH_IMM;
break;
- case HNS_ROCE_SQ_OP_LOCAL_INV:
- wc->wc_flags = IBV_WC_WITH_INV;
- break;
case HNS_ROCE_SQ_OP_RDMA_READ:
case HNS_ROCE_SQ_OP_ATOMIC_COMP_AND_SWAP:
case HNS_ROCE_SQ_OP_ATOMIC_FETCH_AND_ADD:
@@ -1338,9 +1333,6 @@ static int check_rc_opcode(struct hns_roce_rc_sq_wqe *wqe,
wqe->rkey = htole32(wr->wr.atomic.rkey);
wqe->va = htole64(wr->wr.atomic.remote_addr);
break;
- case IBV_WR_LOCAL_INV:
- hr_reg_enable(wqe, RCWQE_SO);
- /* fallthrough */
case IBV_WR_SEND_WITH_INV:
wqe->inv_key = htole32(wr->invalidate_rkey);
break;
@@ -1372,7 +1364,6 @@ static int set_rc_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr,
!!(wr->send_flags & IBV_SEND_SOLICITED));
hr_reg_write_bool(wqe, RCWQE_INLINE,
!!(wr->send_flags & IBV_SEND_INLINE));
- hr_reg_clear(wqe, RCWQE_SO);
ret = check_rc_opcode(rc_sq_wqe, wr);
if (ret)
@@ -2092,8 +2083,6 @@ static unsigned int get_wc_flags_for_sq(uint8_t opcode)
case HNS_ROCE_SQ_OP_SEND_WITH_IMM:
case HNS_ROCE_SQ_OP_RDMA_WRITE_WITH_IMM:
return IBV_WC_WITH_IMM;
- case HNS_ROCE_SQ_OP_LOCAL_INV:
- return IBV_WC_WITH_INV;
default:
return 0;
}
@@ -2202,7 +2191,6 @@ init_rc_wqe(struct hns_roce_qp *qp, uint64_t wr_id, unsigned int opcode)
hr_reg_write_bool(wqe, RCWQE_FENCE, send_flags & IBV_SEND_FENCE);
hr_reg_write_bool(wqe, RCWQE_SE, send_flags & IBV_SEND_SOLICITED);
hr_reg_clear(wqe, RCWQE_INLINE);
- hr_reg_clear(wqe, RCWQE_SO);
if (check_qp_dca_enable(qp))
fill_rc_dca_fields(qp->verbs_qp.qp.qp_num, wqe);
@@ -2332,20 +2320,6 @@ static void wr_send_inv_rc(struct ibv_qp_ex *ibv_qp, uint32_t invalidate_rkey)
wqe->inv_key = htole32(invalidate_rkey);
}
-static void wr_local_inv_rc(struct ibv_qp_ex *ibv_qp, uint32_t invalidate_rkey)
-{
- struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base);
- struct hns_roce_rc_sq_wqe *wqe;
-
- wqe = init_rc_wqe(qp, ibv_qp->wr_id, HNS_ROCE_WQE_OP_LOCAL_INV);
- if (!wqe)
- return;
-
- hr_reg_enable(wqe, RCWQE_SO);
- wqe->inv_key = htole32(invalidate_rkey);
- enable_wqe(qp, wqe, qp->sq.head);
-}
-
static void wr_set_xrc_srqn(struct ibv_qp_ex *ibv_qp, uint32_t remote_srqn)
{
struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base);
@@ -2833,8 +2807,7 @@ enum {
IBV_QP_EX_WITH_RDMA_WRITE_WITH_IMM |
IBV_QP_EX_WITH_RDMA_READ |
IBV_QP_EX_WITH_ATOMIC_CMP_AND_SWP |
- IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD |
- IBV_QP_EX_WITH_LOCAL_INV,
+ IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD,
HNS_SUPPORTED_SEND_OPS_FLAGS_UD =
IBV_QP_EX_WITH_SEND |
IBV_QP_EX_WITH_SEND_WITH_IMM,
@@ -2850,7 +2823,6 @@ static void fill_send_wr_ops_rc_xrc(struct ibv_qp_ex *qp_ex)
qp_ex->wr_rdma_write_imm = wr_rdma_write_imm;
qp_ex->wr_set_inline_data = wr_set_inline_data_rc;
qp_ex->wr_set_inline_data_list = wr_set_inline_data_list_rc;
- qp_ex->wr_local_inv = wr_local_inv_rc;
qp_ex->wr_atomic_cmp_swp = wr_atomic_cmp_swp;
qp_ex->wr_atomic_fetch_add = wr_atomic_fetch_add;
qp_ex->wr_set_sge = wr_set_sge_rc;
diff --git a/providers/hns/hns_roce_u_hw_v2.h b/providers/hns/hns_roce_u_hw_v2.h
index a22995d..d628d76 100644
--- a/providers/hns/hns_roce_u_hw_v2.h
+++ b/providers/hns/hns_roce_u_hw_v2.h
@@ -60,7 +60,6 @@ enum {
HNS_ROCE_WQE_OP_ATOMIC_MASK_COMP_AND_SWAP = 0x8,
HNS_ROCE_WQE_OP_ATOMIC_MASK_FETCH_AND_ADD = 0x9,
HNS_ROCE_WQE_OP_FAST_REG_PMR = 0xa,
- HNS_ROCE_WQE_OP_LOCAL_INV = 0xb,
HNS_ROCE_WQE_OP_BIND_MW_TYPE = 0xc,
HNS_ROCE_WQE_OP_MASK = 0x1f
};
@@ -85,7 +84,6 @@ enum {
HNS_ROCE_SQ_OP_ATOMIC_MASK_COMP_AND_SWAP = 0x8,
HNS_ROCE_SQ_OP_ATOMIC_MASK_FETCH_AND_ADD = 0x9,
HNS_ROCE_SQ_OP_FAST_REG_PMR = 0xa,
- HNS_ROCE_SQ_OP_LOCAL_INV = 0xb,
HNS_ROCE_SQ_OP_BIND_MW = 0xc,
};
--
2.25.1

View File

@ -1,69 +0,0 @@
From a86a120c35b1112bcef6c3821c2e5e1910e615e9 Mon Sep 17 00:00:00 2001
From: Luoyouming <luoyouming@huawei.com>
Date: Fri, 2 Jun 2023 10:33:14 +0800
Subject: [PATCH 2/4] libhns: Fix the owner bit error of sq in new io
driver inclusion
category: bugfix
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I7A5Y5
---------------------------------------------------------------
The code does not use the current head position of the SQ to set
the owner bit, but the head after it has been incremented, which
yields a wrong owner bit. Although the wqe queue has not wrapped
around yet, the hardware concludes from the owner bit that it has,
so it fails to fetch the wqe, cannot send and generates no cqe.
This patch sets the owner bit before the head value is increased.
Fixes: 36446a56eea5 ("libhns: Extended QP supports the new post send mechanism")
Signed-off-by: Luoyouming <luoyouming@huawei.com>
---
providers/hns/hns_roce_u_hw_v2.c | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index 616d1ea..cde4801 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -2215,6 +2215,9 @@ init_rc_wqe(struct hns_roce_qp *qp, uint64_t wr_id, unsigned int opcode)
qp->sq.wrid[wqe_idx] = wr_id;
qp->cur_wqe = wqe;
+
+ enable_wqe(qp, wqe, qp->sq.head);
+
qp->sq.head++;
return wqe;
@@ -2236,9 +2239,6 @@ static void wr_set_sge_rc(struct ibv_qp_ex *ibv_qp, uint32_t lkey,
wqe->msg_len = htole32(length);
hr_reg_write(wqe, RCWQE_LEN0, length);
hr_reg_write(wqe, RCWQE_SGE_NUM, !!length);
- /* ignore ex sge start index */
-
- enable_wqe(qp, wqe, qp->sq.head);
}
static void set_sgl_rc(struct hns_roce_v2_wqe_data_seg *dseg,
@@ -2541,6 +2541,9 @@ init_ud_wqe(struct hns_roce_qp *qp, uint64_t wr_id, unsigned int opcode)
qp->sq.wrid[wqe_idx] = wr_id;
qp->cur_wqe = wqe;
+
+ enable_wqe(qp, wqe, qp->sq.head);
+
qp->sq.head++;
return wqe;
@@ -2610,7 +2613,6 @@ static void wr_set_sge_ud(struct ibv_qp_ex *ibv_qp, uint32_t lkey,
dseg->len = htole32(length);
qp->sge_info.start_idx++;
- enable_wqe(qp, wqe, qp->sq.head);
}
static void wr_set_sge_list_ud(struct ibv_qp_ex *ibv_qp, size_t num_sge,
--
2.25.1

View File

@ -1,49 +0,0 @@
From cfea6efe6decfa8c209ad9a85e1290674370725e Mon Sep 17 00:00:00 2001
From: Junxian Huang <huangjunxian6@hisilicon.com>
Date: Fri, 2 Jun 2023 10:33:15 +0800
Subject: [PATCH 3/4] libhns: Fix incorrect post-send with direct wqe of
wr-list in user space
driver inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I7A2SA
---------------------------------------------------------------
Currently, direct wqe is not supported for a wr-list. The RoCE driver
excludes direct wqe for wr-lists by checking whether the number of wrs
is 1.
For a wr-list whose second wr is an atomic wr with an invalid length,
the post-send path first handles the first wr and increments the wr
counter to 1. While handling the second wr it detects the length error
and stops processing, leaving the counter at 1. The driver therefore
mistakenly concludes there is only one wr and enters the direct wqe
path, carrying the length-error atomic wqe.
This patch fixes the error by also checking whether the current wr is a
bad wr. If so, the normal doorbell path is used instead of direct wqe
even though the wr count is 1.
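On the application side the failing wr is reported through the last
argument of ibv_post_send(); a sketch, assuming a two-entry wr-list
"wr[2]" and a connected "qp" (hypothetical names):

    struct ibv_send_wr *bad_wr = NULL;
    int ret;

    wr[0].next = &wr[1];                  /* chain the two work requests */
    wr[1].next = NULL;

    ret = ibv_post_send(qp, &wr[0], &bad_wr);
    if (ret)
        /* bad_wr points at the first wr that failed, e.g. the
         * length-error atomic wr; nothing after it was posted. */
        handle_post_error(ret, bad_wr);   /* hypothetical cleanup hook */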
Fixes: 159933c37450 ("libhns: Add support for direct wqe")
Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com>
---
providers/hns/hns_roce_u_hw_v2.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index cde4801..bb26c59 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -1480,7 +1480,8 @@ out:
udma_to_device_barrier();
- if (nreq == 1 && (qp->flags & HNS_ROCE_QP_CAP_DIRECT_WQE))
+ if (nreq == 1 && !ret &&
+ (qp->flags & HNS_ROCE_QP_CAP_DIRECT_WQE))
hns_roce_write_dwqe(qp, wqe);
else
hns_roce_update_sq_db(ctx, qp);
--
2.25.1

View File

@ -1,41 +0,0 @@
From 8fbf781e3b3630c25a361f7c5e3642350dcd21c9 Mon Sep 17 00:00:00 2001
From: Chengchang Tang <tangchengchang@huawei.com>
Date: Fri, 2 Jun 2023 10:33:16 +0800
Subject: [PATCH 4/4] libhns: Add a judgment to the congestion control
algorithm
driver inclusion
category: bugfix
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I7A7HI
---------------------------------------------------------------
The congestion control algorithm is used only when the comp_mask flag
HNSDV_QP_INIT_ATTR_MASK_QP_CONGEST_TYPE is set.
A check on comp_mask is added to prevent invalid-parameter errors caused
by an unconfigured congestion control algorithm type.
Fixes: 7623f24781f1 ("libhns: Support congestion control algorithm configuration")
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
---
providers/hns/hns_roce_u_verbs.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index 6c6120c..fa27fc1 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -988,6 +988,9 @@ static int check_qp_congest_type(struct hns_roce_context *ctx,
{
struct hns_roce_device *hr_dev = to_hr_dev(ctx->ibv_ctx.context.device);
+ if (!(hns_attr->comp_mask & HNSDV_QP_INIT_ATTR_MASK_QP_CONGEST_TYPE))
+ return 0;
+
if (!check_comp_mask(hns_attr->congest_type, hr_dev->congest_type)) {
verbs_err(&ctx->ibv_ctx, "unsupported congest type 0x%x.\n",
hns_attr->congest_type);
--
2.25.1

View File

@ -1,178 +0,0 @@
From fb07638be493fe0667e3fa0554ee434b7d6b3502 Mon Sep 17 00:00:00 2001
From: Junxian Huang <huangjunxian6@hisilicon.com>
Date: Fri, 9 Jun 2023 11:04:41 +0800
Subject: [PATCH] libhns: Support user to choose using UD sl or pktype to adapt
MPI APP
driver inclusion
category: bugfix
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I7A5YM
---------------------------------------------------------------
According to Annex17_RoCEv2 (A17.4.5.2), for RoCEv2 UD a CQE should
carry a flag that indicates whether the received frame is an IPv4, IPv6
or RoCE packet. However, the flag values corresponding to these packet
types have not been defined in WC yet.
In UCX, 'sl' in ibv_wc for UD is used as the packet type flag, and the
packet type values have already been defined in the UCX patch
https://github.com/openucx/ucx/commit/ed28845b88633e65d64fce8ec880060aa61bd59c
Therefore, to adapt to UCX, add a create flag to hnsdv_create_qp() that
lets users choose whether 'sl' in ibv_wc carries the service level or
the packet type for UD. For the latter, obtain and translate the packet
type from the CQE and fill it into 'sl' in ibv_wc.
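A usage sketch of the new flag, assuming an opened context "ctx" and a
UD ibv_qp_init_attr_ex "attr" (hypothetical names):

    struct hnsdv_qp_init_attr hns_attr = {
        .comp_mask    = HNSDV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS,
        .create_flags = HNSDV_QP_CREATE_ENABLE_UD_SL,
    };

    /* With the flag set, 'sl' in ibv_wc keeps its service-level meaning;
     * without it, 'sl' is filled with the packet type parsed from the
     * CQE (the UCX-compatible behavior). */
    struct ibv_qp *qp = hnsdv_create_qp(ctx, &attr, &hns_attr);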
Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com>
Signed-off-by: Haoyue Xu <xuhaoyue1@hisilicon.com>
---
providers/hns/hns_roce_u.h | 11 +++++++++++
providers/hns/hns_roce_u_hw_v2.c | 32 +++++++++++++++++++++++++++++---
providers/hns/hns_roce_u_verbs.c | 16 ++++++++++++++++
providers/hns/hnsdv.h | 1 +
4 files changed, 57 insertions(+), 3 deletions(-)
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index 41e9599..e3012e1 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -195,6 +195,12 @@ enum hns_roce_tc_map_mode {
HNS_ROCE_TC_MAP_MODE_DSCP,
};
+enum hns_roce_pktype {
+ HNS_ROCE_PKTYPE_ROCE_V1,
+ HNS_ROCE_PKTYPE_ROCE_V2_IPV6,
+ HNS_ROCE_PKTYPE_ROCE_V2_IPV4,
+};
+
struct hns_roce_db_page {
struct hns_roce_db_page *prev, *next;
struct hns_roce_buf buf;
@@ -406,6 +412,11 @@ struct hns_roce_qp {
void *cur_wqe;
unsigned int rb_sq_head; /* roll back sq head */
struct hns_roce_sge_info sge_info;
+
+ /* Just for UD. If not enabled, 'sl' in ibv_wc
+ * will be filled with 'port_type' in cqe.
+ */
+ bool enable_ud_sl;
};
struct hns_roce_av {
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index 4b0ec5c..ee2fffe 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -522,9 +522,24 @@ static void handle_recv_rq_inl(struct hns_roce_v2_cqe *cqe,
handle_recv_inl_data(cqe, &(cur_qp->rq_rinl_buf), wr_num, wqe_buf);
}
-static void parse_for_ud_qp(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc)
+static const uint8_t pktype_for_ud[] = {
+ HNS_ROCE_PKTYPE_ROCE_V1,
+ HNS_ROCE_PKTYPE_ROCE_V2_IPV4,
+ HNS_ROCE_PKTYPE_ROCE_V2_IPV6
+};
+
+static void parse_for_ud_qp(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc,
+ bool enable_ud_sl)
{
- wc->sl = hr_reg_read(cqe, CQE_SL);
+ uint8_t port_type;
+
+ if (enable_ud_sl) {
+ wc->sl = hr_reg_read(cqe, CQE_SL);
+ } else {
+ port_type = hr_reg_read(cqe, CQE_PORT_TYPE);
+ wc->sl = pktype_for_ud[port_type];
+ }
+
wc->src_qp = hr_reg_read(cqe, CQE_RMT_QPN);
wc->slid = 0;
wc->wc_flags |= hr_reg_read(cqe, CQE_GRH) ? IBV_WC_GRH : 0;
@@ -554,7 +569,7 @@ static int parse_cqe_for_resp(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc,
++wq->tail;
if (hr_qp->verbs_qp.qp.qp_type == IBV_QPT_UD)
- parse_for_ud_qp(cqe, wc);
+ parse_for_ud_qp(cqe, wc, hr_qp->enable_ud_sl);
if (hr_reg_read(cqe, CQE_CQE_INLINE))
handle_recv_cqe_inl_from_rq(cqe, hr_qp);
@@ -2126,6 +2141,17 @@ static uint32_t wc_read_slid(struct ibv_cq_ex *current)
static uint8_t wc_read_sl(struct ibv_cq_ex *current)
{
struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current));
+ struct hns_roce_context *ctx = to_hr_ctx(current->context);
+ struct hns_roce_qp *hr_qp;
+ uint8_t port_type;
+ uint32_t qpn;
+
+ qpn = hr_reg_read(cq->cqe, CQE_LCL_QPN);
+ hr_qp = hns_roce_v2_find_qp(ctx, qpn);
+ if (hr_qp->verbs_qp.qp.qp_type == IBV_QPT_UD && !hr_qp->enable_ud_sl) {
+ port_type = hr_reg_read(cq->cqe, CQE_PORT_TYPE);
+ return pktype_for_ud[port_type];
+ }
return (uint8_t)hr_reg_read(cq->cqe, CQE_SL);
}
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index fa27fc1..4b641ea 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -1019,6 +1019,11 @@ static int check_qp_congest_type(struct hns_roce_context *ctx,
return 0;
}
+enum {
+ HNSDV_QP_SUP_CREATE_FLAGS = HNSDV_QP_CREATE_ENABLE_DCA_MODE |
+ HNSDV_QP_CREATE_ENABLE_UD_SL,
+};
+
static int check_hnsdv_qp_attr(struct hns_roce_context *ctx,
struct hnsdv_qp_init_attr *hns_attr,
struct hns_roce_cmd_flag *cmd_flag)
@@ -1034,6 +1039,14 @@ static int check_hnsdv_qp_attr(struct hns_roce_context *ctx,
return -EINVAL;
}
+ if (hns_attr->comp_mask & HNSDV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS &&
+ !check_comp_mask(hns_attr->create_flags,
+ HNSDV_QP_SUP_CREATE_FLAGS)) {
+ verbs_err(&ctx->ibv_ctx, "invalid create_flags 0x%x.\n",
+ hns_attr->create_flags);
+ return -EOPNOTSUPP;
+ }
+
ret = check_qp_congest_type(ctx, hns_attr, cmd_flag);
if (ret)
return ret;
@@ -1685,6 +1698,9 @@ static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx,
qp_setup_config(attr, qp, context);
+ if (hns_attr && hns_attr->create_flags & HNSDV_QP_CREATE_ENABLE_UD_SL)
+ qp->enable_ud_sl = true;
+
return &qp->verbs_qp.qp;
err_dwqe:
diff --git a/providers/hns/hnsdv.h b/providers/hns/hnsdv.h
index e15b428..365c314 100644
--- a/providers/hns/hnsdv.h
+++ b/providers/hns/hnsdv.h
@@ -43,6 +43,7 @@ struct ibv_context *hnsdv_open_device(struct ibv_device *device,
enum hnsdv_qp_create_flags {
HNSDV_QP_CREATE_ENABLE_DCA_MODE = 1 << 0,
+ HNSDV_QP_CREATE_ENABLE_UD_SL = 1 << 1,
};
enum hnsdv_qp_congest_ctrl_type {
--
2.25.1

View File

@ -1,64 +0,0 @@
From 14e8bd8f4e30abdaabbe7ffc3a19e4381b4130fe Mon Sep 17 00:00:00 2001
From: Chengchang Tang <tangchengchang@huawei.com>
Date: Wed, 24 May 2023 10:34:07 +0800
Subject: [PATCH 1/2] libhns: Remove unnecessary QP checks
mainline inclusion
from mainline-v47.0
commit 4ea9a4f77ac8c174d1ae4cf551a2e5abea13516f
category: cleanup
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I7NT3M
CVE: NA
Reference: https://github.com/linux-rdma/rdma-core/commit/4ea9a4f77ac8c174d1ae4cf551a2e5abea13516f
----------------------------------------------------------------------
It is not necessary to check the QP type in post_send and post_recv,
since unsupported QP types are rejected when the QP is created.
The srq check in post_recv has also been removed, because the RQ of a
QP bound to an srq is empty and post_recv returns an immediate error
for it.
Besides, remove the unused parameter ctx.
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com>
Signed-off-by: Juan Zhou <zhoujuan51@h-partners.com>
---
providers/hns/hns_roce_u_hw_v2.c | 11 +----------
1 file changed, 1 insertion(+), 10 deletions(-)
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index ee2fffe..688eff8 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -908,11 +908,6 @@ static int check_qp_send(struct hns_roce_qp *qp, struct hns_roce_context *ctx)
struct ibv_qp *ibvqp = &qp->verbs_qp.qp;
int ret = 0;
- if (unlikely(ibvqp->qp_type != IBV_QPT_RC &&
- ibvqp->qp_type != IBV_QPT_UD) &&
- ibvqp->qp_type != IBV_QPT_XRC_SEND)
- return -EINVAL;
-
if (unlikely(ibvqp->state == IBV_QPS_RESET ||
ibvqp->state == IBV_QPS_INIT ||
ibvqp->state == IBV_QPS_RTR))
@@ -1516,11 +1511,7 @@ static int check_qp_recv(struct hns_roce_qp *qp, struct hns_roce_context *ctx)
struct ibv_qp *ibvqp = &qp->verbs_qp.qp;
int ret = 0;
- if (unlikely(ibvqp->qp_type != IBV_QPT_RC &&
- ibvqp->qp_type != IBV_QPT_UD))
- return -EINVAL;
-
- if (ibvqp->state == IBV_QPS_RESET || ibvqp->srq)
+ if (ibvqp->state == IBV_QPS_RESET)
return -EINVAL;
if (check_dca_attach_enable(qp)) {
--
2.25.1

View File

@ -1,56 +0,0 @@
From e20de83892bcba8e8944cbf83e8acc9192d514bb Mon Sep 17 00:00:00 2001
From: Chengchang Tang <tangchengchang@huawei.com>
Date: Mon, 3 Jul 2023 15:22:57 +0800
Subject: [PATCH 2/2] libhns: Fix reference to uninitialized cq pointer
mainline inclusion
from mainline-v47.0
commit e850ebe1570c06990cc60e96d0085d5d2a1cfcaa
category: bugfix
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I7NTLE
CVE: NA
Reference: https://github.com/linux-rdma/rdma-core/commit/e850ebe1570c06990cc60e96d0085d5d2a1cfcaa
----------------------------------------------------------------------
In cases such as XRC or SRQ usage where no RQ is created, the recv_cq
pointer is not initialized, so the unsupported post_recv function should
return before referencing the recv_cq pointer.
Fixes: 4ea9a4f77ac8 ("libhns: Remove unnecessary QP checks")
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com>
Signed-off-by: Juan Zhou <zhoujuan51@h-partners.com>
---
providers/hns/hns_roce_u_hw_v2.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index 688eff8..9238fe5 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -1599,15 +1599,15 @@ static int hns_roce_u_v2_post_recv(struct ibv_qp *ibvqp, struct ibv_recv_wr *wr,
max_sge = qp->rq.max_gs - qp->rq.rsv_sge;
for (nreq = 0; wr; ++nreq, wr = wr->next) {
- if (hns_roce_v2_wq_overflow(&qp->rq, nreq,
- to_hr_cq(qp->verbs_qp.qp.recv_cq))) {
- ret = ENOMEM;
+ if (wr->num_sge > max_sge) {
+ ret = max_sge > 0 ? EINVAL : EOPNOTSUPP;
*bad_wr = wr;
goto out;
}
- if (wr->num_sge > max_sge) {
- ret = EINVAL;
+ if (hns_roce_v2_wq_overflow(&qp->rq, nreq,
+ to_hr_cq(qp->verbs_qp.qp.recv_cq))) {
+ ret = ENOMEM;
*bad_wr = wr;
goto out;
}
--
2.25.1

View File

@ -1,543 +0,0 @@
From 456072a07831ca19f81b591e3c259932ece8dcae Mon Sep 17 00:00:00 2001
From: Chengchang Tang <tangchengchang@huawei.com>
Date: Tue, 26 Sep 2023 19:19:06 +0800
Subject: [PATCH 1/5] libhns: Support reporting wc as software mode
driver inclusion
category: feature
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I83BP0
----------------------------------------------------------
While the HW is in the resetting stage, not all of the expected work
completions can be polled back because the HW no longer generates cqes.
This patch lets the driver compose the expected wcs on behalf of the HW
during the reset stage. Once the hardware has finished resetting, the
cq can be polled from hardware again.
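From the application's point of view, pending requests can still be
reaped after a reset; a sketch, where "cq", "outstanding" and
"release_request" are hypothetical application-side names:

    struct ibv_wc wc[16];
    int n, i;

    while (outstanding > 0) {
        n = ibv_poll_cq(cq, 16, wc);
        for (i = 0; i < n; i++) {
            /* The composed completions carry IBV_WC_WR_FLUSH_ERR. */
            if (wc[i].status == IBV_WC_WR_FLUSH_ERR)
                release_request(wc[i].wr_id);
            outstanding--;
        }
    }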
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
---
providers/hns/hns_roce_u.h | 12 ++
providers/hns/hns_roce_u_hw_v2.c | 216 +++++++++++++++++++++++++++++--
providers/hns/hns_roce_u_hw_v2.h | 2 +
providers/hns/hns_roce_u_verbs.c | 91 +++++++++++++
4 files changed, 309 insertions(+), 12 deletions(-)
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index e3012e1..b3f21ba 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -277,6 +277,8 @@ struct hns_roce_context {
unsigned int max_inline_data;
struct hns_roce_dca_ctx dca_ctx;
+
+ bool reseted;
};
struct hns_roce_td {
@@ -309,6 +311,11 @@ struct hns_roce_cq {
unsigned long flags;
unsigned int cqe_size;
struct hns_roce_v2_cqe *cqe;
+ struct list_head list_sq;
+ struct list_head list_rq;
+ struct list_head list_srq;
+ struct list_head list_xrc_srq;
+ struct hns_roce_v2_cqe *sw_cqe;
};
struct hns_roce_idx_que {
@@ -344,6 +351,7 @@ struct hns_roce_srq {
unsigned int wqe_shift;
unsigned int *db;
unsigned short counter;
+ struct list_node xrc_srcq_node;
};
struct hns_roce_wq {
@@ -413,6 +421,10 @@ struct hns_roce_qp {
unsigned int rb_sq_head; /* roll back sq head */
struct hns_roce_sge_info sge_info;
+ struct list_node rcq_node;
+ struct list_node scq_node;
+ struct list_node srcq_node;
+
/* Just for UD. If not enabled, 'sl' in ibv_wc
* will be filled with 'port_type' in cqe.
*/
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index 9238fe5..4e92397 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -843,6 +843,180 @@ static int hns_roce_poll_one(struct hns_roce_context *ctx,
return hns_roce_flush_cqe(*cur_qp, status);
}
+static void hns_roce_fill_swc(struct hns_roce_cq *cq, struct ibv_wc *wc,
+ uint64_t wr_id, uint32_t qp_num)
+{
+ if (!wc) {
+ cq->verbs_cq.cq_ex.status = IBV_WC_WR_FLUSH_ERR;
+ cq->verbs_cq.cq_ex.wr_id = wr_id;
+ hr_reg_write(cq->sw_cqe, CQE_LCL_QPN, qp_num);
+ return;
+ }
+
+ wc->wr_id = wr_id;
+ wc->status = IBV_WC_WR_FLUSH_ERR;
+ wc->vendor_err = 0;
+ wc->qp_num = qp_num;
+}
+
+static int hns_roce_get_wq_swc(struct hns_roce_cq *cq, struct hns_roce_qp *qp,
+ struct ibv_wc *wc, bool is_sq)
+{
+ struct hns_roce_wq *wq = is_sq ? &qp->sq : &qp->rq;
+ unsigned int left_wr;
+ uint64_t wr_id;
+
+ left_wr = wq->head - wq->tail;
+ if (left_wr == 0) {
+ if (is_sq)
+ list_del_init(&qp->scq_node);
+ else
+ list_del_init(&qp->rcq_node);
+
+ return -ENOENT;
+ }
+
+ wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
+ hns_roce_fill_swc(cq, wc, wr_id, qp->verbs_qp.qp.qp_num);
+ wq->tail++;
+ return V2_CQ_OK;
+}
+
+static int hns_roce_gen_sq_swc(struct hns_roce_cq *cq, struct ibv_wc *wc)
+{
+ struct hns_roce_qp *next, *qp = NULL;
+
+ list_for_each_safe(&cq->list_sq, qp, next, scq_node) {
+ if (hns_roce_get_wq_swc(cq, qp, wc, true) == -ENOENT)
+ continue;
+
+ return V2_CQ_OK;
+ }
+
+ return !wc ? -ENOENT : V2_CQ_EMPTY;
+}
+
+static int hns_roce_gen_rq_swc(struct hns_roce_cq *cq, struct ibv_wc *wc)
+{
+ struct hns_roce_qp *next, *qp = NULL;
+
+ list_for_each_safe(&cq->list_rq, qp, next, rcq_node) {
+ if (hns_roce_get_wq_swc(cq, qp, wc, false) == -ENOENT)
+ continue;
+
+ return V2_CQ_OK;
+ }
+
+ return !wc ? -ENOENT : V2_CQ_EMPTY;
+}
+
+static int hns_roce_get_srq_swc(struct hns_roce_cq *cq, struct hns_roce_qp *qp,
+ struct hns_roce_srq *srq, struct ibv_wc *wc)
+{
+ unsigned int left_wr;
+ uint64_t wr_id;
+
+ hns_roce_spin_lock(&srq->hr_lock);
+ left_wr = srq->idx_que.head - srq->idx_que.tail;
+ if (left_wr == 0) {
+ if (qp)
+ list_del_init(&qp->srcq_node);
+ else
+ list_del_init(&srq->xrc_srcq_node);
+
+ hns_roce_spin_unlock(&srq->hr_lock);
+ return -ENOENT;
+ }
+
+ wr_id = srq->wrid[srq->idx_que.tail & (srq->wqe_cnt - 1)];
+ hns_roce_fill_swc(cq, wc, wr_id, srq->srqn);
+ srq->idx_que.tail++;
+ hns_roce_spin_unlock(&srq->hr_lock);
+
+ return V2_CQ_OK;
+}
+
+static int hns_roce_gen_common_srq_swc(struct hns_roce_cq *cq,
+ struct ibv_wc *wc)
+{
+ struct hns_roce_qp *next, *qp = NULL;
+ struct hns_roce_srq *srq;
+
+ list_for_each_safe(&cq->list_srq, qp, next, srcq_node) {
+ srq = to_hr_srq(qp->verbs_qp.qp.srq);
+ if (hns_roce_get_srq_swc(cq, qp, srq, wc) == -ENOENT)
+ continue;
+
+ return V2_CQ_OK;
+ }
+
+ return !wc ? -ENOENT : V2_CQ_EMPTY;
+}
+
+static int hns_roce_gen_xrc_srq_swc(struct hns_roce_cq *cq, struct ibv_wc *wc)
+{
+ struct hns_roce_srq *next, *srq = NULL;
+
+ list_for_each_safe(&cq->list_xrc_srq, srq, next, xrc_srcq_node) {
+ if (hns_roce_get_srq_swc(cq, NULL, srq, wc) == -ENOENT)
+ continue;
+
+ return V2_CQ_OK;
+ }
+
+ return !wc ? -ENOENT : V2_CQ_EMPTY;
+}
+
+static int hns_roce_gen_srq_swc(struct hns_roce_cq *cq, struct ibv_wc *wc)
+{
+ int err;
+
+ err = hns_roce_gen_common_srq_swc(cq, wc);
+ if (err == V2_CQ_OK)
+ return err;
+
+ return hns_roce_gen_xrc_srq_swc(cq, wc);
+}
+
+static int hns_roce_poll_one_swc(struct hns_roce_cq *cq, struct ibv_wc *wc)
+{
+ int err;
+
+ err = hns_roce_gen_sq_swc(cq, wc);
+ if (err == V2_CQ_OK)
+ return err;
+
+ err = hns_roce_gen_rq_swc(cq, wc);
+ if (err == V2_CQ_OK)
+ return err;
+
+ return hns_roce_gen_srq_swc(cq, wc);
+}
+
+static int hns_roce_poll_swc(struct hns_roce_cq *cq, int ne, struct ibv_wc *wc)
+{
+ int npolled;
+ int err;
+
+ for (npolled = 0; npolled < ne; npolled++) {
+ err = hns_roce_poll_one_swc(cq, wc + npolled);
+ if (err == V2_CQ_EMPTY)
+ break;
+ }
+
+ return npolled;
+}
+
+static bool hns_roce_reseted(struct hns_roce_context *ctx)
+{
+ struct hns_roce_v2_reset_state *state = ctx->reset_state;
+
+ if (state && state->is_reset)
+ ctx->reseted = true;
+
+ return ctx->reseted;
+}
+
static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne,
struct ibv_wc *wc)
{
@@ -854,6 +1028,12 @@ static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne,
hns_roce_spin_lock(&cq->hr_lock);
+ if (unlikely(hns_roce_reseted(ctx))) {
+ npolled = hns_roce_poll_swc(cq, ne, wc);
+ hns_roce_spin_unlock(&cq->hr_lock);
+ return npolled;
+ }
+
for (npolled = 0; npolled < ne; ++npolled) {
err = hns_roce_poll_one(ctx, &qp, cq, wc + npolled);
if (qp && check_dca_detach_enable(qp))
@@ -1773,11 +1953,8 @@ static int hns_roce_u_v2_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
return ret;
}
-static void hns_roce_lock_cqs(struct ibv_qp *qp)
+void hns_roce_lock_cqs(struct hns_roce_cq *send_cq, struct hns_roce_cq *recv_cq)
{
- struct hns_roce_cq *send_cq = to_hr_cq(qp->send_cq);
- struct hns_roce_cq *recv_cq = to_hr_cq(qp->recv_cq);
-
if (send_cq && recv_cq) {
if (send_cq == recv_cq) {
hns_roce_spin_lock(&send_cq->hr_lock);
@@ -1795,11 +1972,8 @@ static void hns_roce_lock_cqs(struct ibv_qp *qp)
}
}
-static void hns_roce_unlock_cqs(struct ibv_qp *qp)
+void hns_roce_unlock_cqs(struct hns_roce_cq *send_cq, struct hns_roce_cq *recv_cq)
{
- struct hns_roce_cq *send_cq = to_hr_cq(qp->send_cq);
- struct hns_roce_cq *recv_cq = to_hr_cq(qp->recv_cq);
-
if (send_cq && recv_cq) {
if (send_cq == recv_cq) {
hns_roce_spin_unlock(&send_cq->hr_lock);
@@ -1832,17 +2006,22 @@ static int hns_roce_u_v2_destroy_qp(struct ibv_qp *ibqp)
hns_roce_v2_clear_qp(ctx, qp);
- hns_roce_lock_cqs(ibqp);
+ hns_roce_lock_cqs(to_hr_cq(ibqp->send_cq), to_hr_cq(ibqp->recv_cq));
- if (ibqp->recv_cq)
+ if (ibqp->recv_cq) {
__hns_roce_v2_cq_clean(to_hr_cq(ibqp->recv_cq), ibqp->qp_num,
ibqp->srq ? to_hr_srq(ibqp->srq) : NULL);
+ list_del(&qp->srcq_node);
+ list_del(&qp->rcq_node);
+ }
- if (ibqp->send_cq && ibqp->send_cq != ibqp->recv_cq)
+ if (ibqp->send_cq && ibqp->send_cq != ibqp->recv_cq) {
__hns_roce_v2_cq_clean(to_hr_cq(ibqp->send_cq), ibqp->qp_num,
NULL);
+ list_del(&qp->scq_node);
+ }
- hns_roce_unlock_cqs(ibqp);
+ hns_roce_unlock_cqs(to_hr_cq(ibqp->send_cq), to_hr_cq(ibqp->recv_cq));
hns_roce_free_qp_buf(qp, ctx);
@@ -1988,10 +2167,16 @@ static int wc_start_poll_cq(struct ibv_cq_ex *current,
hns_roce_spin_lock(&cq->hr_lock);
+ if (unlikely(hns_roce_reseted(ctx))) {
+ err = hns_roce_poll_one_swc(cq, NULL);
+ goto start_poll_done;
+ }
+
err = hns_roce_poll_one(ctx, &qp, cq, NULL);
if (qp && check_dca_detach_enable(qp))
dca_detach_qp_buf(ctx, qp);
+start_poll_done:
if (err != V2_CQ_OK)
hns_roce_spin_unlock(&cq->hr_lock);
@@ -2005,6 +2190,9 @@ static int wc_next_poll_cq(struct ibv_cq_ex *current)
struct hns_roce_qp *qp = NULL;
int err;
+ if (unlikely(hns_roce_reseted(ctx)))
+ return hns_roce_poll_one_swc(cq, NULL);
+
err = hns_roce_poll_one(ctx, &qp, cq, NULL);
if (qp && check_dca_detach_enable(qp))
dca_detach_qp_buf(ctx, qp);
@@ -2024,11 +2212,15 @@ static void wc_end_poll_cq(struct ibv_cq_ex *current)
struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current));
struct hns_roce_context *ctx = to_hr_ctx(current->context);
+ if (unlikely(hns_roce_reseted(ctx)))
+ goto end_poll_done;
+
if (cq->flags & HNS_ROCE_CQ_FLAG_RECORD_DB)
*cq->db = cq->cons_index & RECORD_DB_CI_MASK;
else
update_cq_db(ctx, cq);
+end_poll_done:
hns_roce_spin_unlock(&cq->hr_lock);
}
diff --git a/providers/hns/hns_roce_u_hw_v2.h b/providers/hns/hns_roce_u_hw_v2.h
index d628d76..50a920f 100644
--- a/providers/hns/hns_roce_u_hw_v2.h
+++ b/providers/hns/hns_roce_u_hw_v2.h
@@ -346,5 +346,7 @@ void hns_roce_v2_clear_qp(struct hns_roce_context *ctx, struct hns_roce_qp *qp);
void hns_roce_attach_cq_ex_ops(struct ibv_cq_ex *cq_ex, uint64_t wc_flags);
int hns_roce_attach_qp_ex_ops(struct ibv_qp_init_attr_ex *attr,
struct hns_roce_qp *qp);
+void hns_roce_lock_cqs(struct hns_roce_cq *send_cq, struct hns_roce_cq *recv_cq);
+void hns_roce_unlock_cqs(struct hns_roce_cq *send_cq, struct hns_roce_cq *recv_cq);
#endif /* _HNS_ROCE_U_HW_V2_H */
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index 4b641ea..8fb415b 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -519,6 +519,32 @@ static int exec_cq_create_cmd(struct ibv_context *context,
return 0;
}
+static int hns_roce_init_cq_swc(struct hns_roce_cq *cq,
+ struct ibv_cq_init_attr_ex *attr)
+{
+ list_head_init(&cq->list_sq);
+ list_head_init(&cq->list_rq);
+ list_head_init(&cq->list_srq);
+ list_head_init(&cq->list_xrc_srq);
+
+ if (!(attr->wc_flags & CREATE_CQ_SUPPORTED_WC_FLAGS))
+ return 0;
+
+ cq->sw_cqe = calloc(1, sizeof(struct hns_roce_v2_cqe));
+ if (!cq->sw_cqe)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void hns_roce_uninit_cq_swc(struct hns_roce_cq *cq)
+{
+ if (cq->sw_cqe) {
+ free(cq->sw_cqe);
+ cq->sw_cqe = NULL;
+ }
+}
+
static struct ibv_cq_ex *create_cq(struct ibv_context *context,
struct ibv_cq_init_attr_ex *attr)
{
@@ -564,6 +590,10 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *context,
*cq->db = 0;
+ ret = hns_roce_init_cq_swc(cq, attr);
+ if (ret)
+ goto err_swc;
+
ret = exec_cq_create_cmd(context, cq, attr);
if (ret)
goto err_cmd;
@@ -573,6 +603,8 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *context,
return &cq->verbs_cq.cq_ex;
err_cmd:
+ hns_roce_uninit_cq_swc(cq);
+err_swc:
hns_roce_free_db(hr_ctx, cq->db, HNS_ROCE_CQ_TYPE_DB);
err_db:
hns_roce_free_buf(&cq->buf);
@@ -632,6 +664,8 @@ int hns_roce_u_destroy_cq(struct ibv_cq *cq)
if (ret)
return ret;
+ hns_roce_uninit_cq_swc(to_hr_cq(cq));
+
hns_roce_free_db(to_hr_ctx(cq->context), to_hr_cq(cq)->db,
HNS_ROCE_CQ_TYPE_DB);
hns_roce_free_buf(&to_hr_cq(cq)->buf);
@@ -839,6 +873,22 @@ static int exec_srq_create_cmd(struct ibv_context *context,
return 0;
}
+static void init_srq_cq_list(struct hns_roce_srq *srq,
+ struct ibv_srq_init_attr_ex *init_attr)
+{
+ struct hns_roce_cq *srq_cq;
+
+ list_node_init(&srq->xrc_srcq_node);
+
+ if (!init_attr->cq)
+ return;
+
+ srq_cq = to_hr_cq(init_attr->cq);
+ hns_roce_spin_lock(&srq_cq->hr_lock);
+ list_add_tail(&srq_cq->list_xrc_srq, &srq->xrc_srcq_node);
+ hns_roce_spin_unlock(&srq_cq->hr_lock);
+}
+
static struct ibv_srq *create_srq(struct ibv_context *context,
struct ibv_srq_init_attr_ex *init_attr)
{
@@ -886,6 +936,8 @@ static struct ibv_srq *create_srq(struct ibv_context *context,
init_attr->attr.max_sge =
min(init_attr->attr.max_sge - srq->rsv_sge, hr_ctx->max_srq_sge);
+ init_srq_cq_list(srq, init_attr);
+
return &srq->verbs_srq.srq;
err_destroy_srq:
@@ -958,12 +1010,26 @@ int hns_roce_u_query_srq(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr)
return ret;
}
+static void del_srq_from_cq_list(struct hns_roce_srq *srq)
+{
+ struct hns_roce_cq *srq_cq = to_hr_cq(srq->verbs_srq.cq);
+
+ if (!srq_cq)
+ return;
+
+ hns_roce_spin_lock(&srq_cq->hr_lock);
+ list_del(&srq->xrc_srcq_node);
+ hns_roce_spin_unlock(&srq_cq->hr_lock);
+}
+
int hns_roce_u_destroy_srq(struct ibv_srq *ibv_srq)
{
struct hns_roce_context *ctx = to_hr_ctx(ibv_srq->context);
struct hns_roce_srq *srq = to_hr_srq(ibv_srq);
int ret;
+ del_srq_from_cq_list(srq);
+
ret = ibv_cmd_destroy_srq(ibv_srq);
if (ret)
return ret;
@@ -1648,6 +1714,30 @@ static int mmap_dwqe(struct ibv_context *ibv_ctx, struct hns_roce_qp *qp,
return 0;
}
+static void add_qp_to_cq_list(struct ibv_qp_init_attr_ex *attr,
+ struct hns_roce_qp *qp)
+{
+ struct hns_roce_cq *send_cq, *recv_cq;
+
+ send_cq = attr->send_cq ? to_hr_cq(attr->send_cq) : NULL;
+ recv_cq = attr->recv_cq ? to_hr_cq(attr->recv_cq) : NULL;
+
+ list_node_init(&qp->scq_node);
+ list_node_init(&qp->rcq_node);
+ list_node_init(&qp->srcq_node);
+
+ hns_roce_lock_cqs(send_cq, recv_cq);
+ if (send_cq)
+ list_add_tail(&send_cq->list_sq, &qp->scq_node);
+ if (recv_cq) {
+ if (attr->srq)
+ list_add_tail(&recv_cq->list_srq, &qp->srcq_node);
+ else
+ list_add_tail(&recv_cq->list_rq, &qp->rcq_node);
+ }
+ hns_roce_unlock_cqs(send_cq, recv_cq);
+}
+
static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx,
struct ibv_qp_init_attr_ex *attr,
struct hnsdv_qp_init_attr *hns_attr)
@@ -1697,6 +1787,7 @@ static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx,
}
qp_setup_config(attr, qp, context);
+ add_qp_to_cq_list(attr, qp);
if (hns_attr && hns_attr->create_flags & HNSDV_QP_CREATE_ENABLE_UD_SL)
qp->enable_ud_sl = true;
--
2.25.1

View File

@ -1,127 +0,0 @@
From 924086bac3b9c4c3c953a8a733b84bc3611c59e9 Mon Sep 17 00:00:00 2001
From: Chengchang Tang <tangchengchang@huawei.com>
Date: Tue, 26 Sep 2023 19:19:07 +0800
Subject: [PATCH 2/5] libhns: return error when post send in reset state
driver inclusion
category: feature
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I83BP0
----------------------------------------------------------
If the device has been reset, the original workload cannot continue. The
current design allows the user to keep issuing IO, which is meaningless;
the user should perceive the exception and restore the workload as soon
as possible.
The kernel-mode driver already returns an error directly when the device
has been reset, and this patch unifies the behavior of kernel mode and
user mode.
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
---
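For illustration only (not part of this patch): a minimal application-side
sketch of consuming the new behavior. The recovery policy is an assumption;
only the fact that posting on a reset device now fails comes from this patch.

#include <stdio.h>
#include <infiniband/verbs.h>

/* Post one send WR; with this patch the hns provider rejects the post
 * once the device has been reset instead of silently accepting it. */
static int post_send_checked(struct ibv_qp *qp, struct ibv_send_wr *wr)
{
	struct ibv_send_wr *bad_wr = NULL;
	int ret = ibv_post_send(qp, wr, &bad_wr);

	if (ret) {
		/* An EIO-class failure means the device was reset; an
		 * EINVAL-class failure means the QP state does not allow
		 * posting. Either way, stop issuing IO and let the
		 * application rebuild its verbs resources. */
		fprintf(stderr, "post send failed (%d), recovering\n", ret);
	}
	return ret;
}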
providers/hns/hns_roce_u_hw_v2.c | 53 +++++++++++++++++++++-----------
1 file changed, 35 insertions(+), 18 deletions(-)
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index 4e92397..29b6268 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -1090,8 +1090,15 @@ static int check_qp_send(struct hns_roce_qp *qp, struct hns_roce_context *ctx)
if (unlikely(ibvqp->state == IBV_QPS_RESET ||
ibvqp->state == IBV_QPS_INIT ||
- ibvqp->state == IBV_QPS_RTR))
+ ibvqp->state == IBV_QPS_RTR)) {
+ verbs_err(verbs_get_ctx(qp->verbs_qp.qp.context),
+ "unsupported qp state, state = %d.\n", ibvqp->state);
return -EINVAL;
+ } else if (unlikely(hns_roce_reseted(ctx))) {
+ verbs_err_datapath(&ctx->ibv_ctx,
+ "failed to send, device has been reseted!\n");
+ return -EIO;
+ }
if (check_dca_attach_enable(qp)) {
ret = dca_attach_qp_buf(ctx, qp);
@@ -1691,8 +1698,15 @@ static int check_qp_recv(struct hns_roce_qp *qp, struct hns_roce_context *ctx)
struct ibv_qp *ibvqp = &qp->verbs_qp.qp;
int ret = 0;
- if (ibvqp->state == IBV_QPS_RESET)
+ if (ibvqp->state == IBV_QPS_RESET) {
+ verbs_err(verbs_get_ctx(qp->verbs_qp.qp.context),
+ "unsupported qp state, state = %d.\n", ibvqp->state);
return -EINVAL;
+ } else if (unlikely(hns_roce_reseted(ctx))) {
+ verbs_err_datapath(&ctx->ibv_ctx,
+ "fail to recv, device has been reseted!\n");
+ return -EIO;
+ }
if (check_dca_attach_enable(qp)) {
ret = dca_attach_qp_buf(ctx, qp);
@@ -2099,6 +2113,16 @@ static void update_srq_db(struct hns_roce_db *db, struct hns_roce_srq *srq)
hr_reg_write(db, DB_PI, srq->idx_que.head);
}
+static int check_srq_recv(struct hns_roce_context *ctx)
+{
+ if (hns_roce_reseted(ctx)) {
+ verbs_err_datapath(&ctx->ibv_ctx,
+ "srq failed to recv, device has been reseted!\n");
+ return -EIO;
+ }
+ return 0;
+}
+
static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
struct ibv_recv_wr *wr,
struct ibv_recv_wr **bad_wr)
@@ -2110,6 +2134,12 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
int ret = 0;
void *wqe;
+ ret = check_srq_recv(ctx);
+ if (ret) {
+ *bad_wr = wr;
+ return ret;
+ }
+
hns_roce_spin_lock(&srq->hr_lock);
max_sge = srq->max_gs - srq->rsv_sge;
@@ -2934,27 +2964,14 @@ static void wr_start(struct ibv_qp_ex *ibv_qp)
{
struct hns_roce_context *ctx = to_hr_ctx(ibv_qp->qp_base.context);
struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base);
- enum ibv_qp_state state = ibv_qp->qp_base.state;
int ret;
- if (state == IBV_QPS_RESET ||
- state == IBV_QPS_INIT ||
- state == IBV_QPS_RTR) {
- qp->err = EINVAL;
+ ret = check_qp_send(qp, ctx);
+ if (ret) {
+ qp->err = ret;
return;
}
- if (check_qp_dca_enable(qp)) {
- ret = dca_attach_qp_buf(ctx, qp);
- if (ret) {
- verbs_err_datapath(&ctx->ibv_ctx,
- "failed to attach QP-%u send, ret = %d.\n",
- qp->verbs_qp.qp.qp_num, ret);
- qp->err = ret;
- return;
- }
- }
-
hns_roce_spin_lock(&qp->sq.hr_lock);
qp->sge_info.start_idx = qp->next_sge;
qp->rb_sq_head = qp->sq.head;
--
2.25.1

View File

@ -1,113 +0,0 @@
From 87a32d939f7b4504c0a90adc0b0294adf5b8cad1 Mon Sep 17 00:00:00 2001
From: Chengchang Tang <tangchengchang@huawei.com>
Date: Tue, 26 Sep 2023 19:19:08 +0800
Subject: [PATCH 3/5] libhns: separate the initialization steps of lock
driver inclusion
category: cleanup
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I83BP0
----------------------------------------------------------
Separate the initialization steps of the lock from create_cq() and
create_srq(), just like in create_qp(), to unify all create-style
processes.
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
---
providers/hns/hns_roce_u_verbs.c | 48 +++++++++++++++++++++-----------
1 file changed, 32 insertions(+), 16 deletions(-)
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index 8fb415b..e7a7388 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -545,13 +545,28 @@ static void hns_roce_uninit_cq_swc(struct hns_roce_cq *cq)
}
}
+static int hns_roce_cq_spinlock_init(struct ibv_context *context,
+ struct hns_roce_cq *cq,
+ struct ibv_cq_init_attr_ex *attr)
+{
+ struct hns_roce_pad *pad = NULL;
+ int need_lock;
+
+ if (attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_PD)
+ pad = to_hr_pad(attr->parent_domain);
+
+ need_lock = hns_roce_whether_need_lock(pad ? &pad->pd.ibv_pd : NULL);
+ if (!need_lock)
+ verbs_info(verbs_get_ctx(context), "configure cq as no lock.\n");
+
+ return hns_roce_spinlock_init(&cq->hr_lock, need_lock);
+}
+
static struct ibv_cq_ex *create_cq(struct ibv_context *context,
struct ibv_cq_init_attr_ex *attr)
{
struct hns_roce_context *hr_ctx = to_hr_ctx(context);
- struct hns_roce_pad *pad = NULL;
struct hns_roce_cq *cq;
- int need_lock;
int ret;
ret = verify_cq_create_attr(attr, hr_ctx);
@@ -564,14 +579,7 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *context,
goto err;
}
- if (attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_PD)
- pad = to_hr_pad(attr->parent_domain);
-
- need_lock = hns_roce_whether_need_lock(pad ? &pad->pd.ibv_pd : NULL);
- if (!need_lock)
- verbs_info(verbs_get_ctx(context), "configure cq as no lock.\n");
-
- ret = hns_roce_spinlock_init(&cq->hr_lock, need_lock);
+ ret = hns_roce_cq_spinlock_init(context, cq, attr);
if (ret)
goto err_lock;
@@ -889,12 +897,24 @@ static void init_srq_cq_list(struct hns_roce_srq *srq,
hns_roce_spin_unlock(&srq_cq->hr_lock);
}
+static int hns_roce_srq_spinlock_init(struct ibv_context *context,
+ struct hns_roce_srq *srq,
+ struct ibv_srq_init_attr_ex *attr)
+{
+ int need_lock;
+
+ need_lock = hns_roce_whether_need_lock(attr->pd);
+ if (!need_lock)
+ verbs_info(verbs_get_ctx(context), "configure srq as no lock.\n");
+
+ return hns_roce_spinlock_init(&srq->hr_lock, need_lock);
+}
+
static struct ibv_srq *create_srq(struct ibv_context *context,
struct ibv_srq_init_attr_ex *init_attr)
{
struct hns_roce_context *hr_ctx = to_hr_ctx(context);
struct hns_roce_srq *srq;
- int need_lock;
int ret;
ret = verify_srq_create_attr(hr_ctx, init_attr);
@@ -907,11 +927,7 @@ static struct ibv_srq *create_srq(struct ibv_context *context,
goto err;
}
- need_lock = hns_roce_whether_need_lock(init_attr->pd);
- if (!need_lock)
- verbs_info(verbs_get_ctx(context), "configure srq as no lock.\n");
-
- if (hns_roce_spinlock_init(&srq->hr_lock, need_lock))
+ if (hns_roce_srq_spinlock_init(context, srq, init_attr))
goto err_free_srq;
set_srq_param(context, srq, init_attr);
--
2.25.1

View File

@ -1,76 +0,0 @@
From 82f027c27a2dc9eddc5c7d8859d5c8e0a8105b71 Mon Sep 17 00:00:00 2001
From: Chengchang Tang <tangchengchang@huawei.com>
Date: Tue, 26 Sep 2023 19:19:09 +0800
Subject: [PATCH 4/5] libhns: assign doorbell to zero when allocate it
driver inclusion
category: cleanup
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I83BP0
----------------------------------------------------------
Zero the doorbell when it is allocated, to avoid having to clear it in
each function that uses hns_roce_alloc_db().
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
---
providers/hns/hns_roce_u_db.c | 2 ++
providers/hns/hns_roce_u_verbs.c | 8 --------
2 files changed, 2 insertions(+), 8 deletions(-)
diff --git a/providers/hns/hns_roce_u_db.c b/providers/hns/hns_roce_u_db.c
index f5acac2..73a71de 100644
--- a/providers/hns/hns_roce_u_db.c
+++ b/providers/hns/hns_roce_u_db.c
@@ -115,6 +115,8 @@ found:
out:
pthread_mutex_unlock((pthread_mutex_t *)&ctx->db_list_mutex);
+ if (db)
+ *((unsigned int *)db) = 0;
return db;
}
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index e7a7388..7b58dd0 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -596,8 +596,6 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *context,
goto err_db;
}
- *cq->db = 0;
-
ret = hns_roce_init_cq_swc(cq, attr);
if (ret)
goto err_swc;
@@ -938,8 +936,6 @@ static struct ibv_srq *create_srq(struct ibv_context *context,
if (!srq->db)
goto err_srq_buf;
- *srq->db = 0;
-
ret = exec_srq_create_cmd(context, srq, init_attr);
if (ret)
goto err_srq_db;
@@ -1591,8 +1587,6 @@ static int qp_alloc_db(struct ibv_qp_init_attr_ex *attr, struct hns_roce_qp *qp,
qp->sdb = hns_roce_alloc_db(ctx, HNS_ROCE_QP_TYPE_DB);
if (!qp->sdb)
return -ENOMEM;
-
- *qp->sdb = 0;
}
if (attr->cap.max_recv_sge) {
@@ -1604,8 +1598,6 @@ static int qp_alloc_db(struct ibv_qp_init_attr_ex *attr, struct hns_roce_qp *qp,
return -ENOMEM;
}
-
- *qp->rdb = 0;
}
return 0;
--
2.25.1

View File

@ -1,92 +0,0 @@
From de7b9a04b5bfd5cf40cc6c89dae3757f1823432a Mon Sep 17 00:00:00 2001
From: Chengchang Tang <tangchengchang@huawei.com>
Date: Tue, 26 Sep 2023 19:19:10 +0800
Subject: [PATCH 5/5] libhns: Fix missing reset notification.
driver inclusion
category: bugfix
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I83L7U
----------------------------------------------------------
Currently, the userspace driver gets the reset notification by reading a
shared variable that is set to non-zero during reset. However, if the
user does not call the driver's IO interfaces during the reset, the
notification is missed, because the variable is cleared again once the
reset completes.
This patch uses a new reset flag to determine whether the driver has been
reset at any time. The flag is assigned a non-zero value by default and
permanently becomes 0 once a reset occurs: during reset, the kernel-space
driver assigns 0 to it, and after reset the variable is remapped to a
page of all zeros. The userspace driver can therefore judge whether a
reset has happened by checking whether this flag is 0.
Fixes: 34f2ad8085c2 ("libhns: Add reset stop flow mechanism")
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
---
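A condensed restatement of the detection logic described above (sketch only;
the authoritative code is the hns_roce_reseted() hunk below, which
additionally latches the result in ctx->reseted):

#include <stdbool.h>

/* - old flag (is_reset): non-zero only while a reset is in progress, so
 *   a reset is missed if no IO is issued inside that window;
 * - new flag (hw_ready): non-zero by default, permanently 0 once a reset
 *   has occurred (the page is remapped to all zeros afterwards). */
static bool device_has_been_reset(const struct hns_roce_v2_reset_state *state,
				  bool use_new_reset_flag)
{
	if (use_new_reset_flag)
		return !state->hw_ready;

	return state && state->is_reset;
}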
providers/hns/hns_roce_u.c | 4 ++++
providers/hns/hns_roce_u.h | 2 ++
providers/hns/hns_roce_u_hw_v2.c | 3 +++
3 files changed, 9 insertions(+)
diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c
index 87f9ed8..0660081 100644
--- a/providers/hns/hns_roce_u.c
+++ b/providers/hns/hns_roce_u.c
@@ -226,6 +226,7 @@ static int init_reset_context(struct hns_roce_context *ctx, int cmd_fd,
int page_size)
{
uint64_t reset_mmap_key = resp->reset_mmap_key;
+ struct hns_roce_v2_reset_state *state;
/* The reset mmap key is 0, which means it is not supported. */
if (reset_mmap_key == 0)
@@ -236,6 +237,9 @@ static int init_reset_context(struct hns_roce_context *ctx, int cmd_fd,
if (ctx->reset_state == MAP_FAILED)
return -ENOMEM;
+ state = ctx->reset_state;
+ ctx->use_new_reset_flag = state->hw_ready;
+
return 0;
}
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index b3f21ba..5501d8e 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -235,6 +235,7 @@ struct hns_roce_dca_ctx {
struct hns_roce_v2_reset_state {
uint32_t is_reset;
+ uint32_t hw_ready;
};
struct hns_roce_cmd_flag {
@@ -278,6 +279,7 @@ struct hns_roce_context {
struct hns_roce_dca_ctx dca_ctx;
+ bool use_new_reset_flag;
bool reseted;
};
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index 29b6268..ac40d5d 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -1011,6 +1011,9 @@ static bool hns_roce_reseted(struct hns_roce_context *ctx)
{
struct hns_roce_v2_reset_state *state = ctx->reset_state;
+ if (ctx->use_new_reset_flag)
+ return !state->hw_ready;
+
if (state && state->is_reset)
ctx->reseted = true;
--
2.25.1

View File

@ -1,231 +0,0 @@
From d628c51d25b972a7d26e53ea400b3a0679d51f91 Mon Sep 17 00:00:00 2001
From: Chengchang Tang <tangchengchang@huawei.com>
Date: Mon, 23 Oct 2023 21:13:03 +0800
Subject: [PATCH] libhns: Support flexible WQE buffer page size
driver inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I87LTM
--------------------------------------------------------------------------
Currently, the driver always allocates 4K pages for the userspace WQE
buffer, even on a 64K-page system. As a result, HW reads WQEs with a 4K
granularity regardless of the system page size. Since 1024-byte inline
data is supported, HW has to switch pages every 4 WQEs when SQ inline is
used. This introduces a delay of about 400ns, i.e. an average of 100ns
per packet.
To improve performance, allow the user-mode driver to use a larger page
size for WQE buffers, reducing the latency introduced by HW page
switching. The user-mode driver may allocate WQE buffers with a page size
between 4K and the system page size. During ibv_create_qp(), the driver
dynamically selects an appropriate page size based on ibv_qp_cap, reducing
memory consumption while improving performance.
This feature must be used in conjunction with the kernel-mode driver. To
ensure forward compatibility, if the kernel-mode driver does not support
this feature, the user-mode driver keeps using a fixed 4K page size for
the WQE buffer.
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
---
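As an illustration of the selection policy described above (the driver's
implementation is get_best_multi_region_pg_shift() in the hunk below; the
stand-alone helper and the example numbers here are assumptions):

#include <stdint.h>

/* ceil(log2(v)) for v > 0. */
static unsigned int ceil_log2(uint32_t v)
{
	unsigned int shift = 0;

	while ((1U << shift) < v)
		shift++;
	return shift;
}

/* Pick a WQE buffer page shift: large enough for each of the SQ,
 * extended-SGE and RQ regions, but at least the 4K HW page and at most
 * the system page. */
static unsigned int pick_wqe_page_shift(uint32_t sq_bytes, uint32_t ext_sge_bytes,
					uint32_t rq_bytes, unsigned int sys_page_shift)
{
	unsigned int shift = 12; /* 4K HW page */

	if (sq_bytes && ceil_log2(sq_bytes) > shift)
		shift = ceil_log2(sq_bytes);
	if (ext_sge_bytes && ceil_log2(ext_sge_bytes) > shift)
		shift = ceil_log2(ext_sge_bytes);
	if (rq_bytes && ceil_log2(rq_bytes) > shift)
		shift = ceil_log2(rq_bytes);

	return shift > sys_page_shift ? sys_page_shift : shift;
}

For example, with a 32K SQ buffer, an 8K RQ buffer and no extended SGE on a
64K-page system, this picks a 32K page instead of the former fixed 4K.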
kernel-headers/rdma/hns-abi.h | 5 ++-
providers/hns/hns_roce_u.c | 2 +-
providers/hns/hns_roce_u.h | 1 +
providers/hns/hns_roce_u_verbs.c | 65 ++++++++++++++++++++++++++------
4 files changed, 59 insertions(+), 14 deletions(-)
diff --git a/kernel-headers/rdma/hns-abi.h b/kernel-headers/rdma/hns-abi.h
index cab941f..157dc9d 100644
--- a/kernel-headers/rdma/hns-abi.h
+++ b/kernel-headers/rdma/hns-abi.h
@@ -81,7 +81,8 @@ struct hns_roce_ib_create_qp {
__u8 log_sq_bb_count;
__u8 log_sq_stride;
__u8 sq_no_prefetch;
- __u8 reserved[5];
+ __u8 reserved[4];
+ __u8 pageshift;
__aligned_u64 sdb_addr;
__aligned_u64 comp_mask;
__aligned_u64 create_flags;
@@ -122,6 +123,7 @@ enum {
HNS_ROCE_RQ_INLINE_FLAGS = 1 << 1,
HNS_ROCE_CQE_INLINE_FLAGS = 1 << 2,
HNS_ROCE_UCTX_CONFIG_DCA = 1 << 3,
+ HNS_ROCE_UCTX_DYN_QP_PGSZ = 1 << 4,
};
enum {
@@ -129,6 +131,7 @@ enum {
HNS_ROCE_RSP_RQ_INLINE_FLAGS = 1 << 1,
HNS_ROCE_RSP_CQE_INLINE_FLAGS = 1 << 2,
HNS_ROCE_UCTX_RSP_DCA_FLAGS = HNS_ROCE_UCTX_CONFIG_DCA,
+ HNS_ROCE_UCTX_RSP_DYN_QP_PGSZ = HNS_ROCE_UCTX_DYN_QP_PGSZ,
};
struct hns_roce_ib_alloc_ucontext_resp {
diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c
index 0660081..02ad880 100644
--- a/providers/hns/hns_roce_u.c
+++ b/providers/hns/hns_roce_u.c
@@ -267,7 +267,7 @@ static void ucontext_set_cmd(struct hns_roce_alloc_ucontext *cmd,
struct hnsdv_context_attr *attr)
{
cmd->config |= HNS_ROCE_EXSGE_FLAGS | HNS_ROCE_RQ_INLINE_FLAGS |
- HNS_ROCE_CQE_INLINE_FLAGS;
+ HNS_ROCE_CQE_INLINE_FLAGS | HNS_ROCE_UCTX_DYN_QP_PGSZ;
if (!attr || !(attr->flags & HNSDV_CONTEXT_FLAGS_DCA))
return;
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index 5501d8e..ae9ae51 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -409,6 +409,7 @@ struct hns_roce_qp {
uint8_t sl;
uint8_t tc_mode;
uint8_t priority;
+ uint8_t pageshift;
unsigned int qkey;
enum ibv_mtu path_mtu;
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index 7b58dd0..f76341c 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -1327,31 +1327,69 @@ static void free_recv_rinl_buf(struct hns_roce_rinl_buf *rinl_buf)
}
}
+static void get_best_multi_region_pg_shift(struct hns_roce_device *hr_dev,
+ struct hns_roce_context *ctx,
+ struct hns_roce_qp *qp, bool dca_en)
+{
+ uint32_t ext_sge_size;
+ uint32_t sq_size;
+ uint32_t rq_size;
+ uint8_t pg_shift;
+
+ if (!(ctx->config & HNS_ROCE_UCTX_RSP_DYN_QP_PGSZ) || dca_en) {
+ qp->pageshift = HNS_HW_PAGE_SHIFT;
+ return;
+ }
+
+ /*
+ * The larger the pagesize used, the better the performance, but it
+ * may waste more memory. Therefore, we use the least common multiple
+ * (aligned to power of 2) of sq wqe buffer size, rq wqe buffer size,
+ * and ext_sge buffer size as the pagesize. Additionally, since the
+ * kernel cannot guarantee the allocation of contiguous memory larger
+ * than the system page, the pagesize must be smaller than the system
+ * page.
+ */
+ sq_size = qp->sq.wqe_cnt << qp->sq.wqe_shift;
+ ext_sge_size = qp->ex_sge.sge_cnt << qp->ex_sge.sge_shift;
+ rq_size = qp->rq.wqe_cnt << qp->rq.wqe_shift;
+
+ pg_shift = max_t(uint8_t, sq_size ? hr_ilog32(sq_size) : 0,
+ ext_sge_size ? hr_ilog32(ext_sge_size) : 0);
+ pg_shift = max_t(uint8_t, pg_shift, rq_size ? hr_ilog32(rq_size) : 0);
+ pg_shift = max_t(uint8_t, pg_shift, HNS_HW_PAGE_SHIFT);
+ qp->pageshift = min_t(uint8_t, pg_shift, hr_ilog32(hr_dev->page_size));
+}
+
static int calc_qp_buff_size(struct hns_roce_device *hr_dev,
- struct hns_roce_qp *qp)
+ struct hns_roce_context *ctx,
+ struct hns_roce_qp *qp, bool dca_en)
{
struct hns_roce_wq *sq = &qp->sq;
struct hns_roce_wq *rq = &qp->rq;
+ unsigned int page_size;
unsigned int size;
qp->buf_size = 0;
+ get_best_multi_region_pg_shift(hr_dev, ctx, qp, dca_en);
+ page_size = 1 << qp->pageshift;
/* SQ WQE */
sq->offset = 0;
- size = to_hr_hem_entries_size(sq->wqe_cnt, sq->wqe_shift);
+ size = align(sq->wqe_cnt << sq->wqe_shift, page_size);
qp->buf_size += size;
/* extend SGE WQE in SQ */
qp->ex_sge.offset = qp->buf_size;
if (qp->ex_sge.sge_cnt > 0) {
- size = to_hr_hem_entries_size(qp->ex_sge.sge_cnt,
- qp->ex_sge.sge_shift);
+ size = align(qp->ex_sge.sge_cnt << qp->ex_sge.sge_shift,
+ page_size);
qp->buf_size += size;
}
/* RQ WQE */
rq->offset = qp->buf_size;
- size = to_hr_hem_entries_size(rq->wqe_cnt, rq->wqe_shift);
+ size = align(rq->wqe_cnt << rq->wqe_shift, page_size);
qp->buf_size += size;
if (qp->buf_size < 1)
@@ -1375,7 +1413,7 @@ static inline bool check_qp_support_dca(struct hns_roce_dca_ctx *dca_ctx,
if (hns_attr &&
(hns_attr->comp_mask & HNSDV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS) &&
(hns_attr->create_flags & HNSDV_QP_CREATE_ENABLE_DCA_MODE))
- return true;
+ return dca_ctx->max_size > 0;
return false;
}
@@ -1396,9 +1434,12 @@ static int qp_alloc_wqe(struct ibv_qp_init_attr_ex *attr,
struct hns_roce_qp *qp, struct hns_roce_context *ctx)
{
struct hns_roce_device *hr_dev = to_hr_dev(ctx->ibv_ctx.context.device);
+ bool dca_en = check_qp_support_dca(&ctx->dca_ctx, attr, hns_attr);
+ int ret;
- if (calc_qp_buff_size(hr_dev, qp))
- return -EINVAL;
+ ret = calc_qp_buff_size(hr_dev, ctx, qp, dca_en);
+ if (ret)
+ return ret;
qp->sq.wrid = malloc(qp->sq.wqe_cnt * sizeof(uint64_t));
if (!qp->sq.wrid)
@@ -1416,19 +1457,18 @@ static int qp_alloc_wqe(struct ibv_qp_init_attr_ex *attr,
goto err_alloc;
}
- if (check_qp_support_dca(&ctx->dca_ctx, attr, hns_attr) &&
- ctx->dca_ctx.max_size > 0) {
+ if (dca_en) {
/* when DCA is enabled, use a buffer list to store page addr */
qp->buf.buf = NULL;
qp->dca_wqe.max_cnt = hr_hw_page_count(qp->buf_size);
- qp->dca_wqe.shift = HNS_HW_PAGE_SHIFT;
+ qp->dca_wqe.shift = qp->pageshift;
qp->dca_wqe.bufs = calloc(qp->dca_wqe.max_cnt, sizeof(void *));
if (!qp->dca_wqe.bufs)
goto err_alloc;
verbs_debug(&ctx->ibv_ctx, "alloc DCA buf.\n");
} else {
if (hns_roce_alloc_buf(&qp->buf, qp->buf_size,
- HNS_HW_PAGE_SIZE))
+ 1 << qp->pageshift))
goto err_alloc;
}
@@ -1642,6 +1682,7 @@ static int qp_exec_create_cmd(struct ibv_qp_init_attr_ex *attr,
cmd_ex.buf_addr = (uintptr_t)qp->buf.buf;
cmd_ex.log_sq_stride = qp->sq.wqe_shift;
cmd_ex.log_sq_bb_count = hr_ilog32(qp->sq.wqe_cnt);
+ cmd_ex.pageshift = qp->pageshift;
if (cmd_flag->congest_type_flags) {
cmd_ex.comp_mask |= HNS_ROCE_CREATE_QP_MASK_CONGEST_TYPE;
--
2.25.1

View File

@ -1,46 +0,0 @@
From 73a5a85a9fd75e2dd461bfd129d263fde44aa5ea Mon Sep 17 00:00:00 2001
From: Junxian Huang <huangjunxian6@hisilicon.com>
Date: Mon, 16 Oct 2023 16:10:05 +0800
Subject: [PATCH 1/2] Update kernel headers
To commit: c9813b0b9992 ("RDMA/hns: Support SRQ record doorbell").
Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com>
---
kernel-headers/rdma/hns-abi.h | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/kernel-headers/rdma/hns-abi.h b/kernel-headers/rdma/hns-abi.h
index 157dc9d..6b94a89 100644
--- a/kernel-headers/rdma/hns-abi.h
+++ b/kernel-headers/rdma/hns-abi.h
@@ -52,15 +52,25 @@ struct hns_roce_ib_create_cq_resp {
__aligned_u64 cap_flags;
};
+enum hns_roce_srq_cap_flags {
+ HNS_ROCE_SRQ_CAP_RECORD_DB = 1 << 0,
+};
+
+enum hns_roce_srq_cap_flags_resp {
+ HNS_ROCE_RSP_SRQ_CAP_RECORD_DB = 1 << 0,
+};
+
struct hns_roce_ib_create_srq {
__aligned_u64 buf_addr;
__aligned_u64 db_addr;
__aligned_u64 que_addr;
+ __u32 req_cap_flags; /* Use enum hns_roce_srq_cap_flags */
+ __u32 reserved;
};
struct hns_roce_ib_create_srq_resp {
__u32 srqn;
- __u32 reserved;
+ __u32 cap_flags; /* Use enum hns_roce_srq_cap_flags */
};
enum hns_roce_create_qp_comp_mask {
--
2.25.1

View File

@ -1,150 +0,0 @@
From 2880d64c8d73375978d2767c5dd7803b444f9016 Mon Sep 17 00:00:00 2001
From: Yangyang Li <liyangyang20@huawei.com>
Date: Mon, 16 Oct 2023 16:10:06 +0800
Subject: [PATCH] libhns: Support SRQ record doorbell
Compared with the normal doorbell, using a record doorbell shortens the
doorbell-ringing process and reduces latency.
During SRQ creation, the kernel driver allocates the doorbell buffer and
uses the flag HNS_ROCE_RSP_SRQ_CAP_RECORD_DB to tell userspace whether the
SRQ record doorbell is enabled. Based on this flag, the userspace driver
decides whether to use the record doorbell or the normal doorbell in the
post SRQ recv path.
This patch relies on the corresponding kernel patch:
RDMA/hns: Support SRQ record doorbell
Signed-off-by: Yangyang Li <liyangyang20@huawei.com>
Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com>
---
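A condensed view of the doorbell decision this patch adds to the post SRQ
recv path (sketch with explanatory comments; the authoritative code is in
the hns_roce_u_hw_v2.c hunk below):

	/* after filling the WQEs and a udma_to_device_barrier() */
	if (srq->cap_flags & HNS_ROCE_RSP_SRQ_CAP_RECORD_DB)
		/* record doorbell: store the new head into the shared
		 * doorbell buffer; no MMIO write is required */
		*srq->rdb = srq->idx_que.head & 0xffff;
	else
		/* normal doorbell: compose the doorbell value and write
		 * it to the device via MMIO */
		update_srq_db(ctx, &srq_db, srq);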
providers/hns/hns_roce_u.h | 4 +++-
providers/hns/hns_roce_u_db.c | 1 +
providers/hns/hns_roce_u_hw_v2.c | 14 +++++++++-----
providers/hns/hns_roce_u_verbs.c | 12 +++++++-----
4 files changed, 20 insertions(+), 11 deletions(-)
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index ae9ae51..197bde9 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -187,6 +187,7 @@ struct hns_roce_buf {
enum hns_roce_db_type {
HNS_ROCE_QP_TYPE_DB,
HNS_ROCE_CQ_TYPE_DB,
+ HNS_ROCE_SRQ_TYPE_DB,
HNS_ROCE_DB_TYPE_NUM
};
@@ -351,7 +352,8 @@ struct hns_roce_srq {
unsigned int max_gs;
unsigned int rsv_sge;
unsigned int wqe_shift;
- unsigned int *db;
+ unsigned int *rdb;
+ unsigned int cap_flags;
unsigned short counter;
struct list_node xrc_srcq_node;
};
diff --git a/providers/hns/hns_roce_u_db.c b/providers/hns/hns_roce_u_db.c
index 73a71de..bbef988 100644
--- a/providers/hns/hns_roce_u_db.c
+++ b/providers/hns/hns_roce_u_db.c
@@ -41,6 +41,7 @@
static const unsigned int db_size[] = {
[HNS_ROCE_QP_TYPE_DB] = 4,
[HNS_ROCE_CQ_TYPE_DB] = 4,
+ [HNS_ROCE_SRQ_TYPE_DB] = 4,
};
static struct hns_roce_db_page *hns_roce_add_db_page(
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index ac40d5d..714a34e 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -2109,11 +2109,15 @@ static void fill_wqe_idx(struct hns_roce_srq *srq, unsigned int wqe_idx)
idx_que->head++;
}
-static void update_srq_db(struct hns_roce_db *db, struct hns_roce_srq *srq)
+static void update_srq_db(struct hns_roce_context *ctx, struct hns_roce_db *db,
+ struct hns_roce_srq *srq)
{
hr_reg_write(db, DB_TAG, srq->srqn);
hr_reg_write(db, DB_CMD, HNS_ROCE_V2_SRQ_DB);
hr_reg_write(db, DB_PI, srq->idx_que.head);
+
+ hns_roce_write64(ctx, ctx->uar + ROCEE_VF_DB_CFG0_OFFSET,
+ (__le32 *)db);
}
static int check_srq_recv(struct hns_roce_context *ctx)
@@ -2176,10 +2180,10 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
*/
udma_to_device_barrier();
- update_srq_db(&srq_db, srq);
-
- hns_roce_write64(ctx, ctx->uar + ROCEE_VF_DB_CFG0_OFFSET,
- (__le32 *)&srq_db);
+ if (srq->cap_flags & HNS_ROCE_RSP_SRQ_CAP_RECORD_DB)
+ *srq->rdb = srq->idx_que.head & 0xffff;
+ else
+ update_srq_db(ctx, &srq_db, srq);
}
hns_roce_spin_unlock(&srq->hr_lock);
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index f76341c..1c2d94d 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -866,7 +866,8 @@ static int exec_srq_create_cmd(struct ibv_context *context,
cmd_ex.buf_addr = (uintptr_t)srq->wqe_buf.buf;
cmd_ex.que_addr = (uintptr_t)srq->idx_que.buf.buf;
- cmd_ex.db_addr = (uintptr_t)srq->db;
+ cmd_ex.db_addr = (uintptr_t)srq->rdb;
+ cmd_ex.req_cap_flags |= HNS_ROCE_SRQ_CAP_RECORD_DB;
ret = ibv_cmd_create_srq_ex(context, &srq->verbs_srq, init_attr,
&cmd_ex.ibv_cmd, sizeof(cmd_ex),
@@ -875,6 +876,7 @@ static int exec_srq_create_cmd(struct ibv_context *context,
return ret;
srq->srqn = resp_ex.srqn;
+ srq->cap_flags = resp_ex.cap_flags;
return 0;
}
@@ -932,8 +934,8 @@ static struct ibv_srq *create_srq(struct ibv_context *context,
if (alloc_srq_buf(srq))
goto err_free_srq;
- srq->db = hns_roce_alloc_db(hr_ctx, HNS_ROCE_QP_TYPE_DB);
- if (!srq->db)
+ srq->rdb = hns_roce_alloc_db(hr_ctx, HNS_ROCE_SRQ_TYPE_DB);
+ if (!srq->rdb)
goto err_srq_buf;
ret = exec_srq_create_cmd(context, srq, init_attr);
@@ -956,7 +958,7 @@ err_destroy_srq:
ibv_cmd_destroy_srq(&srq->verbs_srq.srq);
err_srq_db:
- hns_roce_free_db(hr_ctx, srq->db, HNS_ROCE_QP_TYPE_DB);
+ hns_roce_free_db(hr_ctx, srq->rdb, HNS_ROCE_SRQ_TYPE_DB);
err_srq_buf:
free_srq_buf(srq);
@@ -1048,7 +1050,7 @@ int hns_roce_u_destroy_srq(struct ibv_srq *ibv_srq)
hns_roce_clear_srq(ctx, srq->srqn);
- hns_roce_free_db(ctx, srq->db, HNS_ROCE_QP_TYPE_DB);
+ hns_roce_free_db(ctx, srq->rdb, HNS_ROCE_SRQ_TYPE_DB);
free_srq_buf(srq);
free(srq);
--
2.25.1

View File

@ -1,155 +0,0 @@
From c77fa69a300e97cc37bb5189f79e3a1ca6f30ac5 Mon Sep 17 00:00:00 2001
From: Chengchang Tang <tangchengchang@huawei.com>
Date: Thu, 26 Oct 2023 14:35:03 +0800
Subject: [PATCH] libhns: Skip resolving MAC for RDMA over UBLink
driver inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I8AL44
For RDMA over UBLink, the MAC layer is replaced by UBLink, and thus the
MAC address is not needed. So skip MAC address resolution for this mode.
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
Signed-off-by: Haoyue Xu <xuhaoyue1@hisilicon.com>
Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com>
---
providers/hns/hns_roce_u.c | 37 +++++++++++++++++++++++++++++++-
providers/hns/hns_roce_u.h | 9 ++++----
providers/hns/hns_roce_u_hw_v2.c | 2 +-
providers/hns/hns_roce_u_verbs.c | 3 ++-
4 files changed, 44 insertions(+), 7 deletions(-)
diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c
index 02ad880..cef64ec 100644
--- a/providers/hns/hns_roce_u.c
+++ b/providers/hns/hns_roce_u.c
@@ -56,6 +56,7 @@ static const struct verbs_match_ent hca_table[] = {
VERBS_PCI_MATCH(PCI_VENDOR_ID_HUAWEI, 0xA22C, &hns_roce_u_hw_v2),
VERBS_PCI_MATCH(PCI_VENDOR_ID_HUAWEI, 0xA22D, &hns_roce_u_hw_v2),
VERBS_PCI_MATCH(PCI_VENDOR_ID_HUAWEI, 0xA22F, &hns_roce_u_hw_v2),
+ VERBS_PCI_MATCH(PCI_VENDOR_ID_HUAWEI, 0xA26A, &hns_roce_u_hw_v2),
{}
};
@@ -95,6 +96,23 @@ static const struct verbs_context_ops hns_common_ops = {
.alloc_parent_domain = hns_roce_u_alloc_pad,
};
+static struct {
+ uint32_t device_id;
+ enum hns_device_link_type link_type;
+} device_link_types[] = {
+ {0xA222, HNS_DEV_LINK_TYPE_ETH},
+ {0xA223, HNS_DEV_LINK_TYPE_ETH},
+ {0xA224, HNS_DEV_LINK_TYPE_ETH},
+ {0xA225, HNS_DEV_LINK_TYPE_ETH},
+ {0xA226, HNS_DEV_LINK_TYPE_ETH},
+ {0xA228, HNS_DEV_LINK_TYPE_ETH},
+ {0xA22F, HNS_DEV_LINK_TYPE_ETH},
+ {0xA227, HNS_DEV_LINK_TYPE_HCCS},
+ {0xA22C, HNS_DEV_LINK_TYPE_HCCS},
+ {0xA22D, HNS_DEV_LINK_TYPE_HCCS},
+ {0xA26A, HNS_DEV_LINK_TYPE_UB}
+};
+
static int mmap_dca(struct hns_roce_context *ctx, int cmd_fd,
int page_size, size_t size, uint64_t mmap_key)
{
@@ -256,6 +274,21 @@ static int hns_roce_mmap(struct hns_roce_device *hr_dev,
return 0;
}
+static int get_link_type(uint32_t device_id,
+ enum hns_device_link_type *link_type)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(device_link_types); i++) {
+ if (device_id == device_link_types[i].device_id) {
+ *link_type = device_link_types[i].link_type;
+ return 0;
+ }
+ }
+
+ return ENOENT;
+}
+
static uint32_t calc_table_shift(uint32_t entry_count, uint32_t size_shift)
{
uint32_t count_shift = hr_ilog32(entry_count);
@@ -302,7 +335,6 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
&resp.ibv_resp, sizeof(resp)))
goto err_free;
- hr_dev->mac_type = resp.mac_type;
hr_dev->congest_type = resp.congest_type;
if (!resp.cqe_size)
@@ -338,6 +370,9 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
goto err_free;
hr_dev->hw_version = dev_attrs.hw_ver;
+ if (get_link_type(dev_attrs.vendor_part_id, &hr_dev->link_type))
+ hr_dev->link_type = resp.mac_type;
+
context->max_qp_wr = dev_attrs.max_qp_wr;
context->max_sge = dev_attrs.max_sge;
context->max_cqe = dev_attrs.max_cqe;
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index 197bde9..662eb8a 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -161,9 +161,10 @@ enum {
#define HNS_ROCE_SRQ_TABLE_BITS 8
#define HNS_ROCE_SRQ_TABLE_SIZE BIT(HNS_ROCE_SRQ_TABLE_BITS)
-enum {
- HNAE3_MAC_ETH,
- HNAE3_MAC_ROH,
+enum hns_device_link_type {
+ HNS_DEV_LINK_TYPE_ETH,
+ HNS_DEV_LINK_TYPE_HCCS,
+ HNS_DEV_LINK_TYPE_UB,
};
struct hns_roce_device {
@@ -171,7 +172,7 @@ struct hns_roce_device {
int page_size;
const struct hns_roce_u_hw *u_hw;
int hw_version;
- uint8_t mac_type;
+ enum hns_device_link_type link_type;
uint8_t congest_type;
};
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index 714a34e..fbd02dc 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -1431,7 +1431,7 @@ static int set_ud_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr,
if (ret)
return ret;
- if (hr_dev->mac_type == HNAE3_MAC_ROH)
+ if (hr_dev->link_type == HNS_DEV_LINK_TYPE_HCCS)
ud_sq_wqe->dmac[0] = 0xFF;
ret = fill_ud_data_seg(ud_sq_wqe, qp, wr, sge_info);
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index 1c2d94d..c3b01a0 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -2019,7 +2019,8 @@ struct ibv_ah *hns_roce_u_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr)
if (ibv_cmd_create_ah(pd, &ah->ibv_ah, attr, &resp.ibv_resp, sizeof(resp)))
goto err;
- if (ibv_resolve_eth_l2_from_gid(pd->context, attr, ah->av.mac, NULL))
+ if (hr_dev->link_type != HNS_DEV_LINK_TYPE_UB &&
+ ibv_resolve_eth_l2_from_gid(pd->context, attr, ah->av.mac, NULL))
goto err;
if (resp.tc_mode == HNS_ROCE_TC_MAP_MODE_DSCP)
--
2.25.1

View File

@ -1,44 +0,0 @@
From 3bf0c428672478f2460bd16483d33fd0ccdcb718 Mon Sep 17 00:00:00 2001
From: Chengchang Tang <tangchengchang@huawei.com>
Date: Mon, 30 Oct 2023 16:59:15 +0800
Subject: [PATCH 1/8] Update kernel headers for libhns query_device()
driver inclusion
category: feature
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I8C6X4
---------------------------------------------------------------
To commit 513ecf0e21d3 ("RDMA/hns: Support query HW ID from user space.")
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
---
kernel-headers/rdma/hns-abi.h | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/kernel-headers/rdma/hns-abi.h b/kernel-headers/rdma/hns-abi.h
index 6b94a89..d411c33 100644
--- a/kernel-headers/rdma/hns-abi.h
+++ b/kernel-headers/rdma/hns-abi.h
@@ -232,4 +232,18 @@ enum hns_ib_dca_mem_query_attrs {
HNS_IB_ATTR_DCA_MEM_QUERY_OUT_PAGE_COUNT,
};
+#define HNS_IB_INVALID_ID 0XFFFF
+struct hns_roce_ib_hw_id {
+ __u16 chip_id;
+ __u16 die_id;
+ __u16 func_id;
+ __u16 reserved;
+};
+
+struct hns_roce_ib_query_device_resp {
+ __u32 comp_mask;
+ __u32 len;
+ struct hns_roce_ib_hw_id hw_id;
+};
+
#endif /* HNS_ABI_USER_H */
--
2.25.1

View File

@ -1,230 +0,0 @@
From 26d1c2339d0cc633b3817a78303eff6635a5fe88 Mon Sep 17 00:00:00 2001
From: Chengchang Tang <tangchengchang@huawei.com>
Date: Mon, 30 Oct 2023 16:59:16 +0800
Subject: [PATCH 2/8] libhns: Support query HW ID by hnsdv_query_device()
driver inclusion
category: feature
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I8C6X4
---------------------------------------------------------------
Support obtaining the hardware-related IDs (chip/die/function) through
hnsdv_query_device().
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
---
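A hypothetical caller of the new interface (device open and error handling
omitted; the type, flag and field names come from the hnsdv.h hunk below):

#include <stdio.h>
#include <infiniband/hnsdv.h>

/* Print the HW IDs of an opened hns device, if the kernel reports them. */
static void print_hw_id(struct ibv_context *ctx)
{
	struct hnsdv_context attrs = {};

	if (hnsdv_query_device(ctx, &attrs))
		return;

	if (attrs.comp_mask & HNSDV_CONTEXT_MASK_HW_ID)
		printf("chip %d, die %d, func %d\n",
		       attrs.hw_id.chip_id, attrs.hw_id.die_id,
		       attrs.hw_id.func_id);
}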
providers/hns/hns_roce_u.c | 43 ++++++++++++++++++++++----------
providers/hns/hns_roce_u.h | 1 +
providers/hns/hns_roce_u_abi.h | 3 +++
providers/hns/hns_roce_u_verbs.c | 32 ++++++++++++++++++------
providers/hns/hnsdv.h | 11 ++++++++
5 files changed, 70 insertions(+), 20 deletions(-)
diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c
index cef64ec..1085b85 100644
--- a/providers/hns/hns_roce_u.c
+++ b/providers/hns/hns_roce_u.c
@@ -313,6 +313,35 @@ static void ucontext_set_cmd(struct hns_roce_alloc_ucontext *cmd,
}
}
+static void set_default_hw_id(struct hns_roce_context *context)
+{
+ context->hw_id.chip_id = HNSDV_INVALID_HW_ID;
+ context->hw_id.die_id = HNSDV_INVALID_HW_ID;
+ context->hw_id.func_id = HNSDV_INVALID_HW_ID;
+}
+
+static int query_dev_attr(struct hns_roce_context *context,
+ struct hns_roce_device *hr_dev)
+{
+ struct ibv_device_attr_ex attrx = {};
+ struct ibv_device_attr *dev_attrs = &attrx.orig_attr;
+
+ set_default_hw_id(context);
+
+ if (hns_roce_u_query_device(&context->ibv_ctx.context, NULL, &attrx,
+ sizeof(attrx)))
+ return EINVAL;
+
+ hr_dev->hw_version = dev_attrs->hw_ver;
+ context->max_qp_wr = dev_attrs->max_qp_wr;
+ context->max_sge = dev_attrs->max_sge;
+ context->max_cqe = dev_attrs->max_cqe;
+ context->max_srq_wr = dev_attrs->max_srq_wr;
+ context->max_srq_sge = dev_attrs->max_srq_sge;
+
+ return 0;
+}
+
static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
int cmd_fd,
void *private_data)
@@ -321,7 +350,6 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
struct hns_roce_device *hr_dev = to_hr_dev(ibdev);
struct hns_roce_alloc_ucontext_resp resp = {};
struct hns_roce_alloc_ucontext cmd = {};
- struct ibv_device_attr dev_attrs;
struct hns_roce_context *context;
int i;
@@ -362,23 +390,12 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
for (i = 0; i < HNS_ROCE_SRQ_TABLE_SIZE; ++i)
context->srq_table[i].refcnt = 0;
- if (hns_roce_u_query_device(&context->ibv_ctx.context, NULL,
- container_of(&dev_attrs,
- struct ibv_device_attr_ex,
- orig_attr),
- sizeof(dev_attrs)))
+ if (query_dev_attr(context, hr_dev))
goto err_free;
- hr_dev->hw_version = dev_attrs.hw_ver;
if (get_link_type(dev_attrs.vendor_part_id, &hr_dev->link_type))
hr_dev->link_type = resp.mac_type;
- context->max_qp_wr = dev_attrs.max_qp_wr;
- context->max_sge = dev_attrs.max_sge;
- context->max_cqe = dev_attrs.max_cqe;
- context->max_srq_wr = dev_attrs.max_srq_wr;
- context->max_srq_sge = dev_attrs.max_srq_sge;
-
if (init_dca_context(context, cmd_fd,
&resp, ctx_attr, hr_dev->page_size))
goto err_free;
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index 662eb8a..323d2f9 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -280,6 +280,7 @@ struct hns_roce_context {
unsigned int max_inline_data;
struct hns_roce_dca_ctx dca_ctx;
+ struct hnsdv_hw_id hw_id;
bool use_new_reset_flag;
bool reseted;
diff --git a/providers/hns/hns_roce_u_abi.h b/providers/hns/hns_roce_u_abi.h
index 1eaf62d..c73a30d 100644
--- a/providers/hns/hns_roce_u_abi.h
+++ b/providers/hns/hns_roce_u_abi.h
@@ -68,4 +68,7 @@ DECLARE_DRV_CMD(hns_roce_create_srq_ex, IB_USER_VERBS_CMD_CREATE_XSRQ,
DECLARE_DRV_CMD(hns_roce_modify_qp_ex, IB_USER_VERBS_EX_CMD_MODIFY_QP,
empty, hns_roce_ib_modify_qp_resp);
+DECLARE_DRV_CMD(hns_roce_query_device_ex, IB_USER_VERBS_EX_CMD_QUERY_DEVICE,
+ empty, hns_roce_ib_query_device_resp);
+
#endif /* _HNS_ROCE_U_ABI_H */
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index c3b01a0..5b86077 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -84,22 +84,27 @@ void hns_roce_init_qp_indices(struct hns_roce_qp *qp)
qp->next_sge = 0;
}
+#define HNSDV_CONVERT_HW_ID(val) \
+ ((val) == HNS_IB_INVALID_ID ? HNSDV_INVALID_HW_ID : (val))
+
int hns_roce_u_query_device(struct ibv_context *context,
const struct ibv_query_device_ex_input *input,
struct ibv_device_attr_ex *attr, size_t attr_size)
{
- struct ib_uverbs_ex_query_device_resp resp;
+ struct hns_roce_context *ctx = to_hr_ctx(context);
+ struct hns_roce_query_device_ex_resp resp = {};
unsigned int major, minor, sub_minor;
size_t resp_size = sizeof(resp);
uint64_t raw_fw_ver;
int ret;
- ret = ibv_cmd_query_device_any(context, input, attr, attr_size, &resp,
- &resp_size);
+ ctx = container_of(context, struct hns_roce_context, ibv_ctx.context);
+ ret = ibv_cmd_query_device_any(context, input, attr, attr_size,
+ &resp.ibv_resp, &resp_size);
if (ret)
return ret;
- raw_fw_ver = resp.base.fw_ver;
+ raw_fw_ver = resp.ibv_resp.base.fw_ver;
major = (raw_fw_ver >> 32) & 0xffff;
minor = (raw_fw_ver >> 16) & 0xffff;
sub_minor = raw_fw_ver & 0xffff;
@@ -107,27 +112,40 @@ int hns_roce_u_query_device(struct ibv_context *context,
snprintf(attr->orig_attr.fw_ver, sizeof(attr->orig_attr.fw_ver),
"%u.%u.%03u", major, minor, sub_minor);
+ if (resp.len >= offsetofend(typeof(resp.drv_payload), hw_id)) {
+ ctx->hw_id.chip_id = HNSDV_CONVERT_HW_ID(resp.hw_id.chip_id);
+ ctx->hw_id.die_id = HNSDV_CONVERT_HW_ID(resp.hw_id.die_id);
+ ctx->hw_id.func_id = HNSDV_CONVERT_HW_ID(resp.hw_id.func_id);
+ }
+
return 0;
}
int hnsdv_query_device(struct ibv_context *context,
struct hnsdv_context *attrs_out)
{
+ struct hns_roce_context *ctx = context ? to_hr_ctx(context) : NULL;
struct hns_roce_device *hr_dev = to_hr_dev(context->device);
+ if (!ctx || !attrs_out)
+ return EINVAL;
+
if (!hr_dev) {
verbs_err(verbs_get_ctx(context), "not a HNS RoCE device!\n");
return EOPNOTSUPP;
}
- if (!attrs_out)
- return EINVAL;
-
memset(attrs_out, 0, sizeof(*attrs_out));
attrs_out->comp_mask |= HNSDV_CONTEXT_MASK_CONGEST_TYPE;
attrs_out->congest_type = hr_dev->congest_type;
+ if (ctx->hw_id.chip_id != HNSDV_INVALID_HW_ID) {
+ attrs_out->comp_mask |= HNSDV_CONTEXT_MASK_HW_ID;
+ memcpy(&attrs_out->hw_id, &ctx->hw_id,
+ sizeof(struct hnsdv_hw_id));
+ }
+
return 0;
}
diff --git a/providers/hns/hnsdv.h b/providers/hns/hnsdv.h
index 365c314..159edb8 100644
--- a/providers/hns/hnsdv.h
+++ b/providers/hns/hnsdv.h
@@ -70,12 +70,23 @@ struct ibv_qp *hnsdv_create_qp(struct ibv_context *context,
enum hnsdv_query_context_comp_mask {
HNSDV_CONTEXT_MASK_CONGEST_TYPE = 1 << 0,
+ HNSDV_CONTEXT_MASK_HW_ID = 1 << 1,
+};
+
+#define HNSDV_INVALID_HW_ID -1
+struct hnsdv_hw_id {
+ int32_t chip_id;
+ int32_t die_id;
+ int32_t func_id;
+ int32_t reserved;
};
struct hnsdv_context {
uint64_t comp_mask; /* use enum hnsdv_query_context_comp_mask */
uint64_t flags;
uint8_t congest_type; /* Use enum hnsdv_qp_congest_ctrl_type */
+ uint8_t rsv[7];
+ struct hnsdv_hw_id hw_id;
};
int hnsdv_query_device(struct ibv_context *ctx_in,
--
2.25.1

View File

@ -1,49 +0,0 @@
From 4fe79a9dc4b3b735ededf17dada62d022d170394 Mon Sep 17 00:00:00 2001
From: Chengchang Tang <tangchengchang@huawei.com>
Date: Mon, 30 Oct 2023 16:59:17 +0800
Subject: [PATCH 3/8] Update kernel headers for supporting POE CQs
driver inclusion
category: feature
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I8C6X4
---------------------------------------------------------------
To commit eb68cb8c47ed ("RDMA/hns: Support configuring POE channels and creating POE CQs")
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
---
kernel-headers/rdma/hns-abi.h | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/kernel-headers/rdma/hns-abi.h b/kernel-headers/rdma/hns-abi.h
index d411c33..817fc06 100644
--- a/kernel-headers/rdma/hns-abi.h
+++ b/kernel-headers/rdma/hns-abi.h
@@ -36,15 +36,23 @@
#include <linux/types.h>
+enum hns_roce_create_cq_create_flags {
+ HNS_ROCE_CREATE_CQ_FLAGS_POE_MODE = 1 << 0,
+};
+
struct hns_roce_ib_create_cq {
__aligned_u64 buf_addr;
__aligned_u64 db_addr;
__u32 cqe_size;
__u32 reserved;
+ __aligned_u64 create_flags; /* Use enum hns_roce_create_cq_create_flags */
+ __u8 poe_channel;
+ __u8 rsv[7];
};
enum hns_roce_cq_cap_flags {
HNS_ROCE_CQ_FLAG_RECORD_DB = 1 << 0,
+ HNS_ROCE_CQ_FLAG_POE_EN = 1 << 2,
};
struct hns_roce_ib_create_cq_resp {
--
2.25.1

View File

@ -1,354 +0,0 @@
From 80a8e63d5e764868cbaf2af3e522a320b5781508 Mon Sep 17 00:00:00 2001
From: Chengchang Tang <tangchengchang@huawei.com>
Date: Mon, 30 Oct 2023 16:59:18 +0800
Subject: [PATCH 4/8] libhns: Add support for POE CQs
driver inclusion
category: feature
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I8C6X4
---------------------------------------------------------------
Add support for POE CQs. A POE CQ forwards its CQEs directly to the
corresponding POE channel.
In this case the driver cannot update the QP CI by itself, so
hnsdv_update_sq_ci() is added to allow users to advance the QP CI.
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
---
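A hypothetical user of the new interface (the POE channel number and CQE
depth are assumptions for the example; the attribute names come from the
hnsdv.h hunk below):

#include <infiniband/verbs.h>
#include <infiniband/hnsdv.h>

/* Create a CQ whose CQEs are delivered directly to POE channel 0. */
static struct ibv_cq_ex *create_poe_cq(struct ibv_context *ctx)
{
	struct ibv_cq_init_attr_ex cq_attr = {
		.cqe = 64,
	};
	struct hnsdv_cq_init_attr hns_attr = {
		.comp_mask = HNSDV_CQ_INIT_ATTR_MASK_CREATE_FLAGS,
		.create_flags = HNSDV_CQ_CREATE_ENABLE_POE_MODE,
		.poe_channel = 0,	/* assumed channel */
	};

	/* Such a CQ cannot be polled with ibv_poll_cq()/ibv_start_poll();
	 * its CQEs go to the POE channel instead. */
	return hnsdv_create_cq_ex(ctx, &cq_attr, &hns_attr);
}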
providers/hns/hns_roce_u.c | 11 ++--
providers/hns/hns_roce_u_hw_v2.c | 19 ++++++
providers/hns/hns_roce_u_verbs.c | 110 ++++++++++++++++++++++++++++---
providers/hns/hnsdv.h | 22 +++++++
providers/hns/libhns.map | 2 +
5 files changed, 150 insertions(+), 14 deletions(-)
diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c
index 1085b85..084385b 100644
--- a/providers/hns/hns_roce_u.c
+++ b/providers/hns/hns_roce_u.c
@@ -321,7 +321,8 @@ static void set_default_hw_id(struct hns_roce_context *context)
}
static int query_dev_attr(struct hns_roce_context *context,
- struct hns_roce_device *hr_dev)
+ struct hns_roce_device *hr_dev,
+ struct hns_roce_alloc_ucontext_resp *resp)
{
struct ibv_device_attr_ex attrx = {};
struct ibv_device_attr *dev_attrs = &attrx.orig_attr;
@@ -339,6 +340,9 @@ static int query_dev_attr(struct hns_roce_context *context,
context->max_srq_wr = dev_attrs->max_srq_wr;
context->max_srq_sge = dev_attrs->max_srq_sge;
+ if (get_link_type(dev_attrs->vendor_part_id, &hr_dev->link_type))
+ hr_dev->link_type = resp->mac_type;
+
return 0;
}
@@ -390,12 +394,9 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
for (i = 0; i < HNS_ROCE_SRQ_TABLE_SIZE; ++i)
context->srq_table[i].refcnt = 0;
- if (query_dev_attr(context, hr_dev))
+ if (query_dev_attr(context, hr_dev, &resp))
goto err_free;
- if (get_link_type(dev_attrs.vendor_part_id, &hr_dev->link_type))
- hr_dev->link_type = resp.mac_type;
-
if (init_dca_context(context, cmd_fd,
&resp, ctx_attr, hr_dev->page_size))
goto err_free;
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index fbd02dc..688b760 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -1029,6 +1029,9 @@ static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne,
int err = V2_CQ_OK;
int npolled;
+ if (cq->flags & HNS_ROCE_CQ_FLAG_POE_EN)
+ return V2_CQ_POLL_ERR;
+
hns_roce_spin_lock(&cq->hr_lock);
if (unlikely(hns_roce_reseted(ctx))) {
@@ -2202,6 +2205,9 @@ static int wc_start_poll_cq(struct ibv_cq_ex *current,
if (attr->comp_mask)
return EINVAL;
+ if (cq->flags & HNS_ROCE_CQ_FLAG_POE_EN)
+ return EOPNOTSUPP;
+
hns_roce_spin_lock(&cq->hr_lock);
if (unlikely(hns_roce_reseted(ctx))) {
@@ -3119,6 +3125,19 @@ int hns_roce_attach_qp_ex_ops(struct ibv_qp_init_attr_ex *attr,
return 0;
}
+void hnsdv_update_sq_ci(struct ibv_qp *ibv_qp, uint32_t inc_cnt)
+{
+ struct hns_roce_qp *qp;
+ struct hns_roce_wq *wq;
+
+ if (!ibv_qp)
+ return;
+
+ qp = to_hr_qp(ibv_qp);
+ wq = &qp->sq;
+ wq->tail += inc_cnt & (wq->wqe_cnt - 1);
+}
+
const struct hns_roce_u_hw hns_roce_u_hw_v2 = {
.hw_version = HNS_ROCE_HW_VER2,
.hw_ops = {
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index 5b86077..c7863d7 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -36,6 +36,7 @@
#include <math.h>
#include <errno.h>
#include <pthread.h>
+#include <inttypes.h>
#include <sys/mman.h>
#include <ccan/ilog.h>
#include <ccan/minmax.h>
@@ -470,8 +471,40 @@ enum {
IBV_WC_EX_WITH_CVLAN,
};
+enum {
+ HNSDV_CQ_SUP_COMP_MASK = HNSDV_CQ_INIT_ATTR_MASK_CREATE_FLAGS,
+};
+
+enum {
+ HNSDV_CQ_SUP_CREATE_FLAGS = HNSDV_CQ_CREATE_ENABLE_POE_MODE,
+};
+
+static int verify_hnsdv_cq_create_attr(struct hns_roce_context *ctx,
+ struct hnsdv_cq_init_attr *hns_cq_attr)
+{
+ if (!hns_cq_attr)
+ return 0;
+
+ if (!check_comp_mask(hns_cq_attr->comp_mask, HNSDV_CQ_SUP_COMP_MASK)) {
+ verbs_err(&ctx->ibv_ctx, "Unsupported cq comps 0x%"PRIu64"\n",
+ hns_cq_attr->comp_mask);
+ return EOPNOTSUPP;
+ }
+
+ if ((hns_cq_attr->comp_mask & HNSDV_CQ_INIT_ATTR_MASK_CREATE_FLAGS) &&
+ !check_comp_mask(hns_cq_attr->create_flags,
+ HNSDV_CQ_SUP_CREATE_FLAGS)) {
+ verbs_err(&ctx->ibv_ctx, "Unsupported cq flags 0x%"PRIu64"\n",
+ hns_cq_attr->create_flags);
+ return EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
static int verify_cq_create_attr(struct ibv_cq_init_attr_ex *attr,
- struct hns_roce_context *context)
+ struct hns_roce_context *context,
+ struct hnsdv_cq_init_attr *hns_cq_attr)
{
if (!attr->cqe || attr->cqe > context->max_cqe)
return -EINVAL;
@@ -495,7 +528,7 @@ static int verify_cq_create_attr(struct ibv_cq_init_attr_ex *attr,
attr->cqe = max_t(uint32_t, HNS_ROCE_MIN_CQE_NUM,
roundup_pow_of_two(attr->cqe));
- return 0;
+ return verify_hnsdv_cq_create_attr(context, hns_cq_attr);
}
static int hns_roce_alloc_cq_buf(struct hns_roce_cq *cq)
@@ -508,9 +541,34 @@ static int hns_roce_alloc_cq_buf(struct hns_roce_cq *cq)
return 0;
}
+static void set_hnsdv_cq_attr(struct hns_roce_ib_create_cq *cmd_drv,
+ struct hnsdv_cq_init_attr *hns_cq_attr)
+{
+ if (!hns_cq_attr)
+ return;
+
+ if (hns_cq_attr->create_flags & HNSDV_CQ_CREATE_ENABLE_POE_MODE) {
+ cmd_drv->create_flags |= HNS_ROCE_CREATE_CQ_FLAGS_POE_MODE;
+ cmd_drv->poe_channel = hns_cq_attr->poe_channel;
+ }
+}
+
+static int check_hnsdv_cq_flags(struct hnsdv_cq_init_attr *hns_cq_attr,
+ struct hns_roce_cq *cq)
+{
+ if (!hns_cq_attr)
+ return 0;
+
+ if ((hns_cq_attr->create_flags & HNSDV_CQ_CREATE_ENABLE_POE_MODE) &&
+ !(cq->flags & HNS_ROCE_CQ_FLAG_POE_EN))
+ return EOPNOTSUPP;
+ return 0;
+}
+
static int exec_cq_create_cmd(struct ibv_context *context,
struct hns_roce_cq *cq,
- struct ibv_cq_init_attr_ex *attr)
+ struct ibv_cq_init_attr_ex *attr,
+ struct hnsdv_cq_init_attr *hns_cq_attr)
{
struct hns_roce_create_cq_ex_resp resp_ex = {};
struct hns_roce_ib_create_cq_resp *resp_drv;
@@ -525,6 +583,8 @@ static int exec_cq_create_cmd(struct ibv_context *context,
cmd_drv->db_addr = (uintptr_t)cq->db;
cmd_drv->cqe_size = (uintptr_t)cq->cqe_size;
+ set_hnsdv_cq_attr(cmd_drv, hns_cq_attr);
+
ret = ibv_cmd_create_cq_ex(context, attr, &cq->verbs_cq,
&cmd_ex.ibv_cmd, sizeof(cmd_ex),
&resp_ex.ibv_resp, sizeof(resp_ex), 0);
@@ -534,7 +594,14 @@ static int exec_cq_create_cmd(struct ibv_context *context,
cq->cqn = resp_drv->cqn;
cq->flags = resp_drv->cap_flags;
- return 0;
+ ret = check_hnsdv_cq_flags(hns_cq_attr, cq);
+ if (ret)
+ goto flags_err;
+ return ret;
+
+flags_err:
+ ibv_cmd_destroy_cq(&cq->verbs_cq.cq);
+ return ret;
}
static int hns_roce_init_cq_swc(struct hns_roce_cq *cq,
@@ -581,13 +648,14 @@ static int hns_roce_cq_spinlock_init(struct ibv_context *context,
}
static struct ibv_cq_ex *create_cq(struct ibv_context *context,
- struct ibv_cq_init_attr_ex *attr)
+ struct ibv_cq_init_attr_ex *attr,
+ struct hnsdv_cq_init_attr *hns_cq_attr)
{
struct hns_roce_context *hr_ctx = to_hr_ctx(context);
struct hns_roce_cq *cq;
int ret;
- ret = verify_cq_create_attr(attr, hr_ctx);
+ ret = verify_cq_create_attr(attr, hr_ctx, hns_cq_attr);
if (ret)
goto err;
@@ -618,7 +686,7 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *context,
if (ret)
goto err_swc;
- ret = exec_cq_create_cmd(context, cq, attr);
+ ret = exec_cq_create_cmd(context, cq, attr, hns_cq_attr);
if (ret)
goto err_cmd;
@@ -652,7 +720,7 @@ struct ibv_cq *hns_roce_u_create_cq(struct ibv_context *context, int cqe,
.comp_vector = comp_vector,
};
- cq = create_cq(context, &attr);
+ cq = create_cq(context, &attr, NULL);
return cq ? ibv_cq_ex_to_cq(cq) : NULL;
}
@@ -661,13 +729,37 @@ struct ibv_cq_ex *hns_roce_u_create_cq_ex(struct ibv_context *context,
{
struct ibv_cq_ex *cq;
- cq = create_cq(context, attr);
+ cq = create_cq(context, attr, NULL);
if (cq)
hns_roce_attach_cq_ex_ops(cq, attr->wc_flags);
return cq;
}
+struct ibv_cq_ex *hnsdv_create_cq_ex(struct ibv_context *context,
+ struct ibv_cq_init_attr_ex *cq_attr,
+ struct hnsdv_cq_init_attr *hns_cq_attr)
+{
+ struct hns_roce_context *ctx = context ? to_hr_ctx(context) : NULL;
+ struct ibv_cq_ex *cq;
+
+ if (!ctx || !cq_attr) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ if (!is_hns_dev(context->device)) {
+ errno = EOPNOTSUPP;
+ return NULL;
+ }
+
+ cq = create_cq(context, cq_attr, hns_cq_attr);
+ if (cq)
+ hns_roce_attach_cq_ex_ops(cq, cq_attr->wc_flags);
+
+ return cq;
+}
+
void hns_roce_u_cq_event(struct ibv_cq *cq)
{
to_hr_cq(cq)->arm_sn++;
diff --git a/providers/hns/hnsdv.h b/providers/hns/hnsdv.h
index 159edb8..c5c7c11 100644
--- a/providers/hns/hnsdv.h
+++ b/providers/hns/hnsdv.h
@@ -92,6 +92,28 @@ struct hnsdv_context {
int hnsdv_query_device(struct ibv_context *ctx_in,
struct hnsdv_context *attrs_out);
+enum hnsdv_cq_init_attr_mask {
+ HNSDV_CQ_INIT_ATTR_MASK_CREATE_FLAGS = 1 << 0,
+};
+
+enum hnsdv_cq_create_flags {
+ HNSDV_CQ_CREATE_ENABLE_POE_MODE = 1 << 0,
+};
+
+struct hnsdv_cq_init_attr {
+ uint64_t comp_mask; /* Use enum hnsdv_cq_init_attr_mask */
+ uint64_t create_flags; /* Use enum hnsdv_cq_create_flags */
+ uint8_t poe_channel; /* poe channel to use */
+ uint8_t reserved[7];
+};
+
+struct ibv_cq_ex *hnsdv_create_cq_ex(struct ibv_context *context,
+ struct ibv_cq_init_attr_ex *cq_attr,
+ struct hnsdv_cq_init_attr *hns_cq_attr);
+
+/* used in stars mode */
+void hnsdv_update_sq_ci(struct ibv_qp *qp, uint32_t inc_idx);
+
#ifdef __cplusplus
}
#endif
diff --git a/providers/hns/libhns.map b/providers/hns/libhns.map
index ebf28eb..27efc83 100644
--- a/providers/hns/libhns.map
+++ b/providers/hns/libhns.map
@@ -6,5 +6,7 @@ HNS_1.0 {
hnsdv_open_device;
hnsdv_create_qp;
hnsdv_query_device;
+ hnsdv_create_cq_ex;
+ hnsdv_update_sq_ci;
local: *;
};
--
2.25.1

View File

@ -1,60 +0,0 @@
From 56e347494c9d534e521ce836ad8419d0e3857e95 Mon Sep 17 00:00:00 2001
From: Chengchang Tang <tangchengchang@huawei.com>
Date: Mon, 30 Oct 2023 16:59:19 +0800
Subject: [PATCH 5/8] Update kernel headers for supporting STARS QP in HNS
driver inclusion
category: feature
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I8C6X4
---------------------------------------------------------------
To commit 9044608a998e ("RDMA/hns: Support STARS mode QP")
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
---
kernel-headers/rdma/hns-abi.h | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/kernel-headers/rdma/hns-abi.h b/kernel-headers/rdma/hns-abi.h
index 817fc06..5969bde 100644
--- a/kernel-headers/rdma/hns-abi.h
+++ b/kernel-headers/rdma/hns-abi.h
@@ -86,6 +86,10 @@ enum hns_roce_create_qp_comp_mask {
HNS_ROCE_CREATE_QP_MASK_CONGEST_TYPE = 1 << 1,
};
+enum hns_roce_create_qp_flags {
+ HNS_ROCE_CREATE_QP_FLAGS_STARS_MODE = 1 << 0,
+};
+
enum hns_roce_congest_type_flags {
HNS_ROCE_CREATE_QP_FLAGS_DCQCN = 1 << 0,
HNS_ROCE_CREATE_QP_FLAGS_LDCP = 1 << 1,
@@ -102,8 +106,8 @@ struct hns_roce_ib_create_qp {
__u8 reserved[4];
__u8 pageshift;
__aligned_u64 sdb_addr;
- __aligned_u64 comp_mask;
- __aligned_u64 create_flags;
+ __aligned_u64 comp_mask; /* Use enum hns_roce_create_qp_comp_mask */
+ __aligned_u64 create_flags; /* Use enum hns_roce_create_qp_flags */
__aligned_u64 congest_type_flags;
};
@@ -115,10 +119,11 @@ enum hns_roce_qp_cap_flags {
HNS_ROCE_QP_CAP_DYNAMIC_CTX_ATTACH = 1 << 4,
HNS_ROCE_QP_CAP_DIRECT_WQE = 1 << 5,
HNS_ROCE_QP_CAP_DYNAMIC_CTX_DETACH = 1 << 6,
+ HNS_ROCE_QP_CAP_STARS_SQ_MODE = 1 << 7,
};
struct hns_roce_ib_create_qp_resp {
- __aligned_u64 cap_flags;
+ __aligned_u64 cap_flags; /* Use enum hns_roce_qp_cap_flags */
__aligned_u64 dwqe_mmap_key;
};
--
2.25.1

View File

@ -1,203 +0,0 @@
From 904688d4b1cd02c27994c192d58b26903ac4e951 Mon Sep 17 00:00:00 2001
From: Chengchang Tang <tangchengchang@huawei.com>
Date: Mon, 30 Oct 2023 16:59:20 +0800
Subject: [PATCH 6/8] libhns: Support STARS mode QP
driver inclusion
category: feature
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I8C6X4
---------------------------------------------------------------
STARS is a HW scheduler. A QP in STARS mode is taken over by the STARS
HW.
In this case the driver must not ring the doorbell, otherwise a CQE error
may occur.
Currently STARS only supports taking over the SQ, so such QPs only support
RDMA operations.
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
---
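A hypothetical sketch of how the pieces fit together: a STARS QP must be an
RC QP whose send CQ is a POE CQ, and because the driver no longer rings the
doorbell, the application advances the SQ CI itself via hnsdv_update_sq_ci().
The PD, CQs and capability numbers below are assumptions:

#include <infiniband/verbs.h>
#include <infiniband/hnsdv.h>

/* Create an RC QP taken over by the STARS scheduler. */
static struct ibv_qp *create_stars_qp(struct ibv_context *ctx, struct ibv_pd *pd,
				      struct ibv_cq_ex *poe_send_cq,
				      struct ibv_cq *recv_cq)
{
	struct ibv_qp_init_attr_ex attr = {
		.qp_type = IBV_QPT_RC,	/* STARS mode only supports RC */
		.comp_mask = IBV_QP_INIT_ATTR_PD,
		.pd = pd,
		.send_cq = ibv_cq_ex_to_cq(poe_send_cq), /* must be a POE CQ */
		.recv_cq = recv_cq,
		.cap = { .max_send_wr = 64, .max_send_sge = 1,
			 .max_recv_wr = 1, .max_recv_sge = 1 },
	};
	struct hnsdv_qp_init_attr hns_attr = {
		.comp_mask = HNSDV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS,
		.create_flags = HNSDV_QP_CREATE_ENABLE_STARS_MODE,
	};

	return hnsdv_create_qp(ctx, &attr, &hns_attr);
}

/* Once the application knows n SQ WQEs have completed, it moves the SQ
 * consumer index itself (the driver does not ring the doorbell): */
static void consume_sq_completions(struct ibv_qp *qp, uint32_t n)
{
	hnsdv_update_sq_ci(qp, n);
}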
providers/hns/hns_roce_u_hw_v2.c | 23 ++++++++-----
providers/hns/hns_roce_u_verbs.c | 59 ++++++++++++++++++++++++++++++--
providers/hns/hnsdv.h | 2 ++
3 files changed, 72 insertions(+), 12 deletions(-)
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index 688b760..ab6d652 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -1675,11 +1675,13 @@ out:
udma_to_device_barrier();
- if (nreq == 1 && !ret &&
- (qp->flags & HNS_ROCE_QP_CAP_DIRECT_WQE))
- hns_roce_write_dwqe(qp, wqe);
- else
- hns_roce_update_sq_db(ctx, qp);
+ if (!(qp->flags & HNS_ROCE_QP_CAP_STARS_SQ_MODE)) {
+ if (nreq == 1 && !ret &&
+ (qp->flags & HNS_ROCE_QP_CAP_DIRECT_WQE))
+ hns_roce_write_dwqe(qp, wqe);
+ else
+ hns_roce_update_sq_db(ctx, qp);
+ }
if (qp->flags & HNS_ROCE_QP_CAP_SQ_RECORD_DB)
*(qp->sdb) = qp->sq.head & 0xffff;
@@ -3008,10 +3010,13 @@ static int wr_complete(struct ibv_qp_ex *ibv_qp)
qp->next_sge = qp->sge_info.start_idx;
udma_to_device_barrier();
- if (nreq == 1 && (qp->flags & HNS_ROCE_QP_CAP_DIRECT_WQE))
- hns_roce_write_dwqe(qp, qp->cur_wqe);
- else
- hns_roce_update_sq_db(ctx, qp);
+ if (!(qp->flags & HNS_ROCE_QP_CAP_STARS_SQ_MODE)) {
+ if (nreq == 1 &&
+ (qp->flags & HNS_ROCE_QP_CAP_DIRECT_WQE))
+ hns_roce_write_dwqe(qp, qp->cur_wqe);
+ else
+ hns_roce_update_sq_db(ctx, qp);
+ }
if (qp->flags & HNS_ROCE_QP_CAP_SQ_RECORD_DB)
*(qp->sdb) = qp->sq.head & 0xffff;
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index c7863d7..2ad9ea0 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -1211,7 +1211,8 @@ static int check_qp_congest_type(struct hns_roce_context *ctx,
enum {
HNSDV_QP_SUP_CREATE_FLAGS = HNSDV_QP_CREATE_ENABLE_DCA_MODE |
- HNSDV_QP_CREATE_ENABLE_UD_SL,
+ HNSDV_QP_CREATE_ENABLE_UD_SL |
+ HNSDV_QP_CREATE_ENABLE_STARS_MODE,
};
static int check_hnsdv_qp_attr(struct hns_roce_context *ctx,
@@ -1224,7 +1225,7 @@ static int check_hnsdv_qp_attr(struct hns_roce_context *ctx,
return 0;
if (!check_comp_mask(hns_attr->comp_mask, HNSDV_QP_SUP_COMP_MASK)) {
- verbs_err(&ctx->ibv_ctx, "invalid hnsdv comp_mask 0x%x.\n",
+ verbs_err(&ctx->ibv_ctx, "invalid comp_mask 0x%"PRIu64".\n",
hns_attr->comp_mask);
return -EINVAL;
}
@@ -1232,7 +1233,7 @@ static int check_hnsdv_qp_attr(struct hns_roce_context *ctx,
if (hns_attr->comp_mask & HNSDV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS &&
!check_comp_mask(hns_attr->create_flags,
HNSDV_QP_SUP_CREATE_FLAGS)) {
- verbs_err(&ctx->ibv_ctx, "invalid create_flags 0x%x.\n",
+ verbs_err(&ctx->ibv_ctx, "invalid create_flags 0x%"PRIu32".\n",
hns_attr->create_flags);
return -EOPNOTSUPP;
}
@@ -1244,6 +1245,41 @@ static int check_hnsdv_qp_attr(struct hns_roce_context *ctx,
return 0;
}
+static int check_hnsdv_qp_create_flag(struct hns_roce_context *ctx,
+ struct ibv_qp_init_attr_ex *attr,
+ struct hnsdv_qp_init_attr *hns_attr,
+ uint32_t *hns_qp_create_flags)
+{
+ struct hns_roce_cq *send_cq = attr->send_cq ?
+ to_hr_cq(attr->send_cq) : NULL;
+
+ if (!hns_attr)
+ return 0;
+
+ if (hns_attr->create_flags & HNSDV_QP_CREATE_ENABLE_STARS_MODE) {
+ if (attr->qp_type != IBV_QPT_RC) {
+ verbs_err(&ctx->ibv_ctx,
+ "STARS mode only support RC\n");
+ return EINVAL;
+ }
+
+ if (hns_attr->create_flags & HNSDV_QP_CREATE_ENABLE_DCA_MODE) {
+ verbs_err(&ctx->ibv_ctx,
+ "STARS mode don't support DCA\n");
+ return EINVAL;
+ }
+
+ if (!send_cq || !(send_cq->flags & HNS_ROCE_CQ_FLAG_POE_EN)) {
+ verbs_err(&ctx->ibv_ctx,
+ "STARS QP should bind POE CQ with its SQ.\n");
+ return EINVAL;
+ }
+ *hns_qp_create_flags |= HNS_ROCE_CREATE_QP_FLAGS_STARS_MODE;
+ }
+
+ return 0;
+}
+
enum {
CREATE_QP_SUP_COMP_MASK = IBV_QP_INIT_ATTR_PD | IBV_QP_INIT_ATTR_XRCD |
IBV_QP_INIT_ATTR_SEND_OPS_FLAGS,
@@ -1352,6 +1388,11 @@ static int verify_qp_create_attr(struct hns_roce_context *ctx,
if (ret)
return ret;
+ ret = check_hnsdv_qp_create_flag(ctx, attr, hns_attr,
+ &cmd_flag->create_flags);
+ if (ret)
+ return ret;
+
return verify_qp_create_cap(ctx, attr);
}
@@ -1801,6 +1842,11 @@ static int qp_exec_create_cmd(struct ibv_qp_init_attr_ex *attr,
cmd_ex.congest_type_flags = cmd_flag->congest_type_flags;
}
+ if (cmd_flag->create_flags) {
+ cmd_ex.comp_mask |= HNS_ROCE_CREATE_QP_MASK_CREATE_FLAGS;
+ cmd_ex.create_flags = cmd_flag->create_flags;
+ }
+
ret = ibv_cmd_create_qp_ex2(&ctx->ibv_ctx.context, &qp->verbs_qp, attr,
&cmd_ex.ibv_cmd, sizeof(cmd_ex),
&resp_ex.ibv_resp, sizeof(resp_ex));
@@ -2000,6 +2046,13 @@ struct ibv_qp *hnsdv_create_qp(struct ibv_context *context,
struct ibv_qp_init_attr_ex *qp_attr,
struct hnsdv_qp_init_attr *hns_attr)
{
+ struct hns_roce_context *ctx = context ? to_hr_ctx(context) : NULL;
+
+ if (!ctx || !qp_attr) {
+ errno = EINVAL;
+ return NULL;
+ }
+
if (!is_hns_dev(context->device)) {
errno = EOPNOTSUPP;
return NULL;
diff --git a/providers/hns/hnsdv.h b/providers/hns/hnsdv.h
index c5c7c11..98c68fd 100644
--- a/providers/hns/hnsdv.h
+++ b/providers/hns/hnsdv.h
@@ -44,6 +44,7 @@ struct ibv_context *hnsdv_open_device(struct ibv_device *device,
enum hnsdv_qp_create_flags {
HNSDV_QP_CREATE_ENABLE_DCA_MODE = 1 << 0,
HNSDV_QP_CREATE_ENABLE_UD_SL = 1 << 1,
+ HNSDV_QP_CREATE_ENABLE_STARS_MODE = 1 << 2,
};
enum hnsdv_qp_congest_ctrl_type {
@@ -62,6 +63,7 @@ struct hnsdv_qp_init_attr {
uint64_t comp_mask; /* Use enum hnsdv_qp_init_attr_mask */
uint32_t create_flags; /* Use enum hnsdv_qp_create_flags */
uint8_t congest_type; /* Use enum hnsdv_qp_congest_ctrl_type */
+ uint8_t reserved[7];
};
struct ibv_qp *hnsdv_create_qp(struct ibv_context *context,
--
2.25.1
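For readers unfamiliar with the hnsdv direct-verbs extension, the sketch below shows how a STARS-mode QP might be created under the constraints described in this patch: a POE CQ is created first and then used as the send CQ of an RC QP. This is a hypothetical usage sketch only; the flag and structure names come from hnsdv.h as shown above, while the exact hnsdv_create_cq_ex() parameter list, the header path, the CQE/WR sizes and the POE channel number are assumptions.

```c
#include <infiniband/verbs.h>
#include <infiniband/hnsdv.h>	/* installed header path assumed */

/* Hypothetical sketch: create a POE CQ and bind it as the SQ's CQ of an
 * RC QP created in STARS mode, as required by the checks in this patch. */
static struct ibv_qp *create_stars_qp(struct ibv_context *ctx, struct ibv_pd *pd)
{
	struct hnsdv_cq_init_attr hns_cq_attr = {
		.comp_mask = HNSDV_CQ_INIT_ATTR_MASK_CREATE_FLAGS,
		.create_flags = HNSDV_CQ_CREATE_ENABLE_POE_MODE,
		.poe_channel = 0,			/* assumed channel id */
	};
	struct ibv_cq_init_attr_ex cq_attr = { .cqe = 64 };
	struct ibv_cq_ex *cq = hnsdv_create_cq_ex(ctx, &cq_attr, &hns_cq_attr);

	if (!cq)
		return NULL;

	struct hnsdv_qp_init_attr hns_qp_attr = {
		.comp_mask = HNSDV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS,
		.create_flags = HNSDV_QP_CREATE_ENABLE_STARS_MODE,
	};
	struct ibv_qp_init_attr_ex qp_attr = {
		.qp_type = IBV_QPT_RC,			/* STARS mode only supports RC */
		.send_cq = ibv_cq_ex_to_cq(cq),		/* SQ must use the POE CQ */
		.recv_cq = ibv_cq_ex_to_cq(cq),
		.comp_mask = IBV_QP_INIT_ATTR_PD,
		.pd = pd,
		.cap = { .max_send_wr = 32, .max_recv_wr = 32,
			 .max_send_sge = 1, .max_recv_sge = 1 },
	};

	return hnsdv_create_qp(ctx, &qp_attr, &hns_qp_attr);
}
```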

View File

@ -1,58 +0,0 @@
From d13868688d40281f1da602befb19da850b59b725 Mon Sep 17 00:00:00 2001
From: Chengchang Tang <tangchengchang@huawei.com>
Date: Mon, 30 Oct 2023 16:59:21 +0800
Subject: [PATCH 7/8] Update kernel headers for supporting write with notify
driver inclusion
category: feature
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I8C6X4
---------------------------------------------------------------
To commit 7261847876e4 ("RDMA/hns: Support write with notify")
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
---
kernel-headers/rdma/hns-abi.h | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/kernel-headers/rdma/hns-abi.h b/kernel-headers/rdma/hns-abi.h
index 5969bde..785c4e1 100644
--- a/kernel-headers/rdma/hns-abi.h
+++ b/kernel-headers/rdma/hns-abi.h
@@ -38,6 +38,7 @@
enum hns_roce_create_cq_create_flags {
HNS_ROCE_CREATE_CQ_FLAGS_POE_MODE = 1 << 0,
+ HNS_ROCE_CREATE_CQ_FLAGS_WRITE_WITH_NOTIFY = 1 << 1,
};
struct hns_roce_ib_create_cq {
@@ -47,12 +48,15 @@ struct hns_roce_ib_create_cq {
__u32 reserved;
__aligned_u64 create_flags; /* Use enum hns_roce_create_cq_create_flags */
__u8 poe_channel;
- __u8 rsv[7];
+ __u8 notify_mode;
+ __u16 notify_idx;
+ __u16 rsv[2];
};
enum hns_roce_cq_cap_flags {
HNS_ROCE_CQ_FLAG_RECORD_DB = 1 << 0,
HNS_ROCE_CQ_FLAG_POE_EN = 1 << 2,
+ HNS_ROCE_CQ_FLAG_NOTIFY_EN = 1 << 3,
};
struct hns_roce_ib_create_cq_resp {
@@ -120,6 +124,7 @@ enum hns_roce_qp_cap_flags {
HNS_ROCE_QP_CAP_DIRECT_WQE = 1 << 5,
HNS_ROCE_QP_CAP_DYNAMIC_CTX_DETACH = 1 << 6,
HNS_ROCE_QP_CAP_STARS_SQ_MODE = 1 << 7,
+ HNS_ROCE_QP_CAP_WRITE_WITH_NOTIFY = 1 << 8,
};
struct hns_roce_ib_create_qp_resp {
--
2.25.1

View File

@ -1,196 +0,0 @@
From 64933c92842d34190c8cda9a864505d0558d3f5f Mon Sep 17 00:00:00 2001
From: Chengchang Tang <tangchengchang@huawei.com>
Date: Mon, 30 Oct 2023 16:59:22 +0800
Subject: [PATCH] libhns: Support write with notify
driver inclusion
category: feature
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I8C6X4
---------------------------------------------------------------
This patch adds support for the write with notify operation.
To use this operation, the CQ must be configured with write with
notify enabled and bound to the corresponding RQ.
The notify address depends on the notify_idx configured by the user;
this index points into the notify memory array configured by the
kernel ULP.
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
---
providers/hns/hns_roce_u_hw_v2.c | 23 +++++++++++++++++++++++
providers/hns/hns_roce_u_hw_v2.h | 1 +
providers/hns/hns_roce_u_verbs.c | 32 ++++++++++++++++++++++++++++++--
providers/hns/hnsdv.h | 17 ++++++++++++++++-
providers/hns/libhns.map | 1 +
5 files changed, 71 insertions(+), 3 deletions(-)
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index ab6d652..495fbcb 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -2993,6 +2993,29 @@ static void wr_start(struct ibv_qp_ex *ibv_qp)
qp->err = 0;
}
+void hnsdv_wr_write_notify(struct ibv_qp_ex *ibv_qp, uint32_t rkey,
+ uint64_t remote_addr, uint32_t offset, uint8_t value)
+{
+#define NOTIFY_OFFSET_MASK GENMASK(23, 2)
+#define NOTIFY_VAL_SHIFT 24
+ struct hns_roce_rc_sq_wqe *wqe;
+ struct hns_roce_qp *qp;
+
+ if (!ibv_qp)
+ return;
+
+ qp = to_hr_qp(&ibv_qp->qp_base);
+ wqe = init_rc_wqe(qp, ibv_qp->wr_id,
+ HNS_ROCE_WQE_OP_RDMA_WRITE_WITH_NOTIFY);
+ if (unlikely(!wqe))
+ return;
+
+ wqe->va = htole64(remote_addr);
+ wqe->rkey = htole32(rkey);
+ wqe->immtdata = htole32(((uint32_t)value << NOTIFY_VAL_SHIFT) |
+ (offset & NOTIFY_OFFSET_MASK));
+}
+
static int wr_complete(struct ibv_qp_ex *ibv_qp)
{
struct hns_roce_context *ctx = to_hr_ctx(ibv_qp->qp_base.context);
diff --git a/providers/hns/hns_roce_u_hw_v2.h b/providers/hns/hns_roce_u_hw_v2.h
index 50a920f..fa83bbe 100644
--- a/providers/hns/hns_roce_u_hw_v2.h
+++ b/providers/hns/hns_roce_u_hw_v2.h
@@ -61,6 +61,7 @@ enum {
HNS_ROCE_WQE_OP_ATOMIC_MASK_FETCH_AND_ADD = 0x9,
HNS_ROCE_WQE_OP_FAST_REG_PMR = 0xa,
HNS_ROCE_WQE_OP_BIND_MW_TYPE = 0xc,
+ HNS_ROCE_WQE_OP_RDMA_WRITE_WITH_NOTIFY = 0x16,
HNS_ROCE_WQE_OP_MASK = 0x1f
};
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index 2ad9ea0..5e46f89 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -476,9 +476,26 @@ enum {
};
enum {
- HNSDV_CQ_SUP_CREATE_FLAGS = HNSDV_CQ_CREATE_ENABLE_POE_MODE,
+ HNSDV_CQ_SUP_CREATE_FLAGS = HNSDV_CQ_CREATE_ENABLE_POE_MODE |
+ HNSDV_CQ_CREATE_ENABLE_NOTIFY,
};
+static int verify_notify_attr(struct hns_roce_context *ctx,
+ struct hnsdv_cq_init_attr *hns_cq_attr)
+{
+ if (!(hns_cq_attr->comp_mask & HNSDV_CQ_INIT_ATTR_MASK_CREATE_FLAGS) ||
+ !(hns_cq_attr->create_flags & HNSDV_CQ_CREATE_ENABLE_NOTIFY))
+ return 0;
+
+ if (hns_cq_attr->notify_mode >= HNSDV_CQ_NOTIFY_MODE_GUARD) {
+ verbs_err(&ctx->ibv_ctx, "Invalid notify mode %u\n",
+ hns_cq_attr->notify_mode);
+ return EINVAL;
+ }
+
+ return 0;
+}
+
static int verify_hnsdv_cq_create_attr(struct hns_roce_context *ctx,
struct hnsdv_cq_init_attr *hns_cq_attr)
{
@@ -499,7 +516,7 @@ static int verify_hnsdv_cq_create_attr(struct hns_roce_context *ctx,
return EOPNOTSUPP;
}
- return 0;
+ return verify_notify_attr(ctx, hns_cq_attr);
}
static int verify_cq_create_attr(struct ibv_cq_init_attr_ex *attr,
@@ -551,6 +568,12 @@ static void set_hnsdv_cq_attr(struct hns_roce_ib_create_cq *cmd_drv,
cmd_drv->create_flags |= HNS_ROCE_CREATE_CQ_FLAGS_POE_MODE;
cmd_drv->poe_channel = hns_cq_attr->poe_channel;
}
+
+ if (hns_cq_attr->create_flags & HNSDV_CQ_CREATE_ENABLE_NOTIFY) {
+ cmd_drv->create_flags |= HNS_ROCE_CREATE_CQ_FLAGS_WRITE_WITH_NOTIFY;
+ cmd_drv->notify_mode = hns_cq_attr->notify_mode;
+ cmd_drv->notify_idx = hns_cq_attr->notify_idx;
+ }
}
static int check_hnsdv_cq_flags(struct hnsdv_cq_init_attr *hns_cq_attr,
@@ -562,6 +585,11 @@ static int check_hnsdv_cq_flags(struct hnsdv_cq_init_attr *hns_cq_attr,
if ((hns_cq_attr->create_flags & HNSDV_CQ_CREATE_ENABLE_POE_MODE) &&
!(cq->flags & HNS_ROCE_CQ_FLAG_POE_EN))
return EOPNOTSUPP;
+
+ if ((hns_cq_attr->create_flags & HNSDV_CQ_CREATE_ENABLE_NOTIFY) &&
+ !(cq->flags & HNS_ROCE_CQ_FLAG_NOTIFY_EN))
+ return EOPNOTSUPP;
+
return 0;
}
diff --git a/providers/hns/hnsdv.h b/providers/hns/hnsdv.h
index 98c68fd..7d95db4 100644
--- a/providers/hns/hnsdv.h
+++ b/providers/hns/hnsdv.h
@@ -100,13 +100,24 @@ enum hnsdv_cq_init_attr_mask {
enum hnsdv_cq_create_flags {
HNSDV_CQ_CREATE_ENABLE_POE_MODE = 1 << 0,
+ HNSDV_CQ_CREATE_ENABLE_NOTIFY = 1 << 1,
+};
+
+enum hnsdv_cq_notify_mode {
+ HNSDV_CQ_NOTIFY_MODE_64B_ALIGN = 0,
+ HNSDV_CQ_NOTIFY_MODE_4B_ALIGN = 1,
+ HNSDV_CQ_NOTIFY_MODE_DDR_64B_ALIGN = 2,
+ HNSDV_CQ_NOTIFY_MODE_DDR_4B_ALIGN = 3,
+ HNSDV_CQ_NOTIFY_MODE_GUARD = 4, /* Invalid for user */
};
struct hnsdv_cq_init_attr {
uint64_t comp_mask; /* Use enum hnsdv_cq_init_attr_mask */
uint64_t create_flags; /* Use enum hnsdv_cq_create_flags */
uint8_t poe_channel; /* poe channel to use */
- uint8_t reserved[7];
+ uint8_t notify_mode;
+ uint16_t notify_idx;
+ uint32_t reserved;
};
struct ibv_cq_ex *hnsdv_create_cq_ex(struct ibv_context *context,
@@ -116,6 +127,10 @@ struct ibv_cq_ex *hnsdv_create_cq_ex(struct ibv_context *context,
/* used in stars mode */
void hnsdv_update_sq_ci(struct ibv_qp *qp, uint32_t inc_idx);
+/* write with notify */
+void hnsdv_wr_write_notify(struct ibv_qp_ex *qpex, uint32_t rkey,
+ uint64_t remote_addr, uint32_t offset, uint8_t value);
+
#ifdef __cplusplus
}
#endif
diff --git a/providers/hns/libhns.map b/providers/hns/libhns.map
index 27efc83..74d85e1 100644
--- a/providers/hns/libhns.map
+++ b/providers/hns/libhns.map
@@ -8,5 +8,6 @@ HNS_1.0 {
hnsdv_query_device;
hnsdv_create_cq_ex;
hnsdv_update_sq_ci;
+ hnsdv_wr_write_notify;
local: *;
};
--
2.25.1
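As a rough illustration of how the new verb might be driven from the extended-QP (new IO) path, here is a minimal, hypothetical posting sketch. It assumes a QP whose send CQ was created with HNSDV_CQ_CREATE_ENABLE_NOTIFY and a suitable notify_mode/notify_idx; the wr_id/wr_flags values and the single-SGE payload are placeholders, and the header path is assumed.

```c
#include <infiniband/verbs.h>
#include <infiniband/hnsdv.h>	/* installed header path assumed */

/* Hypothetical sketch: post one RDMA write with notify on an extended QP. */
static int post_write_notify(struct ibv_qp_ex *qpex, struct ibv_sge *sge,
			     uint32_t rkey, uint64_t remote_addr,
			     uint32_t notify_offset, uint8_t notify_value)
{
	ibv_wr_start(qpex);

	qpex->wr_id = 1;			/* placeholder */
	qpex->wr_flags = IBV_SEND_SIGNALED;

	/* Write the payload to remote_addr/rkey; on completion the HW also
	 * stores notify_value at notify_offset inside the notify memory
	 * selected by the CQ's notify_idx. The offset is masked with
	 * GENMASK(23, 2), i.e. it must be 4-byte aligned and below 16 MB. */
	hnsdv_wr_write_notify(qpex, rkey, remote_addr,
			      notify_offset, notify_value);
	ibv_wr_set_sge_list(qpex, 1, sge);

	return ibv_wr_complete(qpex);
}
```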

View File

@ -1,58 +0,0 @@
From 7d81108ba99d349558bc8c6d65c787efc31c52f4 Mon Sep 17 00:00:00 2001
From: Junxian Huang <huangjunxian6@hisilicon.com>
Date: Wed, 22 Nov 2023 16:07:14 +0800
Subject: [PATCH] libhns: Get dmac from kernel driver
driver inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I8HZ7W
--------------------------------------------------------------------------
As the dmac is already resolved in the kernel while creating an AH,
there is no need to repeat the resolution in userspace. Prefer the dmac
returned by the kernel driver and fall back to userspace resolution only
when the kernel driver does not provide one.
Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com>
---
kernel-headers/rdma/hns-abi.h | 2 +-
providers/hns/hns_roce_u_verbs.c | 10 +++++++---
2 files changed, 8 insertions(+), 4 deletions(-)
diff --git a/kernel-headers/rdma/hns-abi.h b/kernel-headers/rdma/hns-abi.h
index 785c4e1..8581df9 100644
--- a/kernel-headers/rdma/hns-abi.h
+++ b/kernel-headers/rdma/hns-abi.h
@@ -135,7 +135,7 @@ struct hns_roce_ib_create_qp_resp {
struct hns_roce_ib_create_ah_resp {
__u8 priority;
__u8 tc_mode;
- __u8 reserved[6];
+ __u8 dmac[6];
};
struct hns_roce_ib_modify_qp_resp {
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index 5e46f89..c906632 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -2210,9 +2210,13 @@ struct ibv_ah *hns_roce_u_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr)
if (ibv_cmd_create_ah(pd, &ah->ibv_ah, attr, &resp.ibv_resp, sizeof(resp)))
goto err;
- if (hr_dev->link_type != HNS_DEV_LINK_TYPE_UB &&
- ibv_resolve_eth_l2_from_gid(pd->context, attr, ah->av.mac, NULL))
- goto err;
+ if (hr_dev->link_type != HNS_DEV_LINK_TYPE_UB) {
+ if (memcmp(ah->av.mac, resp.dmac, ETH_ALEN))
+ memcpy(ah->av.mac, resp.dmac, ETH_ALEN);
+ else if (ibv_resolve_eth_l2_from_gid(pd->context, attr,
+ ah->av.mac, NULL))
+ goto err;
+ }
if (resp.tc_mode == HNS_ROCE_TC_MAP_MODE_DSCP)
ah->av.sl = resp.priority;
--
2.25.1

View File

@ -1,88 +0,0 @@
From 95f3cc1f25c091f97aefceac268fe6435d0861c3 Mon Sep 17 00:00:00 2001
From: Ran Zhou <zhouran10@partner.com>
Date: Mon, 27 Nov 2023 16:39:48 +0800
Subject: [PATCH 71/75] libhns: Corrects several issues with output format and
variable types.
driver inclusion
category: bugfix
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I8J2S3?from=project-issue
--------------------------------------------------------------------------
1. Unify the types of two variables to avoid an infinite loop.
2. Standardize the output format specifiers.
Signed-off-by: Luoyouming <luoyouming@huawei.com>
---
providers/hns/hns_roce_u.c | 2 +-
providers/hns/hns_roce_u_hw_v2.c | 10 +++++-----
2 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c
index 084385b..f30486f 100644
--- a/providers/hns/hns_roce_u.c
+++ b/providers/hns/hns_roce_u.c
@@ -179,7 +179,7 @@ static void set_dca_pool_param(struct hns_roce_context *ctx,
dca_ctx->min_size = HNS_DCA_MAX_MEM_SIZE;
verbs_debug(&ctx->ibv_ctx,
- "Support DCA, unit %d, max %ld, min %ld Bytes.\n",
+ "Support DCA, unit %u, max %lu, min %lu Bytes.\n",
dca_ctx->unit_size, dca_ctx->max_size, dca_ctx->min_size);
}
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index 495fbcb..754f918 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -2680,8 +2680,8 @@ static void set_inline_data_list_rc(struct hns_roce_qp *qp,
{
unsigned int msg_len = qp->sge_info.total_len;
void *dseg;
+ size_t i;
int ret;
- int i;
hr_reg_enable(wqe, RCWQE_INLINE);
@@ -2741,7 +2741,7 @@ static void wr_set_inline_data_list_rc(struct ibv_qp_ex *ibv_qp, size_t num_buf,
{
struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base);
struct hns_roce_rc_sq_wqe *wqe = qp->cur_wqe;
- int i;
+ size_t i;
if (!wqe)
return;
@@ -2872,7 +2872,7 @@ static void wr_set_sge_list_ud(struct ibv_qp_ex *ibv_qp, size_t num_sge,
}
hr_reg_write(wqe, UDWQE_MSG_START_SGE_IDX, sge_idx & mask);
- for (int i = 0; i < num_sge; i++) {
+ for (size_t i = 0; i < num_sge; i++) {
if (!sg_list[i].length)
continue;
@@ -2899,8 +2899,8 @@ static void set_inline_data_list_ud(struct hns_roce_qp *qp,
uint8_t data[HNS_ROCE_MAX_UD_INL_INN_SZ] = {};
unsigned int msg_len = qp->sge_info.total_len;
void *tmp;
+ size_t i;
int ret;
- int i;
if (!check_inl_data_len(qp, msg_len)) {
qp->err = EINVAL;
@@ -2962,7 +2962,7 @@ static void wr_set_inline_data_list_ud(struct ibv_qp_ex *ibv_qp, size_t num_buf,
{
struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base);
struct hns_roce_ud_sq_wqe *wqe = qp->cur_wqe;
- int i;
+ size_t i;
if (!wqe)
return;
--
2.25.1
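The class of problem fixed here, mixing int counters with size_t bounds and using conversion specifiers that do not match the argument types, is easy to reproduce outside the driver. The following stand-alone snippet is an illustration only, not part of the patch; it shows why the counter type should match the bound type and the specifier should match the argument.

```c
#include <stdio.h>
#include <stddef.h>

int main(void)
{
	size_t num_sge = 3;
	int i = -1;		/* e.g. left over from an error path */

	/* -1 is converted to SIZE_MAX for the comparison, so this branch is
	 * NOT taken even though -1 < 3 numerically. */
	if (i < num_sge)
		puts("never printed");
	else
		puts("signed/unsigned comparison surprise");

	/* Keeping the counter type equal to the bound type avoids the
	 * surprise, and %zu matches a size_t argument. */
	for (size_t j = 0; j < num_sge; j++)
		printf("sge %zu of %zu\n", j + 1, num_sge);

	return 0;
}
```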

View File

@ -1,45 +0,0 @@
From 884663cd803e3781aec9c45f7fc0145b6a2b4539 Mon Sep 17 00:00:00 2001
From: Luoyouming <luoyouming@huawei.com>
Date: Mon, 27 Nov 2023 16:39:49 +0800
Subject: [PATCH 72/75] libhns: Add input parameter check for
hnsdv_query_device()
driver inclusion
category: cleanup
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I8J2W9?from=project-issue
--------------------------------------------------------------------------
Add null pointer check for pointer parameter.
Signed-off-by: Luoyouming <luoyouming@huawei.com>
---
providers/hns/hns_roce_u_verbs.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index c906632..e597e93 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -126,15 +126,16 @@ int hnsdv_query_device(struct ibv_context *context,
struct hnsdv_context *attrs_out)
{
struct hns_roce_context *ctx = context ? to_hr_ctx(context) : NULL;
- struct hns_roce_device *hr_dev = to_hr_dev(context->device);
+ struct hns_roce_device *hr_dev;
if (!ctx || !attrs_out)
return EINVAL;
- if (!hr_dev) {
+ if (!context->device || !is_hns_dev(context->device)) {
verbs_err(verbs_get_ctx(context), "not a HNS RoCE device!\n");
return EOPNOTSUPP;
}
+ hr_dev = to_hr_dev(context->device);
memset(attrs_out, 0, sizeof(*attrs_out));
--
2.25.1

View File

@ -1,63 +0,0 @@
From 57499cbc906ec4065f65bb8dfda56081a799746e Mon Sep 17 00:00:00 2001
From: Chengchang Tang <tangchengchang@huawei.com>
Date: Mon, 27 Nov 2023 16:39:51 +0800
Subject: [PATCH 73/75] libhns: Fix uninitialized qp attr when flush cqe
driver inclusion
category: bugfix
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I8J2XW?from=project-issue
--------------------------------------------------------------------------
When flushing cqe, the state of qp needs to be modified. The incoming qp
attr is not initialized, which may lead to undefined behavior.
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
---
providers/hns/hns_roce_u_hw_v2.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index 754f918..b48cabd 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -405,7 +405,7 @@ static int hns_roce_u_v2_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
static int hns_roce_flush_cqe(struct hns_roce_qp *hr_qp, uint8_t status)
{
- struct ibv_qp_attr attr;
+ struct ibv_qp_attr attr = {};
int attr_mask;
if (status != HNS_ROCE_V2_CQE_WR_FLUSH_ERR) {
@@ -1614,8 +1614,8 @@ int hns_roce_u_v2_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr,
struct hns_roce_qp *qp = to_hr_qp(ibvqp);
struct hns_roce_sge_info sge_info = {};
struct hns_roce_rc_sq_wqe *wqe;
+ struct ibv_qp_attr attr = {};
unsigned int wqe_idx, nreq;
- struct ibv_qp_attr attr;
int ret;
ret = check_qp_send(qp, ctx);
@@ -1788,7 +1788,7 @@ static int hns_roce_u_v2_post_recv(struct ibv_qp *ibvqp, struct ibv_recv_wr *wr,
struct hns_roce_context *ctx = to_hr_ctx(ibvqp->context);
struct hns_roce_qp *qp = to_hr_qp(ibvqp);
unsigned int wqe_idx, nreq, max_sge;
- struct ibv_qp_attr attr;
+ struct ibv_qp_attr attr = {};
int ret;
ret = check_qp_recv(qp, ctx);
@@ -3021,7 +3021,7 @@ static int wr_complete(struct ibv_qp_ex *ibv_qp)
struct hns_roce_context *ctx = to_hr_ctx(ibv_qp->qp_base.context);
struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base);
unsigned int nreq = qp->sq.head - qp->rb_sq_head;
- struct ibv_qp_attr attr;
+ struct ibv_qp_attr attr = {};
int err = qp->err;
if (err) {
--
2.25.1

View File

@ -1,64 +0,0 @@
From 9e3f4aa0a83ea0ff9512678e3932e611186d573e Mon Sep 17 00:00:00 2001
From: Chengchang Tang <tangchengchang@huawei.com>
Date: Mon, 27 Nov 2023 16:39:50 +0800
Subject: [PATCH 74/75] libhns: Fix possible overflow in cq clean
driver inclusion
category: bugfix
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I8J2XP?from=project-issue
--------------------------------------------------------------------------
The ci/pi of the hns roce CQ are allowed to wrap around, but in
__hns_roce_v2_cq_clean() this wrap-around may lead to a wrong number
of loop iterations.
This patch fixes it by widening the index type so that the arithmetic
cannot wrap.
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
---
providers/hns/hns_roce_u_hw_v2.c | 25 +++++++++++++------------
1 file changed, 13 insertions(+), 12 deletions(-)
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index b48cabd..fc938de 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -1847,20 +1847,21 @@ out:
static void __hns_roce_v2_cq_clean(struct hns_roce_cq *cq, uint32_t qpn,
struct hns_roce_srq *srq)
{
- int nfreed = 0;
- bool is_recv_cqe;
- uint8_t owner_bit;
- uint16_t wqe_index;
- uint32_t prod_index;
- struct hns_roce_v2_cqe *cqe, *dest;
- struct hns_roce_context *ctx = to_hr_ctx(cq->verbs_cq.cq.context);
-
- for (prod_index = cq->cons_index; get_sw_cqe_v2(cq, prod_index);
- ++prod_index)
- if (prod_index > cq->cons_index + cq->verbs_cq.cq.cqe)
+ struct hns_roce_context *ctx = to_hr_ctx(cq->verbs_cq.cq.context);
+ uint64_t cons_index = cq->cons_index;
+ uint64_t prod_index = cq->cons_index;
+ struct hns_roce_v2_cqe *cqe, *dest;
+ uint16_t wqe_index;
+ uint8_t owner_bit;
+ bool is_recv_cqe;
+ int nfreed = 0;
+
+ for (; get_sw_cqe_v2(cq, prod_index); ++prod_index)
+ if (prod_index > cons_index + cq->verbs_cq.cq.cqe)
break;
- while ((int) --prod_index - (int) cq->cons_index >= 0) {
+ while (prod_index - cons_index > 0) {
+ prod_index--;
cqe = get_cqe_v2(cq, prod_index & cq->verbs_cq.cq.cqe);
if (hr_reg_read(cqe, CQE_LCL_QPN) == qpn) {
is_recv_cqe = hr_reg_read(cqe, CQE_S_R);
--
2.25.1
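To see how the 32-bit arithmetic goes wrong near the wrap point, the following stand-alone snippet (an illustration, not part of the patch) replays the bound check from the first loop with a consumer index close to UINT32_MAX; the concrete numbers are arbitrary.

```c
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t cons_index = UINT32_MAX - 2;	/* consumer index about to wrap */
	uint32_t cqe = 0xff;			/* cq->verbs_cq.cq.cqe */
	uint32_t prod_index = cons_index;	/* first candidate CQE */

	/* 32-bit arithmetic: cons_index + cqe wraps to a tiny value, so the
	 * bound check fires on the very first entry and the scan stops too
	 * early, giving a wrong number of loop iterations. */
	printf("32-bit bound 0x%llx, prod 0x%llx -> break %s\n",
	       (unsigned long long)(uint32_t)(cons_index + cqe),
	       (unsigned long long)prod_index,
	       prod_index > cons_index + cqe ? "immediately (bug)" : "never");

	/* 64-bit arithmetic, as in the patch: the bound stays above prod. */
	uint64_t cons64 = cons_index, prod64 = cons64;

	printf("64-bit bound 0x%llx, prod 0x%llx -> break %s\n",
	       (unsigned long long)(cons64 + cqe),
	       (unsigned long long)prod64,
	       prod64 > cons64 + cqe ? "immediately" : "never");
	return 0;
}
```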

View File

@ -1,36 +0,0 @@
From c989f93b01c45f8453bc839722f91ddc8b000037 Mon Sep 17 00:00:00 2001
From: Chengchang Tang <tangchengchang@huawei.com>
Date: Mon, 27 Nov 2023 16:39:52 +0800
Subject: [PATCH 75/75] libhns: Fix unnecessary dca memory detach
driver inclusion
category: bugfix
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I8J2Y5?from=project-issue
--------------------------------------------------------------------------
If the SQ is not enabled, DCA memory should not be detached. Moreover,
under the current code logic, detaching DCA memory would use a random
SQ index because it is never initialized.
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
---
providers/hns/hns_roce_u_hw_v2.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index fc938de..2fb738d 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -696,7 +696,7 @@ static void dca_detach_qp_buf(struct hns_roce_context *ctx,
hns_roce_spin_unlock(&qp->rq.hr_lock);
hns_roce_spin_unlock(&qp->sq.hr_lock);
- if (is_empty)
+ if (is_empty && qp->sq.wqe_cnt > 0)
hns_roce_detach_dca_mem(ctx, qp->verbs_qp.qp.handle, &attr);
}
--
2.25.1

View File

@ -1,43 +0,0 @@
From 324cd24a22256d964689bf528b643ae06d5a4e58 Mon Sep 17 00:00:00 2001
From: Yangyang Li <liyangyang20@huawei.com>
Date: Fri, 1 Dec 2023 10:43:23 +0800
Subject: [PATCH] libhns: Bugfix for wrong timing of modifying ibv_qp state to
err
driver inclusion
category: bugfix
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I8L4YU
--------------------------------------------------------------------------
Currently the QPC state in HW is modified inside the spinlock critical
section but the ibv_qp state is modified outside of it. There is a short
period when the QPC state has already been modified to err while the
ibv_qp state still remains RTS. WQEs during this period are still posted
by the RTS-state ibv_qp but then dropped by the err-state HW with no
flush CQEs generated.
To fix this problem, the QPC state in HW and the ibv_qp state should both
be modified to err inside the spinlock critical section.
Fixes: f1a80cc3dfe2 ("libhns: Bugfix for flush cqe in case multi-process")
Signed-off-by: Yangyang Li <liyangyang20@huawei.com>
---
providers/hns/hns_roce_u_hw_v2.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index 2fb738d..68d7110 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -1936,6 +1936,8 @@ static int hns_roce_u_v2_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
sizeof(resp_ex));
if (flag) {
+ if (!ret)
+ qp->state = IBV_QPS_ERR;
hns_roce_spin_unlock(&hr_qp->sq.hr_lock);
hns_roce_spin_unlock(&hr_qp->rq.hr_lock);
}
--
2.25.1

View File

@ -1,72 +0,0 @@
From 12f8951a6a98f82dd1f70afc23d66e328cf1988b Mon Sep 17 00:00:00 2001
From: Chengchang Tang <tangchengchang@huawei.com>
Date: Thu, 7 Dec 2023 09:47:59 +0800
Subject: [PATCH] libhns: Fix parent domain unsupported comp mask
driver inclusion
category: bugfix
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I8MF28
HNS does not support any comp_mask for the parent domain. The driver
returns EINVAL if any comp_mask bit is set.
This patch replaces the inappropriate return value EINVAL with
EOPNOTSUPP.
The error was found by the testcase test_mem_align_ud_traffic.
ERROR: test_mem_align_ud_traffic (tests.test_parent_domain.ParentDomain
TrafficTest)
----------------------------------------------------------------------
Traceback (most recent call last):
File "./tests/test_parent_domain.py", line 183, in test_mem_align_ud
_traffic
self.create_players(parent_domain_ud_res,
File "./tests/test_parent_domain.py", line 156, in create_players
self.client = resource(**self.dev_info, **resource_arg)
File "./tests/test_parent_domain.py", line 90, in __init__
super().__init__(**kwargs)
File "./tests/base.py", line 617, in __init__
super(RoCETrafficResources, self).__init__(dev_name, ib_port,
gid_index, **kwargs)
File "./tests/base.py", line 503, in __init__
super(TrafficResources, self).__init__(dev_name=dev_name,
File "./tests/base.py", line 477, in __init__
self.create_pd()
File "./tests/test_parent_domain.py", line 95, in create_pd
create_parent_domain_with_allocators(self)
File "./tests/test_parent_domain.py", line 69, in create_parent_
domain_with_allocators
raise ex
File "./tests/test_parent_domain.py", line 65, in create_parent_
domain_with_allocators
res.pd = ParentDomain(res.ctx, attr=pd_attr)
File "pd.pyx", line 261, in pyverbs.pd.ParentDomain.__init__
pyverbs.pyverbs_error.PyverbsRDMAError: Failed to allocate Parent
Domain.Errno: 22, Invalid argument
Fixes: cfe548d4c78e ("libhns: Add support for the thread
domain and the parent domain")
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
---
providers/hns/hns_roce_u_verbs.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index 34e8d07..ba3fef6 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -238,7 +238,7 @@ struct ibv_pd *hns_roce_u_alloc_pad(struct ibv_context *context,
return NULL;
if (attr->comp_mask) {
- errno = EINVAL;
+ errno = EOPNOTSUPP;
return NULL;
}
--
2.25.1

View File

@ -1,216 +0,0 @@
From 3a1432cfdb7c696d3acf97025e6d74bbf3e520dc Mon Sep 17 00:00:00 2001
From: Wenpeng Liang <liangwenpeng@huawei.com>
Date: Thu, 7 Dec 2023 09:48:00 +0800
Subject: [PATCH 78/80] libhns: Add
pthread_spin_destroy()/pthread_mutex_destroy()
driver inclusion
category: bugfix
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I8MF59
--------------------------------------------------------------------------
The functions pthread_spin_destroy()/pthread_mutex_destroy() correspond
to pthread_spin_init()/pthread_mutex_init(). The driver should call
pthread_spin_destroy()/pthread_mutex_destroy() to clean up these
resources before exiting.
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
---
providers/hns/hns_roce_u.c | 61 +++++++++++++++++++++++++++-----
providers/hns/hns_roce_u_hw_v2.c | 1 +
providers/hns/hns_roce_u_verbs.c | 17 ++++++---
3 files changed, 67 insertions(+), 12 deletions(-)
diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c
index f30486f..dfcd798 100644
--- a/providers/hns/hns_roce_u.c
+++ b/providers/hns/hns_roce_u.c
@@ -346,6 +346,47 @@ static int query_dev_attr(struct hns_roce_context *context,
return 0;
}
+static int hns_roce_init_context_lock(struct hns_roce_context *context)
+{
+ int ret;
+
+ ret = pthread_spin_init(&context->uar_lock, PTHREAD_PROCESS_PRIVATE);
+ if (ret)
+ return ret;
+
+ ret = pthread_mutex_init(&context->qp_table_mutex, NULL);
+ if (ret)
+ goto destroy_uar_lock;
+
+ ret = pthread_mutex_init(&context->srq_table_mutex, NULL);
+ if (ret)
+ goto destroy_qp_mutex;
+
+ ret = pthread_mutex_init(&context->db_list_mutex, NULL);
+ if (ret)
+ goto destroy_srq_mutex;
+
+ return 0;
+
+destroy_srq_mutex:
+ pthread_mutex_destroy(&context->srq_table_mutex);
+
+destroy_qp_mutex:
+ pthread_mutex_destroy(&context->qp_table_mutex);
+
+destroy_uar_lock:
+ pthread_spin_destroy(&context->uar_lock);
+ return ret;
+}
+
+static void hns_roce_destroy_context_lock(struct hns_roce_context *context)
+{
+ pthread_spin_destroy(&context->uar_lock);
+ pthread_mutex_destroy(&context->qp_table_mutex);
+ pthread_mutex_destroy(&context->srq_table_mutex);
+ pthread_mutex_destroy(&context->db_list_mutex);
+}
+
static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
int cmd_fd,
void *private_data)
@@ -365,7 +406,10 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
ucontext_set_cmd(&cmd, ctx_attr);
if (ibv_cmd_get_context(&context->ibv_ctx, &cmd.ibv_cmd, sizeof(cmd),
&resp.ibv_resp, sizeof(resp)))
- goto err_free;
+ goto err_ibv_cmd;
+
+ if (hns_roce_init_context_lock(context))
+ goto err_ibv_cmd;
hr_dev->congest_type = resp.congest_type;
@@ -383,23 +427,23 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
context->qp_table_shift = calc_table_shift(resp.qp_tab_size,
HNS_ROCE_QP_TABLE_BITS);
context->qp_table_mask = (1 << context->qp_table_shift) - 1;
- pthread_mutex_init(&context->qp_table_mutex, NULL);
+
for (i = 0; i < HNS_ROCE_QP_TABLE_SIZE; ++i)
context->qp_table[i].refcnt = 0;
context->srq_table_shift = calc_table_shift(resp.srq_tab_size,
HNS_ROCE_SRQ_TABLE_BITS);
context->srq_table_mask = (1 << context->srq_table_shift) - 1;
- pthread_mutex_init(&context->srq_table_mutex, NULL);
+
for (i = 0; i < HNS_ROCE_SRQ_TABLE_SIZE; ++i)
context->srq_table[i].refcnt = 0;
if (query_dev_attr(context, hr_dev, &resp))
- goto err_free;
+ goto err_query_dev;
if (init_dca_context(context, cmd_fd,
&resp, ctx_attr, hr_dev->page_size))
- goto err_free;
+ goto err_query_dev;
if (init_reset_context(context, cmd_fd, &resp, hr_dev->page_size))
goto reset_free;
@@ -407,8 +451,6 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
if (hns_roce_mmap(hr_dev, context, cmd_fd))
goto uar_free;
- pthread_spin_init(&context->uar_lock, PTHREAD_PROCESS_PRIVATE);
-
verbs_set_ops(&context->ibv_ctx, &hns_common_ops);
verbs_set_ops(&context->ibv_ctx, &hr_dev->u_hw->hw_ops);
@@ -419,7 +461,9 @@ uar_free:
munmap(context->reset_state, hr_dev->page_size);
reset_free:
uninit_dca_context(context);
-err_free:
+err_query_dev:
+ hns_roce_destroy_context_lock(context);
+err_ibv_cmd:
verbs_uninit_context(&context->ibv_ctx);
free(context);
return NULL;
@@ -434,6 +478,7 @@ static void hns_roce_free_context(struct ibv_context *ibctx)
if (context->reset_state)
munmap(context->reset_state, hr_dev->page_size);
uninit_dca_context(context);
+ hns_roce_destroy_context_lock(context);
verbs_uninit_context(&context->ibv_ctx);
free(context);
}
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index 68d7110..b2a8858 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -2049,6 +2049,7 @@ static int hns_roce_u_v2_destroy_qp(struct ibv_qp *ibqp)
hns_roce_unlock_cqs(to_hr_cq(ibqp->send_cq), to_hr_cq(ibqp->recv_cq));
hns_roce_free_qp_buf(qp, ctx);
+ hns_roce_qp_spinlock_destroy(qp);
free(qp);
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index fae6126..ba3fef6 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -803,6 +803,7 @@ int hns_roce_u_modify_cq(struct ibv_cq *cq, struct ibv_modify_cq_attr *attr)
int hns_roce_u_destroy_cq(struct ibv_cq *cq)
{
+ struct hns_roce_cq *hr_cq = to_hr_cq(cq);
int ret;
ret = ibv_cmd_destroy_cq(cq);
@@ -811,10 +812,13 @@ int hns_roce_u_destroy_cq(struct ibv_cq *cq)
hns_roce_uninit_cq_swc(to_hr_cq(cq));
- hns_roce_free_db(to_hr_ctx(cq->context), to_hr_cq(cq)->db,
+ hns_roce_free_db(to_hr_ctx(cq->context), hr_cq->db,
HNS_ROCE_CQ_TYPE_DB);
- hns_roce_free_buf(&to_hr_cq(cq)->buf);
- free(to_hr_cq(cq));
+ hns_roce_free_buf(&hr_cq->buf);
+
+ hns_roce_spinlock_destroy(&hr_cq->hr_lock);
+
+ free(hr_cq);
return ret;
}
@@ -1071,7 +1075,7 @@ static struct ibv_srq *create_srq(struct ibv_context *context,
set_srq_param(context, srq, init_attr);
if (alloc_srq_buf(srq))
- goto err_free_srq;
+ goto err_destroy_lock;
srq->rdb = hns_roce_alloc_db(hr_ctx, HNS_ROCE_SRQ_TYPE_DB);
if (!srq->rdb)
@@ -1102,6 +1106,9 @@ err_srq_db:
err_srq_buf:
free_srq_buf(srq);
+err_destroy_lock:
+ hns_roce_spinlock_destroy(&srq->hr_lock);
+
err_free_srq:
free(srq);
@@ -1191,6 +1198,8 @@ int hns_roce_u_destroy_srq(struct ibv_srq *ibv_srq)
hns_roce_free_db(ctx, srq->rdb, HNS_ROCE_SRQ_TYPE_DB);
free_srq_buf(srq);
+
+ hns_roce_spinlock_destroy(&srq->hr_lock);
free(srq);
return 0;
--
2.25.1

View File

@ -1,38 +0,0 @@
From 8759b0e6ec4e73994743c1ae0d0ecc186688b6d6 Mon Sep 17 00:00:00 2001
From: Ran Zhou <zhouran10@h-partners.com>
Date: Thu, 7 Dec 2023 09:48:01 +0800
Subject: [PATCH 79/80] libhns: Removes a repeated initialization of a spinlock
driver inclusion
category: bugfix
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I8MF83
--------------------------------------------------------------------------
The pthread_spin_init() of the QP is already done in create_qp(). Remove
the spinlock init in this place to avoid initializing it twice.
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
Signed-off-by: Ran Zhou <zhouran10@h-partners.com>
---
providers/hns/hns_roce_u_verbs.c | 4 ----
1 file changed, 4 deletions(-)
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index ba3fef6..c404948 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -1933,10 +1933,6 @@ static int hns_roce_alloc_qp_buf(struct ibv_qp_init_attr_ex *attr,
{
int ret;
- if (pthread_spin_init(&qp->sq.hr_lock.lock, PTHREAD_PROCESS_PRIVATE) ||
- pthread_spin_init(&qp->rq.hr_lock.lock, PTHREAD_PROCESS_PRIVATE))
- return -ENOMEM;
-
ret = qp_alloc_wqe(attr, hns_attr, qp, ctx);
if (ret)
return ret;
--
2.25.1

View File

@ -1,95 +0,0 @@
From 96d30f16bc03167c7c52e663785192382688f542 Mon Sep 17 00:00:00 2001
From: Chengchang Tang <tangchengchang@huawei.com>
Date: Thu, 7 Dec 2023 09:48:02 +0800
Subject: [PATCH 80/80] libhns: Fix owner bit when SQ wraps around in new IO
driver inclusion
category: bugfix
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I8MF9Q
--------------------------------------------------------------------------
The owner bit has already been written with the right value in
init_rc_wqe() or init_ud_wqe(), but it is then overwritten by some
subsequent operations. When the SQ wraps around, the overwritten
value is incorrect.
For example, the driver assigns the owner bit in the second step
below and overwrites it in the third step.
```c
ibv_wr_start();
ibv_wr_rdma_write();
if (inline)
ibv_wr_set_inline_data_list();
else
ibv_wr_set_sge_list();
ibv_wr_complete();
```
This patch removes the redundant owner bit assignment operations
in new IO.
Fixes: 36446a56eea5 ("libhns: Extended QP supports the new post send mechanism")
Fixes: 163d62ca6196 ("libhns: Fix the owner bit error of sq in new io")
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
---
providers/hns/hns_roce_u_hw_v2.c | 7 -------
1 file changed, 7 deletions(-)
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index b2a8858..acbc854 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -2544,8 +2544,6 @@ static void wr_set_sge_list_rc(struct ibv_qp_ex *ibv_qp, size_t num_sge,
wqe->msg_len = htole32(qp->sge_info.total_len);
hr_reg_write(wqe, RCWQE_SGE_NUM, qp->sge_info.valid_num);
-
- enable_wqe(qp, wqe, qp->sq.head);
}
static void wr_send_rc(struct ibv_qp_ex *ibv_qp)
@@ -2737,7 +2735,6 @@ static void wr_set_inline_data_rc(struct ibv_qp_ex *ibv_qp, void *addr,
qp->sge_info.total_len = length;
set_inline_data_list_rc(qp, wqe, 1, &buff);
- enable_wqe(qp, wqe, qp->sq.head);
}
static void wr_set_inline_data_list_rc(struct ibv_qp_ex *ibv_qp, size_t num_buf,
@@ -2755,7 +2752,6 @@ static void wr_set_inline_data_list_rc(struct ibv_qp_ex *ibv_qp, size_t num_buf,
qp->sge_info.total_len += buf_list[i].length;
set_inline_data_list_rc(qp, wqe, num_buf, buf_list);
- enable_wqe(qp, wqe, qp->sq.head);
}
static struct hns_roce_ud_sq_wqe *
@@ -2892,7 +2888,6 @@ static void wr_set_sge_list_ud(struct ibv_qp_ex *ibv_qp, size_t num_sge,
hr_reg_write(wqe, UDWQE_SGE_NUM, cnt);
qp->sge_info.start_idx += cnt;
- enable_wqe(qp, wqe, qp->sq.head);
}
static void set_inline_data_list_ud(struct hns_roce_qp *qp,
@@ -2958,7 +2953,6 @@ static void wr_set_inline_data_ud(struct ibv_qp_ex *ibv_qp, void *addr,
qp->sge_info.total_len = length;
set_inline_data_list_ud(qp, wqe, 1, &buff);
- enable_wqe(qp, wqe, qp->sq.head);
}
static void wr_set_inline_data_list_ud(struct ibv_qp_ex *ibv_qp, size_t num_buf,
@@ -2976,7 +2970,6 @@ static void wr_set_inline_data_list_ud(struct ibv_qp_ex *ibv_qp, size_t num_buf,
qp->sge_info.total_len += buf_list[i].length;
set_inline_data_list_ud(qp, wqe, num_buf, buf_list);
- enable_wqe(qp, wqe, qp->sq.head);
}
static void wr_start(struct ibv_qp_ex *ibv_qp)
--
2.25.1

View File

@ -1,83 +0,0 @@
From cad30f3d98525d14796094b2905de222c894464f Mon Sep 17 00:00:00 2001
From: Chengchang Tang <tangchengchang@huawei.com>
Date: Fri, 8 Dec 2023 09:49:42 +0800
Subject: [PATCH] libhns: Fix missing DB when compiler does not support SVE
driver inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I8MPTX
----------------------------------------------------------------------
Currently, if the compiler does not support SVE, hns_roce_sve_write512()
is an empty function, which means the doorbell is missed when
HNS_ROCE_QP_CAP_SVE_DIRECT_WQE is set in the QP flags.
This patch ensures that the driver always generates the DB regardless of
whether SVE DWQE is supported.
Fixes: 7b1f5c5654c2 ("libhns: Add support for SVE Direct WQE function")
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
Signed-off-by: Ran Zhou <zhouran10@h-partners.com>
---
providers/hns/hns_roce_u_hw_v2.c | 33 +++++++++++++-------------------
1 file changed, 13 insertions(+), 20 deletions(-)
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index acbc854..be4c9f3 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -318,26 +318,22 @@ static void hns_roce_update_sq_db(struct hns_roce_context *ctx,
hns_roce_write64(ctx, qp->sq.db_reg, (__le32 *)&sq_db);
}
-static void hns_roce_write512(uint64_t *dest, uint64_t *val)
+static void hns_roce_qp_write512(struct hns_roce_qp *qp, uint64_t *val)
{
- mmio_memcpy_x64(dest, val, sizeof(struct hns_roce_rc_sq_wqe));
-}
+ uint64_t *dest = qp->sq.db_reg;
#if defined(HNS_SVE)
-static void hns_roce_sve_write512(uint64_t *dest, uint64_t *val)
-{
- asm volatile(
- "ldr z0, [%0]\n"
- "str z0, [%1]\n"
- ::"r" (val), "r"(dest):"cc", "memory"
- );
-}
-#else
-static void hns_roce_sve_write512(uint64_t *dest, uint64_t *val)
-{
- return;
-}
+ if (qp->flags & HNS_ROCE_QP_CAP_SVE_DIRECT_WQE) {
+ asm volatile(
+ "ldr z0, [%0]\n"
+ "str z0, [%1]\n"
+ ::"r" (val), "r"(dest):"cc", "memory"
+ );
+ return;
+ }
#endif
+ mmio_memcpy_x64(dest, val, sizeof(struct hns_roce_rc_sq_wqe));
+}
static void hns_roce_write_dwqe(struct hns_roce_qp *qp, void *wqe)
{
@@ -355,10 +351,7 @@ static void hns_roce_write_dwqe(struct hns_roce_qp *qp, void *wqe)
hr_reg_write(rc_sq_wqe, RCWQE_DB_SL_H, qp->sl >> HNS_ROCE_SL_SHIFT);
hr_reg_write(rc_sq_wqe, RCWQE_WQE_IDX, qp->sq.head);
- if (qp->flags & HNS_ROCE_QP_CAP_SVE_DIRECT_WQE)
- hns_roce_sve_write512(qp->sq.db_reg, wqe);
- else
- hns_roce_write512(qp->sq.db_reg, wqe);
+ hns_roce_qp_write512(qp, wqe);
}
static void update_cq_db(struct hns_roce_context *ctx, struct hns_roce_cq *cq)
--
2.25.1

Binary file not shown.

BIN
rdma-core-50.0.tar.gz Normal file

Binary file not shown.

View File

@ -1,93 +1,11 @@
Name: rdma-core
Version: 41.0
Release: 27
Version: 50.0
Release: 1
Summary: RDMA core userspace libraries and daemons
License: GPLv2 or BSD
Url: https://github.com/linux-rdma/rdma-core
Source: https://github.com/linux-rdma/rdma-core/releases/download/v%{version}/%{name}-%{version}.tar.gz
Patch0: 0001-libhns-Use-a-constant-instead-of-sizeof-operation.patch
Patch1: 0002-libhns-Fix-ext_sge-num-error-when-post-send.patch
Patch2: 0003-Update-kernel-headers.patch
Patch3: 0004-libhns-Fix-the-problem-of-sge-nums.patch
Patch4: 0005-Update-kernel-headers.patch
Patch5: 0006-libhns-Add-compatibility-handling-for-rq-inline.patch
Patch6: 0007-libhns-Refactor-rq-inline.patch
Patch7: 0008-libhns-RQ-inline-support-wc_x_poll_cq-interface.patch
Patch8: 0009-Update-kernel-headers.patch
Patch9: 0010-libhns-Support-cqe-inline.patch
Patch10: 0011-Update-kernel-headers.patch
Patch11: 0012-libhns-Support-DSCP.patch
Patch12: 0013-cma-Release-allocated-port-array.patch
Patch13: 0014-rsockets-Fix-allocation-size-There-is-memory-allocat.patch
Patch14: 0015-tests-test_mr.py-Change-the-argument-of-DmaBufMR-to-.patch
Patch15: 0016-ABI-Files.patch
Patch16: 0017-mlx5-Adjust-Crypto-BSF-size-if-signature-is-used.patch
Patch17: 0018-mlx5-DR-Fix-missing-comma-in-matcher-builder-dump-li.patch
Patch18: 0019-Install-xprtrdma-svcrdma-kmods-in-redhat-suse-dracut.patch
Patch19: 0020-providers-irdma-Explicitly-set-QP-modify-attributes-.patch
Patch20: 0021-providers-irdma-Use-s-g-array-in-post-send-only-when.patch
Patch21: 0022-providers-irdma-Report-correct-WC-errors.patch
Patch22: 0023-pyverbs-Increment-the-correct-rkey-in-test_qpex.patch
Patch23: 0024-mckey-Use-rdma_create_qp_ex-only-for-loopback-preven.patch
Patch24: 0025-Fix-spelling-mistake-of-underlying.patch
Patch25: 0026-rdma-ndd-disable-systemd-ProtectHostName-feature.patch
Patch26: 0027-libhns-Add-RoH-device-IDs.patch
Patch27: 0028-Update-kernel-headers.patch
Patch28: 0029-libhns-Add-the-parsing-of-mac-type-in-RoH-mode.patch
Patch29: 0030-libhns-Add-support-for-the-thread-domain-and-the-par.patch
Patch30: 0031-Update-kernel-headers.patch
Patch31: 0032-libhns-Introduce-DCA-for-RC-QP.patch
Patch32: 0033-libhns-Add-support-for-shrinking-DCA-memory-pool.patch
Patch33: 0034-libhns-Add-support-for-attaching-QP-s-WQE-buffer.patch
Patch34: 0035-libhns-Use-shared-memory-to-sync-DCA-status.patch
Patch35: 0036-libhns-Sync-DCA-status-by-shared-memory.patch
Patch36: 0037-libhns-Add-direct-verbs-support-to-config-DCA.patch
Patch37: 0038-Update-kernel-headers.patch
Patch38: 0039-libhns-Add-reset-stop-flow-mechanism.patch
Patch39: 0040-Update-kernel-headers.patch
Patch40: 0041-libhns-Support-congestion-control-algorithm-configur.patch
Patch41: 0042-Update-kernel-headers.patch
Patch42: 0043-libhns-Add-support-for-SVE-Direct-WQE.patch
Patch43: 0044-libhns-Fix-the-sge-num-problem-of-atomic-op.patch
Patch44: 0045-libhns-Fix-sge-tail_len-overflow.patch
Patch45: 0046-libhns-Disable-local-invalidate-operation.patch
Patch46: 0047-libhns-Fix-the-owner-bit-error-of-sq-in-new-io.patch
Patch47: 0048-libhns-Fix-incorrect-post-send-with-direct-wqe-of-wr.patch
Patch48: 0049-libhns-Add-a-judgment-to-the-congestion-control-algo.patch
Patch49: 0050-libhns-Support-user-to-choose-using-UD-sl-or-pktype-.patch
Patch50: 0051-libhns-Remove-unnecessary-QP-checks.patch
Patch51: 0052-libhns-Fix-reference-to-uninitialized-cq-pointer.patch
Patch52: 0053-libhns-Support-reporting-wc-as-software-mode.patch
Patch53: 0054-libhns-return-error-when-post-send-in-reset-state.patch
Patch54: 0055-libhns-separate-the-initialization-steps-of-lock.patch
Patch55: 0056-libhns-assign-doorbell-to-zero-when-allocate-it.patch
patch56: 0057-libhns-Fix-missing-reset-notification.patch
patch57: 0058-libhns-Support-flexible-WQE-buffer-page-size.patch
patch58: 0059-Update-kernel-headers.patch
patch59: 0060-libhns-Support-SRQ-record-doorbell.patch
patch60: 0061-libhns-Skip-resolving-MAC-for-RDMA-over-UBLink.patch
patch61: 0062-Update-kernel-headers-for-libhns-query_device.patch
patch62: 0063-libhns-Support-query-HW-ID-by-hnsdv_query_device.patch
patch63: 0064-Update-kernel-headers-for-supporting-POE-CQs.patch
patch64: 0065-libhns-Add-support-for-POE-CQs.patch
patch65: 0066-Update-kernel-headers-for-supporting-STARS-QP-in-HNS.patch
patch66: 0067-libhns-Support-STARS-mode-QP.patch
patch67: 0068-Update-kernel-headers-for-supporting-write-with-noti.patch
patch68: 0069-libhns-Support-write-with-notify.patch
patch70: 0070-libhns-Get-dmac-from-kernel-driver.patch
patch71: 0071-libhns-Corrects-several-issues-with-output-format-an.patch
patch72: 0072-libhns-Add-input-parameter-check-for-hnsdv_query_dev.patch
patch73: 0073-libhns-Fix-uninitialized-qp-attr-when-flush-cqe.patch
patch74: 0074-libhns-Fix-possible-overflow-in-cq-clean.patch
patch75: 0075-libhns-Fix-unnecessary-dca-memory-detach.patch
patch76: 0076-libhns-Bugfix-for-wrong-timing-of-modifying-ibv_qp-s.patch
patch77: 0077-libhns-Fix-parent-domain-unsupported-comp-mask.patch
patch78: 0078-libhns-Add-pthread_spin_destroy-pthread_mutex_destro.patch
patch79: 0079-libhns-Removes-a-repeated-initialization-of-a-spinlo.patch
patch80: 0080-libhns-Fix-owner-bit-when-SQ-wraps-around-in-new-IO.patch
patch81: 0081-libhns-Fix-missing-DB-when-compiler-does-not-support.patch
BuildRequires: binutils cmake >= 2.8.11 gcc libudev-devel pkgconfig pkgconfig(libnl-3.0)
BuildRequires: pkgconfig(libnl-route-3.0) valgrind-devel systemd systemd-devel
BuildRequires: python3-devel python3-Cython python3 python3-docutils perl-generators
@ -175,6 +93,8 @@ Provides: libipathverbs = %{version}-%{release}
Obsoletes: libipathverbs < %{version}-%{release}
Provides: libirdma = %{version}-%{release}
Obsoletes: libirdma < %{version}-%{release}
Provides: libmana = %{version}-%{release}
Obsoletes: libmana < %{version}-%{release}
Provides: libmlx4 = %{version}-%{release}
Obsoletes: libmlx4 < %{version}-%{release}
Provides: libmlx5 = %{version}-%{release}
@ -201,6 +121,7 @@ Device-specific plug-in ibverbs userspace drivers are included:
- libhns: HiSilicon Hip08/09/10 SoC
- libipathverbs: QLogic InfiniPath HCA
- libirdma: Intel Ethernet Connection RDMA
- libmana: Microsoft Azure Network Adapter
- libmlx4: Mellanox ConnectX-3 InfiniBand HCA
- libmlx5: Mellanox Connect-IB/X-4+ InfiniBand HCA
- libmthca: Mellanox InfiniBand HCA
@ -395,7 +316,6 @@ fi
%config(noreplace) %{_sysconfdir}/rdma/modules/opa.conf
%config(noreplace) %{_sysconfdir}/rdma/modules/rdma.conf
%config(noreplace) %{_sysconfdir}/rdma/modules/roce.conf
%config(noreplace) %{_sysconfdir}/udev/rules.d/*
%dir %{_sysconfdir}/modprobe.d
%config(noreplace) %{_sysconfdir}/modprobe.d/mlx4.conf
%config(noreplace) %{_sysconfdir}/modprobe.d/truescale.conf
@ -557,9 +477,9 @@ fi
%dir %{_sysconfdir}/libibverbs.d
%dir %{_libdir}/libibverbs
%{_libdir}/libefa.so.*
%{_libdir}/libhns.so.*
%{_libdir}/libibverbs*.so.*
%{_libdir}/libibverbs/*.so
%{_libdir}/libmana.so.*
%{_libdir}/libmlx5.so.*
%{_libdir}/libmlx4.so.*
%config(noreplace) %{_sysconfdir}/libibverbs.d/*.driver
@ -658,9 +578,16 @@ fi
%doc %{_docdir}/%{name}-%{version}/librdmacm.md
%doc %{_docdir}/%{name}-%{version}/libibverbs.md
%doc %{_docdir}/%{name}-%{version}/tag_matching.md
%doc %{_docdir}/%{name}-%{version}/70-persistent-ipoib.rules
%{_mandir}/*
%changelog
* Tue Feb 6 2024 Ran Zhou <zhouran10@h-partners.com> - 50.0-1
- Type: requirement
- ID: NA
- SUG: NA
- DESC: Update to 50.0
* Thu Jan 25 2024 Ran Zhou <zhouran10@h-partners.com> - 41.0-27
- Type: requirement
- ID: NA