Support hns roce DCA

DCA (Dynamic Context Attachment) allows many RC QPs to share WQE buffers in a
memory pool, which reduces memory consumption when many of the QPs are
inactive.
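As a rough, illustrative estimate (numbers assumed, not taken from the patch):
with 4 KB pages and the driver's default DCA unit of 16 pages, 1000 idle RC
QPs that would otherwise each pin a 64 KB WQE buffer (about 64 MB in total)
can instead share a pool sized only for the QPs that are actively posting
work.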

Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
Authored by Chengchang Tang on 2022-11-30 17:03:44 +08:00, committed by Chengchang Tang
parent c4527766ae
commit b88a370b79
8 changed files with 2488 additions and 1 deletions


@@ -0,0 +1,152 @@
From 7d72b40d311875677135289874d4a69e4891b0de Mon Sep 17 00:00:00 2001
From: Chengchang Tang <tangchengchang@huawei.com>
Date: Mon, 28 Nov 2022 21:52:20 +0800
Subject: Update kernel headers
driver inclusion
category: feature
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I63L1M
----------------------------------------------------------
Update the headers to kernel commit ?? ("RDMA/hns: Fixes concurrent ressetting
and post_recv in DCA mode").
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
Reviewed-by: Yangyang Li <liyangyang20@huawei.com>
---
kernel-headers/rdma/hns-abi.h | 84 ++++++++++++++++++++++++++++++++---
1 file changed, 78 insertions(+), 6 deletions(-)
diff --git a/kernel-headers/rdma/hns-abi.h b/kernel-headers/rdma/hns-abi.h
index 9866c51..6950841 100644
--- a/kernel-headers/rdma/hns-abi.h
+++ b/kernel-headers/rdma/hns-abi.h
@@ -77,7 +77,9 @@ enum hns_roce_qp_cap_flags {
HNS_ROCE_QP_CAP_RQ_RECORD_DB = 1 << 0,
HNS_ROCE_QP_CAP_SQ_RECORD_DB = 1 << 1,
HNS_ROCE_QP_CAP_OWNER_DB = 1 << 2,
+ HNS_ROCE_QP_CAP_DYNAMIC_CTX_ATTACH = 1 << 4,
HNS_ROCE_QP_CAP_DIRECT_WQE = 1 << 5,
+ HNS_ROCE_QP_CAP_DYNAMIC_CTX_DETACH = 1 << 6,
};
struct hns_roce_ib_create_qp_resp {
@@ -95,33 +97,46 @@ struct hns_roce_ib_modify_qp_resp {
__u8 tc_mode;
__u8 priority;
__u8 reserved[6];
+ __u32 dcan;
+ __u32 rsv2;
};
enum {
HNS_ROCE_EXSGE_FLAGS = 1 << 0,
HNS_ROCE_RQ_INLINE_FLAGS = 1 << 1,
HNS_ROCE_CQE_INLINE_FLAGS = 1 << 2,
+ HNS_ROCE_UCTX_CONFIG_DCA = 1 << 3,
};
enum {
HNS_ROCE_RSP_EXSGE_FLAGS = 1 << 0,
HNS_ROCE_RSP_RQ_INLINE_FLAGS = 1 << 1,
HNS_ROCE_RSP_CQE_INLINE_FLAGS = 1 << 2,
+ HNS_ROCE_UCTX_RSP_DCA_FLAGS = HNS_ROCE_UCTX_CONFIG_DCA,
};
struct hns_roce_ib_alloc_ucontext_resp {
__u32 qp_tab_size;
__u32 cqe_size;
- __u32 srq_tab_size;
- __u32 reserved;
- __u32 config;
- __u32 max_inline_data;
- __u8 mac_type;
- __u8 rsv1[7];
+ __u32 srq_tab_size;
+ __u32 reserved;
+ __u32 config;
+ __u32 max_inline_data;
+ __u8 mac_type;
+ __u8 rsv1[7];
+ __u32 dca_qps;
+ __u32 dca_mmap_size;
+ __aligned_u64 dca_mmap_key;
+};
+
+enum hns_roce_uctx_comp_mask {
+ HNS_ROCE_ALLOC_UCTX_COMP_DCA_MAX_QPS = 1 << 0,
};
struct hns_roce_ib_alloc_ucontext {
__u32 config;
+ __u32 comp; /* use hns_roce_uctx_comp_mask */
+ __u32 dca_max_qps;
__u32 reserved;
};
@@ -129,4 +144,61 @@ struct hns_roce_ib_alloc_pd_resp {
__u32 pdn;
};
+#define UVERBS_ID_NS_MASK 0xF000
+#define UVERBS_ID_NS_SHIFT 12
+
+enum hns_ib_objects {
+ HNS_IB_OBJECT_DCA_MEM = (1U << UVERBS_ID_NS_SHIFT),
+};
+
+enum hns_ib_dca_mem_methods {
+ HNS_IB_METHOD_DCA_MEM_REG = (1U << UVERBS_ID_NS_SHIFT),
+ HNS_IB_METHOD_DCA_MEM_DEREG,
+ HNS_IB_METHOD_DCA_MEM_SHRINK,
+ HNS_IB_METHOD_DCA_MEM_ATTACH,
+ HNS_IB_METHOD_DCA_MEM_DETACH,
+ HNS_IB_METHOD_DCA_MEM_QUERY,
+};
+
+enum hns_ib_dca_mem_reg_attrs {
+ HNS_IB_ATTR_DCA_MEM_REG_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+ HNS_IB_ATTR_DCA_MEM_REG_FLAGS,
+ HNS_IB_ATTR_DCA_MEM_REG_LEN,
+ HNS_IB_ATTR_DCA_MEM_REG_ADDR,
+ HNS_IB_ATTR_DCA_MEM_REG_KEY,
+};
+
+enum hns_ib_dca_mem_dereg_attrs {
+ HNS_IB_ATTR_DCA_MEM_DEREG_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+};
+
+enum hns_ib_dca_mem_shrink_attrs {
+ HNS_IB_ATTR_DCA_MEM_SHRINK_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+ HNS_IB_ATTR_DCA_MEM_SHRINK_RESERVED_SIZE,
+ HNS_IB_ATTR_DCA_MEM_SHRINK_OUT_FREE_KEY,
+ HNS_IB_ATTR_DCA_MEM_SHRINK_OUT_FREE_MEMS,
+};
+
+enum hns_ib_dca_mem_attach_attrs {
+ HNS_IB_ATTR_DCA_MEM_ATTACH_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+ HNS_IB_ATTR_DCA_MEM_ATTACH_SQ_OFFSET,
+ HNS_IB_ATTR_DCA_MEM_ATTACH_SGE_OFFSET,
+ HNS_IB_ATTR_DCA_MEM_ATTACH_RQ_OFFSET,
+ HNS_IB_ATTR_DCA_MEM_ATTACH_OUT_ALLOC_FLAGS,
+ HNS_IB_ATTR_DCA_MEM_ATTACH_OUT_ALLOC_PAGES,
+};
+
+enum hns_ib_dca_mem_detach_attrs {
+ HNS_IB_ATTR_DCA_MEM_DETACH_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+ HNS_IB_ATTR_DCA_MEM_DETACH_SQ_INDEX,
+};
+
+enum hns_ib_dca_mem_query_attrs {
+ HNS_IB_ATTR_DCA_MEM_QUERY_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+ HNS_IB_ATTR_DCA_MEM_QUERY_PAGE_INDEX,
+ HNS_IB_ATTR_DCA_MEM_QUERY_OUT_KEY,
+ HNS_IB_ATTR_DCA_MEM_QUERY_OUT_OFFSET,
+ HNS_IB_ATTR_DCA_MEM_QUERY_OUT_PAGE_COUNT,
+};
+
#endif /* HNS_ABI_USER_H */
--
2.30.0


@@ -0,0 +1,342 @@
From 58de0f69573e8b76affe401a261f17f1a5cedc01 Mon Sep 17 00:00:00 2001
From: Chengchang Tang <tangchengchang@huawei.com>
Date: Mon, 10 May 2021 17:13:09 +0800
Subject: libhns: Introduce DCA for RC QP
driver inclusion
category: feature
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I63L1M
----------------------------------------------------------
HIP09 introduces the DCA (Dynamic Context Attachment) feature, which allows
many RC QPs to share WQE buffers in a memory pool and thus reduces memory
consumption when many QPs are inactive.
Two functions are defined to add buffers to and remove buffers from the memory
pool by calling the ib cmd interfaces implemented in the hns kernel-space
driver.
If a QP enables the DCA feature, its WQE buffer is attached to the memory pool
when the user starts to post WRs and is detached once all CQEs have been
polled.
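
A minimal standalone sketch (not part of the patch) of the pool-growth
arithmetic used by hns_roce_add_dca_mem() below, assuming a 4 KB host page
size and the default HNS_DCA_DEFAULT_UNIT_PAGES of 16:

    #include <stdio.h>

    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

    int main(void)
    {
            unsigned int page_size = 4096;            /* assumed host page size */
            unsigned int unit_size = page_size * 16;  /* default DCA unit: 64 KB */
            unsigned int wqe_buf_size = 80 * 1024;    /* example QP WQE buffer size */

            /* requests are rounded up to whole allocation units before registering */
            unsigned int alloc = DIV_ROUND_UP(wqe_buf_size, unit_size) * unit_size;

            printf("request %u KB -> register a %u KB DCA mem node\n",
                   wqe_buf_size / 1024, alloc / 1024); /* 80 KB -> 128 KB */
            return 0;
    }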
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
Reviewed-by: Yangyang Li <liyangyang20@huawei.com>
---
providers/hns/hns_roce_u.c | 59 ++++++++++++-
providers/hns/hns_roce_u.h | 21 +++++
providers/hns/hns_roce_u_buf.c | 147 +++++++++++++++++++++++++++++++++
3 files changed, 223 insertions(+), 4 deletions(-)
diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c
index 3428bda..bd2b251 100644
--- a/providers/hns/hns_roce_u.c
+++ b/providers/hns/hns_roce_u.c
@@ -95,6 +95,53 @@ static const struct verbs_context_ops hns_common_ops = {
.alloc_parent_domain = hns_roce_u_alloc_pad,
};
+static int init_dca_context(struct hns_roce_context *ctx, int page_size)
+{
+ struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
+ int ret;
+
+ if (!(ctx->config & HNS_ROCE_UCTX_RSP_DCA_FLAGS))
+ return 0;
+
+ list_head_init(&dca_ctx->mem_list);
+ ret = pthread_spin_init(&dca_ctx->lock, PTHREAD_PROCESS_PRIVATE);
+ if (ret)
+ return ret;
+
+ dca_ctx->unit_size = page_size * HNS_DCA_DEFAULT_UNIT_PAGES;
+ dca_ctx->max_size = HNS_DCA_MAX_MEM_SIZE;
+ dca_ctx->mem_cnt = 0;
+
+ return 0;
+}
+
+static void uninit_dca_context(struct hns_roce_context *ctx)
+{
+ struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
+
+ if (!(ctx->config & HNS_ROCE_UCTX_RSP_DCA_FLAGS))
+ return;
+
+ pthread_spin_lock(&dca_ctx->lock);
+ hns_roce_cleanup_dca_mem(ctx);
+ pthread_spin_unlock(&dca_ctx->lock);
+
+ pthread_spin_destroy(&dca_ctx->lock);
+}
+
+static int hns_roce_mmap(struct hns_roce_device *hr_dev,
+ struct hns_roce_context *context, int cmd_fd)
+{
+ int page_size = hr_dev->page_size;
+
+ context->uar = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
+ MAP_SHARED, cmd_fd, 0);
+ if (context->uar == MAP_FAILED)
+ return -ENOMEM;
+
+ return 0;
+}
+
static uint32_t calc_table_shift(uint32_t entry_count, uint32_t size_shift)
{
uint32_t count_shift = hr_ilog32(entry_count);
@@ -119,7 +166,7 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
return NULL;
cmd.config |= HNS_ROCE_EXSGE_FLAGS | HNS_ROCE_RQ_INLINE_FLAGS |
- HNS_ROCE_CQE_INLINE_FLAGS;
+ HNS_ROCE_CQE_INLINE_FLAGS | HNS_ROCE_UCTX_CONFIG_DCA;
if (ibv_cmd_get_context(&context->ibv_ctx, &cmd.ibv_cmd, sizeof(cmd),
&resp.ibv_resp, sizeof(resp)))
goto err_free;
@@ -165,11 +212,12 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
context->max_srq_wr = dev_attrs.max_srq_wr;
context->max_srq_sge = dev_attrs.max_srq_sge;
- context->uar = mmap(NULL, hr_dev->page_size, PROT_READ | PROT_WRITE,
- MAP_SHARED, cmd_fd, 0);
- if (context->uar == MAP_FAILED)
+ if (init_dca_context(context, hr_dev->page_size))
goto err_free;
+ if (hns_roce_mmap(hr_dev, context, cmd_fd))
+ goto dca_free;
+
pthread_spin_init(&context->uar_lock, PTHREAD_PROCESS_PRIVATE);
verbs_set_ops(&context->ibv_ctx, &hns_common_ops);
@@ -177,6 +225,8 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
return &context->ibv_ctx;
+dca_free:
+ uninit_dca_context(context);
err_free:
verbs_uninit_context(&context->ibv_ctx);
free(context);
@@ -189,6 +239,7 @@ static void hns_roce_free_context(struct ibv_context *ibctx)
struct hns_roce_context *context = to_hr_ctx(ibctx);
munmap(context->uar, hr_dev->page_size);
+ uninit_dca_context(context);
verbs_uninit_context(&context->ibv_ctx);
free(context);
}
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index 2edb07e..0e25ce5 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -147,6 +147,10 @@
#define hr_reg_read(ptr, field) _hr_reg_read(ptr, field)
+enum {
+ HNS_ROCE_CAP_FLAG_DCA_MODE = BIT(15),
+};
+
#define HNS_ROCE_QP_TABLE_BITS 8
#define HNS_ROCE_QP_TABLE_SIZE BIT(HNS_ROCE_QP_TABLE_BITS)
@@ -199,6 +203,18 @@ struct hns_roce_spinlock {
int need_lock;
};
+#define HNS_DCA_MAX_MEM_SIZE ~0UL
+#define HNS_DCA_DEFAULT_UNIT_PAGES 16
+
+struct hns_roce_dca_ctx {
+ struct list_head mem_list;
+ pthread_spinlock_t lock;
+ int mem_cnt;
+ unsigned int unit_size;
+ uint64_t max_size;
+ uint64_t curr_size;
+};
+
struct hns_roce_context {
struct verbs_context ibv_ctx;
void *uar;
@@ -231,6 +247,8 @@ struct hns_roce_context {
unsigned int cqe_size;
uint32_t config;
unsigned int max_inline_data;
+
+ struct hns_roce_dca_ctx dca_ctx;
};
struct hns_roce_td {
@@ -562,6 +580,9 @@ void hns_roce_qp_spinlock_destroy(struct hns_roce_qp *qp);
void hns_roce_free_qp_buf(struct hns_roce_qp *qp, struct hns_roce_context *ctx);
+void hns_roce_cleanup_dca_mem(struct hns_roce_context *ctx);
+int hns_roce_add_dca_mem(struct hns_roce_context *ctx, uint32_t size);
+
void hns_roce_init_qp_indices(struct hns_roce_qp *qp);
extern const struct hns_roce_u_hw hns_roce_u_hw_v2;
diff --git a/providers/hns/hns_roce_u_buf.c b/providers/hns/hns_roce_u_buf.c
index 471dd9c..02c43ae 100644
--- a/providers/hns/hns_roce_u_buf.c
+++ b/providers/hns/hns_roce_u_buf.c
@@ -60,3 +60,150 @@ void hns_roce_free_buf(struct hns_roce_buf *buf)
munmap(buf->buf, buf->length);
}
+
+struct hns_roce_dca_mem {
+ uint32_t handle;
+ struct list_node entry;
+ struct hns_roce_buf buf;
+ struct hns_roce_context *ctx;
+};
+
+static void free_dca_mem(struct hns_roce_context *ctx,
+ struct hns_roce_dca_mem *mem)
+{
+ hns_roce_free_buf(&mem->buf);
+ free(mem);
+}
+
+static struct hns_roce_dca_mem *alloc_dca_mem(uint32_t size)
+{
+ struct hns_roce_dca_mem *mem = NULL;
+ int ret;
+
+ mem = malloc(sizeof(struct hns_roce_dca_mem));
+ if (!mem) {
+ errno = ENOMEM;
+ return NULL;
+ }
+
+ ret = hns_roce_alloc_buf(&mem->buf, size, HNS_HW_PAGE_SIZE);
+ if (ret) {
+ errno = ENOMEM;
+ free(mem);
+ return NULL;
+ }
+
+ return mem;
+}
+
+static inline uint64_t dca_mem_to_key(struct hns_roce_dca_mem *dca_mem)
+{
+ return (uintptr_t)dca_mem;
+}
+
+static inline void *dca_mem_addr(struct hns_roce_dca_mem *dca_mem, int offset)
+{
+ return dca_mem->buf.buf + offset;
+}
+
+static int register_dca_mem(struct hns_roce_context *ctx, uint64_t key,
+ void *addr, uint32_t size, uint32_t *handle)
+{
+ struct ib_uverbs_attr *attr;
+ int ret;
+
+ DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM,
+ HNS_IB_METHOD_DCA_MEM_REG, 4);
+ fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_REG_LEN, size);
+ fill_attr_in_uint64(cmd, HNS_IB_ATTR_DCA_MEM_REG_ADDR,
+ ioctl_ptr_to_u64(addr));
+ fill_attr_in_uint64(cmd, HNS_IB_ATTR_DCA_MEM_REG_KEY, key);
+ attr = fill_attr_out_obj(cmd, HNS_IB_ATTR_DCA_MEM_REG_HANDLE);
+
+ ret = execute_ioctl(&ctx->ibv_ctx.context, cmd);
+ if (ret) {
+ verbs_err(&ctx->ibv_ctx, "failed to reg DCA mem, ret = %d.\n",
+ ret);
+ return ret;
+ }
+
+ *handle = read_attr_obj(HNS_IB_ATTR_DCA_MEM_REG_HANDLE, attr);
+
+ return 0;
+}
+
+static void deregister_dca_mem(struct hns_roce_context *ctx, uint32_t handle)
+{
+ int ret;
+
+ DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM,
+ HNS_IB_METHOD_DCA_MEM_DEREG, 1);
+ fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_DEREG_HANDLE, handle);
+ ret = execute_ioctl(&ctx->ibv_ctx.context, cmd);
+ if (ret)
+ verbs_warn(&ctx->ibv_ctx,
+ "failed to dereg DCA mem-%u, ret = %d.\n",
+ handle, ret);
+}
+
+void hns_roce_cleanup_dca_mem(struct hns_roce_context *ctx)
+{
+ struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
+ struct hns_roce_dca_mem *mem;
+ struct hns_roce_dca_mem *tmp;
+
+ list_for_each_safe(&dca_ctx->mem_list, mem, tmp, entry)
+ deregister_dca_mem(ctx, mem->handle);
+}
+
+static bool add_dca_mem_enabled(struct hns_roce_dca_ctx *ctx,
+ uint32_t alloc_size)
+{
+ bool enable;
+
+ pthread_spin_lock(&ctx->lock);
+
+ if (ctx->unit_size == 0) /* Pool size can't be increased */
+ enable = false;
+ else if (ctx->max_size == HNS_DCA_MAX_MEM_SIZE) /* Pool size no limit */
+ enable = true;
+ else /* Pool size doesn't exceed max size */
+ enable = (ctx->curr_size + alloc_size) < ctx->max_size;
+
+ pthread_spin_unlock(&ctx->lock);
+
+ return enable;
+}
+
+int hns_roce_add_dca_mem(struct hns_roce_context *ctx, uint32_t size)
+{
+ struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
+ struct hns_roce_dca_mem *mem;
+ int ret;
+
+ if (!add_dca_mem_enabled(&ctx->dca_ctx, size))
+ return -ENOMEM;
+
+ /* Step 1: Alloc DCA mem address */
+ mem = alloc_dca_mem(
+ DIV_ROUND_UP(size, dca_ctx->unit_size) * dca_ctx->unit_size);
+ if (!mem)
+ return -ENOMEM;
+
+ /* Step 2: Register DCA mem uobject to pin user address */
+ ret = register_dca_mem(ctx, dca_mem_to_key(mem), dca_mem_addr(mem, 0),
+ mem->buf.length, &mem->handle);
+ if (ret) {
+ free_dca_mem(ctx, mem);
+ return ret;
+ }
+
+ /* Step 3: Add DCA mem node to pool */
+ pthread_spin_lock(&dca_ctx->lock);
+ list_add_tail(&dca_ctx->mem_list, &mem->entry);
+ dca_ctx->mem_cnt++;
+ dca_ctx->curr_size += mem->buf.length;
+ pthread_spin_unlock(&dca_ctx->lock);
+
+ return 0;
+}
--
2.30.0


@@ -0,0 +1,204 @@
From c8d7a2dc811a18ffd314b8764c961234e5f2ec77 Mon Sep 17 00:00:00 2001
From: Chengchang Tang <tangchengchang@huawei.com>
Date: Mon, 10 May 2021 17:13:13 +0800
Subject: libhns: Add support for shrinking DCA memory pool
driver inclusion
category: feature
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I63L1M
----------------------------------------------------------
A QP's WQE buffer may be detached after the QP is modified or its CQEs are
polled, and a DCA mem object becomes clean once no QP is using it. Shrink such
clean DCA mem out of the memory pool and destroy its buffer to reduce memory
consumption.
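For example (an illustrative reading of the flow added below): once every QP
has detached, each call to hns_roce_shrink_dca_mem() asks the kernel to
release clean nodes while keeping dca_ctx->min_size reserved; the response
names the freed node by its key and reports how many clean nodes remain, and
the loop keeps deregistering nodes until that count drops to one or less.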
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
Reviewed-by: Yangyang Li <liyangyang20@huawei.com>
---
providers/hns/hns_roce_u.h | 2 +
providers/hns/hns_roce_u_buf.c | 103 +++++++++++++++++++++++++++++++
providers/hns/hns_roce_u_hw_v2.c | 7 +++
3 files changed, 112 insertions(+)
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index 0e25ce5..7b5c5c9 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -212,6 +212,7 @@ struct hns_roce_dca_ctx {
int mem_cnt;
unsigned int unit_size;
uint64_t max_size;
+ uint64_t min_size;
uint64_t curr_size;
};
@@ -580,6 +581,7 @@ void hns_roce_qp_spinlock_destroy(struct hns_roce_qp *qp);
void hns_roce_free_qp_buf(struct hns_roce_qp *qp, struct hns_roce_context *ctx);
+void hns_roce_shrink_dca_mem(struct hns_roce_context *ctx);
void hns_roce_cleanup_dca_mem(struct hns_roce_context *ctx);
int hns_roce_add_dca_mem(struct hns_roce_context *ctx, uint32_t size);
diff --git a/providers/hns/hns_roce_u_buf.c b/providers/hns/hns_roce_u_buf.c
index 02c43ae..c0f86e9 100644
--- a/providers/hns/hns_roce_u_buf.c
+++ b/providers/hns/hns_roce_u_buf.c
@@ -101,6 +101,20 @@ static inline uint64_t dca_mem_to_key(struct hns_roce_dca_mem *dca_mem)
return (uintptr_t)dca_mem;
}
+static struct hns_roce_dca_mem *key_to_dca_mem(struct hns_roce_dca_ctx *ctx,
+ uint64_t key)
+{
+ struct hns_roce_dca_mem *mem;
+ struct hns_roce_dca_mem *tmp;
+
+ list_for_each_safe(&ctx->mem_list, mem, tmp, entry) {
+ if (dca_mem_to_key(mem) == key)
+ return mem;
+ }
+
+ return NULL;
+}
+
static inline void *dca_mem_addr(struct hns_roce_dca_mem *dca_mem, int offset)
{
return dca_mem->buf.buf + offset;
@@ -156,6 +170,32 @@ void hns_roce_cleanup_dca_mem(struct hns_roce_context *ctx)
deregister_dca_mem(ctx, mem->handle);
}
+struct hns_dca_mem_shrink_resp {
+ uint32_t free_mems;
+ uint64_t free_key;
+};
+
+static int shrink_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
+ uint64_t size, struct hns_dca_mem_shrink_resp *resp)
+{
+ int ret;
+
+ DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM,
+ HNS_IB_METHOD_DCA_MEM_SHRINK, 4);
+ fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_SHRINK_HANDLE, handle);
+ fill_attr_in_uint64(cmd, HNS_IB_ATTR_DCA_MEM_SHRINK_RESERVED_SIZE, size);
+ fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_SHRINK_OUT_FREE_KEY,
+ &resp->free_key, sizeof(resp->free_key));
+ fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_SHRINK_OUT_FREE_MEMS,
+ &resp->free_mems, sizeof(resp->free_mems));
+
+ ret = execute_ioctl(&ctx->ibv_ctx.context, cmd);
+ if (ret)
+ verbs_err(&ctx->ibv_ctx, "failed to shrink DCA mem, ret = %d.\n",
+ ret);
+
+ return ret;
+}
static bool add_dca_mem_enabled(struct hns_roce_dca_ctx *ctx,
uint32_t alloc_size)
{
@@ -175,6 +215,17 @@ static bool add_dca_mem_enabled(struct hns_roce_dca_ctx *ctx,
return enable;
}
+static bool shrink_dca_mem_enabled(struct hns_roce_dca_ctx *ctx)
+{
+ bool enable;
+
+ pthread_spin_lock(&ctx->lock);
+ enable = ctx->mem_cnt > 0 && ctx->min_size < ctx->max_size;
+ pthread_spin_unlock(&ctx->lock);
+
+ return enable;
+}
+
int hns_roce_add_dca_mem(struct hns_roce_context *ctx, uint32_t size)
{
struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
@@ -207,3 +258,55 @@ int hns_roce_add_dca_mem(struct hns_roce_context *ctx, uint32_t size)
return 0;
}
+
+void hns_roce_shrink_dca_mem(struct hns_roce_context *ctx)
+{
+ struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
+ struct hns_dca_mem_shrink_resp resp = {};
+ struct hns_roce_dca_mem *mem;
+ int dca_mem_cnt;
+ uint32_t handle;
+ int ret;
+
+ pthread_spin_lock(&dca_ctx->lock);
+ dca_mem_cnt = ctx->dca_ctx.mem_cnt;
+ pthread_spin_unlock(&dca_ctx->lock);
+ while (dca_mem_cnt > 0 && shrink_dca_mem_enabled(dca_ctx)) {
+ resp.free_mems = 0;
+ /* Step 1: Use any DCA mem uobject to shrink pool */
+ pthread_spin_lock(&dca_ctx->lock);
+ mem = list_tail(&dca_ctx->mem_list,
+ struct hns_roce_dca_mem, entry);
+ handle = mem ? mem->handle : 0;
+ pthread_spin_unlock(&dca_ctx->lock);
+ if (!mem)
+ break;
+
+ ret = shrink_dca_mem(ctx, handle, dca_ctx->min_size, &resp);
+ if (ret || likely(resp.free_mems < 1))
+ break;
+
+ /* Step 2: Remove shrunk DCA mem node from pool */
+ pthread_spin_lock(&dca_ctx->lock);
+ mem = key_to_dca_mem(dca_ctx, resp.free_key);
+ if (mem) {
+ list_del(&mem->entry);
+ dca_ctx->mem_cnt--;
+ dca_ctx->curr_size -= mem->buf.length;
+ }
+
+ handle = mem ? mem->handle : 0;
+ pthread_spin_unlock(&dca_ctx->lock);
+ if (!mem)
+ break;
+
+ /* Step 3: Destroy DCA mem uobject */
+ deregister_dca_mem(ctx, handle);
+ free_dca_mem(ctx, mem);
+ /* No any free memory after deregister 1 DCA mem */
+ if (resp.free_mems <= 1)
+ break;
+
+ dca_mem_cnt--;
+ }
+}
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index 7b2f2d1..f3a7e6b 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -738,6 +738,10 @@ static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne,
hns_roce_spin_unlock(&cq->hr_lock);
+ /* Try to shrink the DCA mem */
+ if (ctx->dca_ctx.mem_cnt > 0)
+ hns_roce_shrink_dca_mem(ctx);
+
return err == V2_CQ_POLL_ERR ? err : npolled;
}
@@ -1674,6 +1678,9 @@ static int hns_roce_u_v2_destroy_qp(struct ibv_qp *ibqp)
free(qp);
+ if (ctx->dca_ctx.mem_cnt > 0)
+ hns_roce_shrink_dca_mem(ctx);
+
return ret;
}
--
2.30.0


@@ -0,0 +1,618 @@
From 835bc1a62dfc3398ef9da23de07348a353f67214 Mon Sep 17 00:00:00 2001
From: Chengchang Tang <tangchengchang@huawei.com>
Date: Mon, 10 May 2021 17:13:17 +0800
Subject: libhns: Add support for attaching QP's WQE buffer
driver inclusion
category: feature
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I63L1M
----------------------------------------------------------
If a uQP works in DCA mode, its WQE buffer is split into many blocks that are
stored in a list. The blocks are allocated from the DCA memory pool before WRs
are posted and are dropped when the QP's CI equals its PI after the CQ is
polled.
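
A minimal standalone sketch (not part of the patch) of how get_wqe() below
resolves a WQE offset through the per-QP page list once DCA is enabled; the
page shift and offset are example values:

    #include <stdio.h>

    int main(void)
    {
            unsigned int shift = 12;       /* HNS_HW_PAGE_SHIFT: 4 KB HW pages */
            unsigned int offset = 0x5a40;  /* example offset inside the QP buffer */

            /* dca_wqe.bufs[] holds one pointer per attached 4 KB page */
            unsigned int page_index = offset >> shift;                /* -> bufs[5] */
            unsigned int page_offset = offset & ((1u << shift) - 1);  /* -> 0xa40   */

            printf("wqe at bufs[%u] + 0x%x\n", page_index, page_offset);
            return 0;
    }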
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
Reviewed-by: Yangyang Li <liyangyang20@huawei.com>
---
providers/hns/hns_roce_u.h | 26 ++++-
providers/hns/hns_roce_u_buf.c | 173 ++++++++++++++++++++++++++++++-
providers/hns/hns_roce_u_hw_v2.c | 151 ++++++++++++++++++++++++---
providers/hns/hns_roce_u_hw_v2.h | 2 +
providers/hns/hns_roce_u_verbs.c | 32 ++++--
5 files changed, 358 insertions(+), 26 deletions(-)
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index 7b5c5c9..44a733f 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -348,11 +348,18 @@ struct hns_roce_sge_ex {
unsigned int sge_shift;
};
+struct hns_roce_dca_buf {
+ void **bufs;
+ unsigned int max_cnt;
+ unsigned int shift;
+};
+
struct hns_roce_qp {
struct verbs_qp verbs_qp;
struct hns_roce_buf buf;
+ struct hns_roce_dca_buf dca_wqe;
int max_inline_data;
- int buf_size;
+ unsigned int buf_size;
unsigned int sq_signal_bits;
struct hns_roce_wq sq;
struct hns_roce_wq rq;
@@ -401,11 +408,22 @@ struct hns_roce_u_hw {
struct verbs_context_ops hw_ops;
};
+struct hns_roce_dca_attach_attr {
+ uint32_t sq_offset;
+ uint32_t sge_offset;
+ uint32_t rq_offset;
+};
+
+struct hns_roce_dca_detach_attr {
+ uint32_t sq_index;
+};
+
/*
* The entries's buffer should be aligned to a multiple of the hardware's
* minimum page size.
*/
#define hr_hw_page_align(x) align(x, HNS_HW_PAGE_SIZE)
+#define hr_hw_page_count(x) (hr_hw_page_align(x) / HNS_HW_PAGE_SIZE)
static inline unsigned int to_hr_hem_entries_size(int count, int buf_shift)
{
@@ -581,9 +599,13 @@ void hns_roce_qp_spinlock_destroy(struct hns_roce_qp *qp);
void hns_roce_free_qp_buf(struct hns_roce_qp *qp, struct hns_roce_context *ctx);
+int hns_roce_attach_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
+ struct hns_roce_dca_attach_attr *attr,
+ uint32_t size, struct hns_roce_dca_buf *buf);
+void hns_roce_detach_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
+ struct hns_roce_dca_detach_attr *attr);
void hns_roce_shrink_dca_mem(struct hns_roce_context *ctx);
void hns_roce_cleanup_dca_mem(struct hns_roce_context *ctx);
-int hns_roce_add_dca_mem(struct hns_roce_context *ctx, uint32_t size);
void hns_roce_init_qp_indices(struct hns_roce_qp *qp);
diff --git a/providers/hns/hns_roce_u_buf.c b/providers/hns/hns_roce_u_buf.c
index c0f86e9..3d41b89 100644
--- a/providers/hns/hns_roce_u_buf.c
+++ b/providers/hns/hns_roce_u_buf.c
@@ -196,6 +196,88 @@ static int shrink_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
return ret;
}
+
+struct hns_dca_mem_query_resp {
+ uint64_t key;
+ uint32_t offset;
+ uint32_t page_count;
+};
+
+static int query_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
+ uint32_t index, struct hns_dca_mem_query_resp *resp)
+{
+ int ret;
+
+ DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM,
+ HNS_IB_METHOD_DCA_MEM_QUERY, 5);
+ fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_QUERY_HANDLE, handle);
+ fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_QUERY_PAGE_INDEX, index);
+ fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_QUERY_OUT_KEY,
+ &resp->key, sizeof(resp->key));
+ fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_QUERY_OUT_OFFSET,
+ &resp->offset, sizeof(resp->offset));
+ fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_QUERY_OUT_PAGE_COUNT,
+ &resp->page_count, sizeof(resp->page_count));
+ ret = execute_ioctl(&ctx->ibv_ctx.context, cmd);
+ if (ret)
+ verbs_err(&ctx->ibv_ctx,
+ "failed to query DCA mem-%u, ret = %d.\n",
+ handle, ret);
+
+ return ret;
+}
+
+void hns_roce_detach_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
+ struct hns_roce_dca_detach_attr *attr)
+{
+ int ret;
+
+ DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM,
+ HNS_IB_METHOD_DCA_MEM_DETACH, 4);
+ fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_DETACH_HANDLE, handle);
+ fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_DETACH_SQ_INDEX,
+ attr->sq_index);
+ ret = execute_ioctl(&ctx->ibv_ctx.context, cmd);
+ if (ret)
+ verbs_warn(&ctx->ibv_ctx,
+ "failed to detach DCA mem-%u, ret = %d.\n",
+ handle, ret);
+}
+
+struct hns_dca_mem_attach_resp {
+#define HNS_DCA_ATTACH_OUT_FLAGS_NEW_BUFFER BIT(0)
+ uint32_t alloc_flags;
+ uint32_t alloc_pages;
+};
+
+static int attach_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
+ struct hns_roce_dca_attach_attr *attr,
+ struct hns_dca_mem_attach_resp *resp)
+{
+ int ret;
+
+ DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM,
+ HNS_IB_METHOD_DCA_MEM_ATTACH, 6);
+ fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_HANDLE, handle);
+ fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_SQ_OFFSET,
+ attr->sq_offset);
+ fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_SGE_OFFSET,
+ attr->sge_offset);
+ fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_RQ_OFFSET,
+ attr->rq_offset);
+ fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_OUT_ALLOC_FLAGS,
+ &resp->alloc_flags, sizeof(resp->alloc_flags));
+ fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_OUT_ALLOC_PAGES,
+ &resp->alloc_pages, sizeof(resp->alloc_pages));
+ ret = execute_ioctl(&ctx->ibv_ctx.context, cmd);
+ if (ret)
+ verbs_err(&ctx->ibv_ctx,
+ "failed to attach DCA mem-%u, ret = %d.\n",
+ handle, ret);
+
+ return ret;
+}
+
static bool add_dca_mem_enabled(struct hns_roce_dca_ctx *ctx,
uint32_t alloc_size)
{
@@ -226,7 +308,7 @@ static bool shrink_dca_mem_enabled(struct hns_roce_dca_ctx *ctx)
return enable;
}
-int hns_roce_add_dca_mem(struct hns_roce_context *ctx, uint32_t size)
+static int add_dca_mem(struct hns_roce_context *ctx, uint32_t size)
{
struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
struct hns_roce_dca_mem *mem;
@@ -310,3 +392,92 @@ void hns_roce_shrink_dca_mem(struct hns_roce_context *ctx)
dca_mem_cnt--;
}
}
+
+static void config_dca_pages(void *addr, struct hns_roce_dca_buf *buf,
+ uint32_t page_index, int page_count)
+{
+ void **pages = &buf->bufs[page_index];
+ int page_size = 1 << buf->shift;
+ int i;
+
+ for (i = 0; i < page_count; i++) {
+ pages[i] = addr;
+ addr += page_size;
+ }
+}
+
+static int setup_dca_buf(struct hns_roce_context *ctx, uint32_t handle,
+ struct hns_roce_dca_buf *buf, uint32_t page_count)
+{
+ struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
+ struct hns_dca_mem_query_resp resp = {};
+ struct hns_roce_dca_mem *mem;
+ uint32_t idx = 0;
+ int ret;
+
+ while (idx < page_count && idx < buf->max_cnt) {
+ resp.page_count = 0;
+ ret = query_dca_mem(ctx, handle, idx, &resp);
+ if (ret)
+ return -ENOMEM;
+ if (resp.page_count < 1)
+ break;
+
+ pthread_spin_lock(&dca_ctx->lock);
+ mem = key_to_dca_mem(dca_ctx, resp.key);
+ if (mem && resp.offset < mem->buf.length) {
+ config_dca_pages(dca_mem_addr(mem, resp.offset),
+ buf, idx, resp.page_count);
+ } else {
+ pthread_spin_unlock(&dca_ctx->lock);
+ break;
+ }
+ pthread_spin_unlock(&dca_ctx->lock);
+
+ idx += resp.page_count;
+ }
+
+ return (idx >= page_count) ? 0 : -ENOMEM;
+}
+
+#define DCA_EXPAND_MEM_TRY_TIMES 3
+int hns_roce_attach_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
+ struct hns_roce_dca_attach_attr *attr,
+ uint32_t size, struct hns_roce_dca_buf *buf)
+{
+ uint32_t buf_pages = size >> buf->shift;
+ struct hns_dca_mem_attach_resp resp = {};
+ bool is_new_buf = true;
+ int try_times = 0;
+ int ret = 0;
+
+ do {
+ resp.alloc_pages = 0;
+ ret = attach_dca_mem(ctx, handle, attr, &resp);
+ if (ret)
+ break;
+
+ if (resp.alloc_pages >= buf_pages) {
+ is_new_buf = !!(resp.alloc_flags &
+ HNS_DCA_ATTACH_OUT_FLAGS_NEW_BUFFER);
+ break;
+ }
+
+ ret = add_dca_mem(ctx, size);
+ if (ret)
+ break;
+ } while (try_times++ < DCA_EXPAND_MEM_TRY_TIMES);
+
+ if (ret || resp.alloc_pages < buf_pages) {
+ verbs_err(&ctx->ibv_ctx,
+ "failed to attach, size %u count %u != %u, ret = %d.\n",
+ size, buf_pages, resp.alloc_pages, ret);
+ return -ENOMEM;
+ }
+
+ /* No need config user address if DCA config not changed */
+ if (!is_new_buf && buf->bufs[0])
+ return 0;
+
+ return setup_dca_buf(ctx, handle, buf, buf_pages);
+}
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index f3a7e6b..7e3ad92 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -197,19 +197,35 @@ static struct hns_roce_v2_cqe *next_cqe_sw_v2(struct hns_roce_cq *cq)
return get_sw_cqe_v2(cq, cq->cons_index);
}
+static inline bool check_qp_dca_enable(struct hns_roce_qp *qp)
+{
+ return !!qp->dca_wqe.bufs;
+}
+
+static inline void *get_wqe(struct hns_roce_qp *qp, unsigned int offset)
+{
+ if (likely(qp->buf.buf))
+ return qp->buf.buf + offset;
+ else if (unlikely(check_qp_dca_enable(qp)))
+ return qp->dca_wqe.bufs[offset >> qp->dca_wqe.shift] +
+ (offset & ((1 << qp->dca_wqe.shift) - 1));
+ else
+ return NULL;
+}
+
static void *get_recv_wqe_v2(struct hns_roce_qp *qp, unsigned int n)
{
- return qp->buf.buf + qp->rq.offset + (n << qp->rq.wqe_shift);
+ return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift));
}
static void *get_send_wqe(struct hns_roce_qp *qp, unsigned int n)
{
- return qp->buf.buf + qp->sq.offset + (n << qp->sq.wqe_shift);
+ return get_wqe(qp, qp->sq.offset + (n << qp->sq.wqe_shift));
}
static void *get_send_sge_ex(struct hns_roce_qp *qp, unsigned int n)
{
- return qp->buf.buf + qp->ex_sge.offset + (n << qp->ex_sge.sge_shift);
+ return get_wqe(qp, qp->ex_sge.offset + (n << qp->ex_sge.sge_shift));
}
static void *get_srq_wqe(struct hns_roce_srq *srq, unsigned int n)
@@ -569,6 +585,73 @@ static void parse_cqe_for_req(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc,
wc->opcode = wc_send_op_map[opcode];
}
+static bool check_dca_attach_enable(struct hns_roce_qp *qp)
+{
+ return check_qp_dca_enable(qp) &&
+ (qp->flags & HNS_ROCE_QP_CAP_DYNAMIC_CTX_ATTACH);
+}
+
+static bool check_dca_detach_enable(struct hns_roce_qp *qp)
+{
+ return check_qp_dca_enable(qp) &&
+ (qp->flags & HNS_ROCE_QP_CAP_DYNAMIC_CTX_DETACH);
+}
+
+static int dca_attach_qp_buf(struct hns_roce_context *ctx,
+ struct hns_roce_qp *qp)
+{
+ struct hns_roce_dca_attach_attr attr = {};
+ uint32_t idx;
+ int ret;
+
+ hns_roce_spin_lock(&qp->sq.hr_lock);
+ hns_roce_spin_lock(&qp->rq.hr_lock);
+
+ if (qp->sq.wqe_cnt > 0) {
+ idx = qp->sq.head & (qp->sq.wqe_cnt - 1);
+ attr.sq_offset = idx << qp->sq.wqe_shift;
+ }
+
+ if (qp->ex_sge.sge_cnt > 0) {
+ idx = qp->next_sge & (qp->ex_sge.sge_cnt - 1);
+ attr.sge_offset = idx << qp->ex_sge.sge_shift;
+ }
+
+ if (qp->rq.wqe_cnt > 0) {
+ idx = qp->rq.head & (qp->rq.wqe_cnt - 1);
+ attr.rq_offset = idx << qp->rq.wqe_shift;
+ }
+
+
+ ret = hns_roce_attach_dca_mem(ctx, qp->verbs_qp.qp.handle, &attr,
+ qp->buf_size, &qp->dca_wqe);
+
+ hns_roce_spin_unlock(&qp->rq.hr_lock);
+ hns_roce_spin_unlock(&qp->sq.hr_lock);
+
+ return ret;
+}
+
+static void dca_detach_qp_buf(struct hns_roce_context *ctx,
+ struct hns_roce_qp *qp)
+{
+ struct hns_roce_dca_detach_attr attr;
+ bool is_empty;
+
+ hns_roce_spin_lock(&qp->sq.hr_lock);
+ hns_roce_spin_lock(&qp->rq.hr_lock);
+
+ is_empty = qp->sq.head == qp->sq.tail && qp->rq.head == qp->rq.tail;
+ if (is_empty && qp->sq.wqe_cnt > 0)
+ attr.sq_index = qp->sq.head & (qp->sq.wqe_cnt - 1);
+
+ hns_roce_spin_unlock(&qp->rq.hr_lock);
+ hns_roce_spin_unlock(&qp->sq.hr_lock);
+
+ if (is_empty)
+ hns_roce_detach_dca_mem(ctx, qp->verbs_qp.qp.handle, &attr);
+}
+
static void cqe_proc_sq(struct hns_roce_qp *hr_qp, uint32_t wqe_idx,
struct hns_roce_cq *cq)
{
@@ -725,6 +808,9 @@ static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne,
for (npolled = 0; npolled < ne; ++npolled) {
err = hns_roce_poll_one(ctx, &qp, cq, wc + npolled);
+ if (qp && check_dca_detach_enable(qp))
+ dca_detach_qp_buf(ctx, qp);
+
if (err != V2_CQ_OK)
break;
}
@@ -768,19 +854,30 @@ static int hns_roce_u_v2_arm_cq(struct ibv_cq *ibvcq, int solicited)
return 0;
}
-static int check_qp_send(struct ibv_qp *qp, struct hns_roce_context *ctx)
+static int check_qp_send(struct hns_roce_qp *qp, struct hns_roce_context *ctx)
{
- if (unlikely(qp->qp_type != IBV_QPT_RC &&
- qp->qp_type != IBV_QPT_UD) &&
- qp->qp_type != IBV_QPT_XRC_SEND)
+ struct ibv_qp *ibvqp = &qp->verbs_qp.qp;
+ int ret = 0;
+
+ if (unlikely(ibvqp->qp_type != IBV_QPT_RC &&
+ ibvqp->qp_type != IBV_QPT_UD) &&
+ ibvqp->qp_type != IBV_QPT_XRC_SEND)
return -EINVAL;
- if (unlikely(qp->state == IBV_QPS_RESET ||
- qp->state == IBV_QPS_INIT ||
- qp->state == IBV_QPS_RTR))
+ if (unlikely(ibvqp->state == IBV_QPS_RESET ||
+ ibvqp->state == IBV_QPS_INIT ||
+ ibvqp->state == IBV_QPS_RTR))
return -EINVAL;
- return 0;
+ if (check_dca_attach_enable(qp)) {
+ ret = dca_attach_qp_buf(ctx, qp);
+ if (ret)
+ verbs_err_datapath(&ctx->ibv_ctx,
+ "failed to attach QP-%u send, ret = %d.\n",
+ qp->verbs_qp.qp.qp_num, ret);
+ }
+
+ return ret;
}
static void set_rc_sge(struct hns_roce_v2_wqe_data_seg *dseg,
@@ -1148,6 +1245,13 @@ static int set_rc_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr,
return 0;
}
+static inline void fill_rc_dca_fields(uint32_t qp_num,
+ struct hns_roce_rc_sq_wqe *wqe)
+{
+ hr_reg_write(wqe, RCWQE_SQPN_L, qp_num);
+ hr_reg_write(wqe, RCWQE_SQPN_H, qp_num >> RCWQE_SQPN_L_WIDTH);
+}
+
static void set_bind_mw_seg(struct hns_roce_rc_sq_wqe *wqe,
const struct ibv_send_wr *wr)
{
@@ -1259,6 +1363,9 @@ static int set_rc_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr,
return ret;
wqe_valid:
+ if (check_qp_dca_enable(qp))
+ fill_rc_dca_fields(qp->verbs_qp.qp.qp_num, rc_sq_wqe);
+
enable_wqe(qp, rc_sq_wqe, qp->sq.head + nreq);
return 0;
@@ -1275,7 +1382,7 @@ int hns_roce_u_v2_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr,
struct ibv_qp_attr attr;
int ret;
- ret = check_qp_send(ibvqp, ctx);
+ ret = check_qp_send(qp, ctx);
if (unlikely(ret)) {
*bad_wr = wr;
return ret;
@@ -1352,15 +1459,20 @@ out:
return ret;
}
-static int check_qp_recv(struct ibv_qp *qp, struct hns_roce_context *ctx)
+static int check_qp_recv(struct hns_roce_qp *qp, struct hns_roce_context *ctx)
{
- if (unlikely(qp->qp_type != IBV_QPT_RC &&
- qp->qp_type != IBV_QPT_UD))
+ struct ibv_qp *ibvqp = &qp->verbs_qp.qp;
+
+ if (unlikely(ibvqp->qp_type != IBV_QPT_RC &&
+ ibvqp->qp_type != IBV_QPT_UD))
return -EINVAL;
- if (qp->state == IBV_QPS_RESET || qp->srq)
+ if (ibvqp->state == IBV_QPS_RESET || ibvqp->srq)
return -EINVAL;
+ if (check_dca_attach_enable(qp))
+ return dca_attach_qp_buf(ctx, qp);
+
return 0;
}
@@ -1428,7 +1540,7 @@ static int hns_roce_u_v2_post_recv(struct ibv_qp *ibvqp, struct ibv_recv_wr *wr,
struct ibv_qp_attr attr;
int ret;
- ret = check_qp_recv(ibvqp, ctx);
+ ret = check_qp_recv(qp, ctx);
if (unlikely(ret)) {
*bad_wr = wr;
return ret;
@@ -1551,6 +1663,7 @@ static void record_qp_attr(struct ibv_qp *qp, struct ibv_qp_attr *attr,
static int hns_roce_u_v2_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
int attr_mask)
{
+ struct hns_roce_context *ctx = to_hr_ctx(qp->context);
struct hns_roce_modify_qp_ex_resp resp_ex = {};
struct hns_roce_modify_qp_ex cmd_ex = {};
struct hns_roce_qp *hr_qp = to_hr_qp(qp);
@@ -1598,6 +1711,10 @@ static int hns_roce_u_v2_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
if (hr_qp->tc_mode == HNS_ROCE_TC_MAP_MODE_DSCP)
hr_qp->sl = hr_qp->priority;
+ /* Try to shrink the DCA mem */
+ if (ctx->dca_ctx.mem_cnt > 0)
+ hns_roce_shrink_dca_mem(ctx);
+
record_qp_attr(qp, attr, attr_mask);
return ret;
diff --git a/providers/hns/hns_roce_u_hw_v2.h b/providers/hns/hns_roce_u_hw_v2.h
index d71c695..a22995d 100644
--- a/providers/hns/hns_roce_u_hw_v2.h
+++ b/providers/hns/hns_roce_u_hw_v2.h
@@ -239,6 +239,8 @@ struct hns_roce_rc_sq_wqe {
#define RCWQE_MW_RR_EN RCWQE_FIELD_LOC(259, 259)
#define RCWQE_MW_RW_EN RCWQE_FIELD_LOC(260, 260)
+#define RCWQE_SQPN_L_WIDTH 2
+
struct hns_roce_v2_wqe_data_seg {
__le32 len;
__le32 lkey;
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index f6c7423..749b01b 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -1165,6 +1165,14 @@ static int calc_qp_buff_size(struct hns_roce_device *hr_dev,
return 0;
}
+static inline bool check_qp_support_dca(bool pool_en, enum ibv_qp_type qp_type)
+{
+ if (pool_en && (qp_type == IBV_QPT_RC || qp_type == IBV_QPT_XRC_SEND))
+ return true;
+
+ return false;
+}
+
static void qp_free_wqe(struct hns_roce_qp *qp)
{
free_recv_rinl_buf(&qp->rq_rinl_buf);
@@ -1176,8 +1184,8 @@ static void qp_free_wqe(struct hns_roce_qp *qp)
hns_roce_free_buf(&qp->buf);
}
-static int qp_alloc_wqe(struct ibv_qp_cap *cap, struct hns_roce_qp *qp,
- struct hns_roce_context *ctx)
+static int qp_alloc_wqe(struct ibv_qp_init_attr_ex *attr,
+ struct hns_roce_qp *qp, struct hns_roce_context *ctx)
{
struct hns_roce_device *hr_dev = to_hr_dev(ctx->ibv_ctx.context.device);
@@ -1195,12 +1203,24 @@ static int qp_alloc_wqe(struct ibv_qp_cap *cap, struct hns_roce_qp *qp,
}
if (qp->rq_rinl_buf.wqe_cnt) {
- if (alloc_recv_rinl_buf(cap->max_recv_sge, &qp->rq_rinl_buf))
+ if (alloc_recv_rinl_buf(attr->cap.max_recv_sge,
+ &qp->rq_rinl_buf))
goto err_alloc;
}
- if (hns_roce_alloc_buf(&qp->buf, qp->buf_size, HNS_HW_PAGE_SIZE))
- goto err_alloc;
+ if (check_qp_support_dca(ctx->dca_ctx.max_size != 0, attr->qp_type)) {
+ /* when DCA is enabled, use a buffer list to store page addr */
+ qp->buf.buf = NULL;
+ qp->dca_wqe.max_cnt = hr_hw_page_count(qp->buf_size);
+ qp->dca_wqe.shift = HNS_HW_PAGE_SHIFT;
+ qp->dca_wqe.bufs = calloc(qp->dca_wqe.max_cnt, sizeof(void *));
+ if (!qp->dca_wqe.bufs)
+ goto err_alloc;
+ } else {
+ if (hns_roce_alloc_buf(&qp->buf, qp->buf_size,
+ HNS_HW_PAGE_SIZE))
+ goto err_alloc;
+ }
return 0;
@@ -1467,7 +1487,7 @@ static int hns_roce_alloc_qp_buf(struct ibv_qp_init_attr_ex *attr,
pthread_spin_init(&qp->rq.hr_lock.lock, PTHREAD_PROCESS_PRIVATE))
return -ENOMEM;
- ret = qp_alloc_wqe(&attr->cap, qp, ctx);
+ ret = qp_alloc_wqe(attr, qp, ctx);
if (ret)
return ret;
--
2.30.0


@@ -0,0 +1,167 @@
From a5e62921afc2fcc152e8b0584f2d04d1a4db4f10 Mon Sep 17 00:00:00 2001
From: Chengchang Tang <tangchengchang@huawei.com>
Date: Tue, 29 Jun 2021 20:06:47 +0800
Subject: libhns: Use shared memory to sync DCA status
driver inclusion
category: feature
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I63L1M
----------------------------------------------------------
The user-space DCA logic needs to check a QP's attaching state before filling
the WQE buffer, based on the response of the uverbs method
'HNS_IB_METHOD_DCA_MEM_ATTACH', but doing so for every post wastes too much
time on system calls. Instead, use a table shared between the user driver and
the kernel driver to sync the DCA status.
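As a worked example of the layout set up below (assuming a 4 KB status
mapping): the kernel exports the table via dca_mmap_key, the first half of the
mapping holds the per-QP attach-status bits (buf_status) and the second half
the per-QP sync bits (sync_status), so a 4096-byte mapping with 2 bits per QP
can track 4096 * 8 / 2 = 16384 QPs.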
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
Reviewed-by: Yangyang Li <liyangyang20@huawei.com>
---
providers/hns/hns_roce_u.c | 51 +++++++++++++++++++++++++++++++++++---
providers/hns/hns_roce_u.h | 10 ++++++++
2 files changed, 57 insertions(+), 4 deletions(-)
diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c
index bd2b251..fe30cda 100644
--- a/providers/hns/hns_roce_u.c
+++ b/providers/hns/hns_roce_u.c
@@ -95,9 +95,33 @@ static const struct verbs_context_ops hns_common_ops = {
.alloc_parent_domain = hns_roce_u_alloc_pad,
};
-static int init_dca_context(struct hns_roce_context *ctx, int page_size)
+static int mmap_dca(struct hns_roce_context *ctx, int cmd_fd,
+ int page_size, size_t size, uint64_t mmap_key)
{
struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
+ void *addr;
+
+ addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, cmd_fd,
+ mmap_key);
+ if (addr == MAP_FAILED) {
+ verbs_err(&ctx->ibv_ctx, "failed to mmap() dca prime qp.\n");
+ return -EINVAL;
+ }
+
+ dca_ctx->buf_status = addr;
+ dca_ctx->sync_status = addr + size / 2;
+
+ return 0;
+}
+
+static int init_dca_context(struct hns_roce_context *ctx, int cmd_fd,
+ struct hns_roce_alloc_ucontext_resp *resp,
+ int page_size)
+{
+ struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
+ uint64_t mmap_key = resp->dca_mmap_key;
+ int mmap_size = resp->dca_mmap_size;
+ int max_qps = resp->dca_qps;
int ret;
if (!(ctx->config & HNS_ROCE_UCTX_RSP_DCA_FLAGS))
@@ -112,6 +136,16 @@ static int init_dca_context(struct hns_roce_context *ctx, int page_size)
dca_ctx->max_size = HNS_DCA_MAX_MEM_SIZE;
dca_ctx->mem_cnt = 0;
+ if (mmap_key) {
+ const unsigned int bits_per_qp = 2 * HNS_DCA_BITS_PER_STATUS;
+
+ if (!mmap_dca(ctx, cmd_fd, page_size, mmap_size, mmap_key)) {
+ dca_ctx->status_size = mmap_size;
+ dca_ctx->max_qps = min_t(int, max_qps,
+ mmap_size * 8 / bits_per_qp);
+ }
+ }
+
return 0;
}
@@ -125,6 +159,8 @@ static void uninit_dca_context(struct hns_roce_context *ctx)
pthread_spin_lock(&dca_ctx->lock);
hns_roce_cleanup_dca_mem(ctx);
pthread_spin_unlock(&dca_ctx->lock);
+ if (dca_ctx->buf_status)
+ munmap(dca_ctx->buf_status, dca_ctx->status_size);
pthread_spin_destroy(&dca_ctx->lock);
}
@@ -149,6 +185,14 @@ static uint32_t calc_table_shift(uint32_t entry_count, uint32_t size_shift)
return count_shift > size_shift ? count_shift - size_shift : 0;
}
+static void ucontext_set_cmd(struct hns_roce_alloc_ucontext *cmd, int page_size)
+{
+ cmd->config |= HNS_ROCE_EXSGE_FLAGS | HNS_ROCE_RQ_INLINE_FLAGS |
+ HNS_ROCE_CQE_INLINE_FLAGS | HNS_ROCE_UCTX_CONFIG_DCA;
+ cmd->comp = HNS_ROCE_ALLOC_UCTX_COMP_DCA_MAX_QPS;
+ cmd->dca_max_qps = page_size * 8 / 2 * HNS_DCA_BITS_PER_STATUS;
+}
+
static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
int cmd_fd,
void *private_data)
@@ -165,8 +209,7 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
if (!context)
return NULL;
- cmd.config |= HNS_ROCE_EXSGE_FLAGS | HNS_ROCE_RQ_INLINE_FLAGS |
- HNS_ROCE_CQE_INLINE_FLAGS | HNS_ROCE_UCTX_CONFIG_DCA;
+ ucontext_set_cmd(&cmd, hr_dev->page_size);
if (ibv_cmd_get_context(&context->ibv_ctx, &cmd.ibv_cmd, sizeof(cmd),
&resp.ibv_resp, sizeof(resp)))
goto err_free;
@@ -212,7 +255,7 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
context->max_srq_wr = dev_attrs.max_srq_wr;
context->max_srq_sge = dev_attrs.max_srq_sge;
- if (init_dca_context(context, hr_dev->page_size))
+ if (init_dca_context(context, cmd_fd, &resp, hr_dev->page_size))
goto err_free;
if (hns_roce_mmap(hr_dev, context, cmd_fd))
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index 44a733f..a8f811e 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -35,6 +35,7 @@
#include <stddef.h>
#include <endian.h>
+#include <stdatomic.h>
#include <util/compiler.h>
#include <infiniband/driver.h>
@@ -44,6 +45,7 @@
#include <ccan/array_size.h>
#include <util/bitmap.h>
#include <ccan/container_of.h>
+#include <ccan/minmax.h>
#include <linux/if_ether.h>
#include "hns_roce_u_abi.h"
@@ -52,6 +54,8 @@
#define PFX "hns: "
+typedef _Atomic(uint64_t) atomic_bitmap_t;
+
/* The minimum page size is 4K for hardware */
#define HNS_HW_PAGE_SHIFT 12
#define HNS_HW_PAGE_SIZE (1 << HNS_HW_PAGE_SHIFT)
@@ -214,6 +218,12 @@ struct hns_roce_dca_ctx {
uint64_t max_size;
uint64_t min_size;
uint64_t curr_size;
+
+#define HNS_DCA_BITS_PER_STATUS 1
+ unsigned int max_qps;
+ unsigned int status_size;
+ atomic_bitmap_t *buf_status;
+ atomic_bitmap_t *sync_status;
};
struct hns_roce_context {
--
2.30.0


@@ -0,0 +1,222 @@
From 13d4b60fcd0880fae54b1af627eeb7297d7b086d Mon Sep 17 00:00:00 2001
From: Chengchang Tang <tangchengchang@huawei.com>
Date: Tue, 29 Jun 2021 21:01:27 +0800
Subject: libhns: Sync DCA status by shared memory
driver inclusion
category: feature
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I63L1M
----------------------------------------------------------
Use the DCA num returned in the modify_qp() response to index the DCA status
bits in the shared memory. If the num is valid, the user-space DCA logic can
read the DCA status of each QP by testing its bit in the shared memory;
otherwise it invokes the uverbs method 'HNS_IB_METHOD_DCA_MEM_ATTACH' to check
the DCA status.
Each QP owns 2 bits in the shared memory: one bit locks DCA status changes
made by either the kernel driver or the user driver, and the other indicates
the DCA attaching status.
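
A minimal standalone sketch (not part of the patch) of the lock-bit handling
that the atomic bitmap helpers below implement, using the same 64-bit _Atomic
words; the helpers are simplified and the DCA num is an example value:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef _Atomic(uint64_t) atomic_bitmap_t;

    #define BIT_MASK(nr) (1UL << ((nr) % 64))
    #define BIT_WORD(nr) ((nr) / 64)

    /* try to take the per-QP sync bit; returns true if it was already set */
    static bool test_and_set_bit(atomic_bitmap_t *p, uint32_t nr)
    {
            uint64_t mask = BIT_MASK(nr);

            p += BIT_WORD(nr);
            return (atomic_fetch_or(p, mask) & mask) != 0;
    }

    static void clear_bit(atomic_bitmap_t *p, uint32_t nr)
    {
            p += BIT_WORD(nr);
            atomic_fetch_and(p, ~BIT_MASK(nr));
    }

    int main(void)
    {
            atomic_bitmap_t sync_status[4] = {0}; /* stands in for the mmap'ed table */
            uint32_t dcan = 70;                   /* example DCA num from modify_qp() */

            if (!test_and_set_bit(sync_status, dcan)) /* start_post: lock taken */
                    printf("QP dcan=%u locked for posting\n", dcan);
            clear_bit(sync_status, dcan);             /* stop_post: release */
            return 0;
    }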
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
Reviewed-by: Yangyang Li <liyangyang20@huawei.com>
---
providers/hns/hns_roce_u.h | 31 +++++++++++++++++++++++
providers/hns/hns_roce_u_buf.c | 42 ++++++++++++++++++++++++++++++++
providers/hns/hns_roce_u_hw_v2.c | 20 ++++++++++++++-
3 files changed, 92 insertions(+), 1 deletion(-)
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index a8f811e..91b0c8f 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -362,6 +362,7 @@ struct hns_roce_dca_buf {
void **bufs;
unsigned int max_cnt;
unsigned int shift;
+ unsigned int dcan;
};
struct hns_roce_qp {
@@ -422,6 +423,7 @@ struct hns_roce_dca_attach_attr {
uint32_t sq_offset;
uint32_t sge_offset;
uint32_t rq_offset;
+ bool force;
};
struct hns_roce_dca_detach_attr {
@@ -534,6 +536,32 @@ static inline int hns_roce_spin_unlock(struct hns_roce_spinlock *hr_lock)
return 0;
}
+#define HNS_ROCE_BIT_MASK(nr) (1UL << ((nr) % 64))
+#define HNS_ROCE_BIT_WORD(nr) ((nr) / 64)
+
+static inline bool atomic_test_bit(atomic_bitmap_t *p, uint32_t nr)
+{
+ p += HNS_ROCE_BIT_WORD(nr);
+ return !!(atomic_load(p) & HNS_ROCE_BIT_MASK(nr));
+}
+
+static inline bool test_and_set_bit_lock(atomic_bitmap_t *p, uint32_t nr)
+{
+ uint64_t mask = HNS_ROCE_BIT_MASK(nr);
+
+ p += HNS_ROCE_BIT_WORD(nr);
+ if (atomic_load(p) & mask)
+ return true;
+
+ return (atomic_fetch_or(p, mask) & mask) != 0;
+}
+
+static inline void clear_bit_unlock(atomic_bitmap_t *p, uint32_t nr)
+{
+ p += HNS_ROCE_BIT_WORD(nr);
+ atomic_fetch_and(p, ~HNS_ROCE_BIT_MASK(nr));
+}
+
int hns_roce_u_query_device(struct ibv_context *context,
const struct ibv_query_device_ex_input *input,
struct ibv_device_attr_ex *attr, size_t attr_size);
@@ -614,6 +642,9 @@ int hns_roce_attach_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
uint32_t size, struct hns_roce_dca_buf *buf);
void hns_roce_detach_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
struct hns_roce_dca_detach_attr *attr);
+bool hns_roce_dca_start_post(struct hns_roce_dca_ctx *ctx, uint32_t dcan);
+void hns_roce_dca_stop_post(struct hns_roce_dca_ctx *ctx, uint32_t dcan);
+
void hns_roce_shrink_dca_mem(struct hns_roce_context *ctx);
void hns_roce_cleanup_dca_mem(struct hns_roce_context *ctx);
diff --git a/providers/hns/hns_roce_u_buf.c b/providers/hns/hns_roce_u_buf.c
index 3d41b89..08c0fbc 100644
--- a/providers/hns/hns_roce_u_buf.c
+++ b/providers/hns/hns_roce_u_buf.c
@@ -440,6 +440,45 @@ static int setup_dca_buf(struct hns_roce_context *ctx, uint32_t handle,
return (idx >= page_count) ? 0 : -ENOMEM;
}
+#define DCAN_TO_SYNC_BIT(n) ((n) * HNS_DCA_BITS_PER_STATUS)
+#define DCAN_TO_STAT_BIT(n) DCAN_TO_SYNC_BIT(n)
+
+#define MAX_DCA_TRY_LOCK_TIMES 10
+bool hns_roce_dca_start_post(struct hns_roce_dca_ctx *ctx, uint32_t dcan)
+{
+ atomic_bitmap_t *st = ctx->sync_status;
+ int try_times = 0;
+
+ if (!st || dcan >= ctx->max_qps)
+ return true;
+
+ while (test_and_set_bit_lock(st, DCAN_TO_SYNC_BIT(dcan)))
+ if (try_times++ > MAX_DCA_TRY_LOCK_TIMES)
+ return false;
+
+ return true;
+}
+
+void hns_roce_dca_stop_post(struct hns_roce_dca_ctx *ctx, uint32_t dcan)
+{
+ atomic_bitmap_t *st = ctx->sync_status;
+
+ if (!st || dcan >= ctx->max_qps)
+ return;
+
+ clear_bit_unlock(st, DCAN_TO_SYNC_BIT(dcan));
+}
+
+static bool check_dca_is_attached(struct hns_roce_dca_ctx *ctx, uint32_t dcan)
+{
+ atomic_bitmap_t *st = ctx->buf_status;
+
+ if (!st || dcan >= ctx->max_qps)
+ return false;
+
+ return atomic_test_bit(st, DCAN_TO_STAT_BIT(dcan));
+}
+
#define DCA_EXPAND_MEM_TRY_TIMES 3
int hns_roce_attach_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
struct hns_roce_dca_attach_attr *attr,
@@ -451,6 +490,9 @@ int hns_roce_attach_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
int try_times = 0;
int ret = 0;
+ if (!attr->force && check_dca_is_attached(&ctx->dca_ctx, buf->dcan))
+ return 0;
+
do {
resp.alloc_pages = 0;
ret = attach_dca_mem(ctx, handle, attr, &resp);
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index 7e3ad92..028d20c 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -601,6 +601,7 @@ static int dca_attach_qp_buf(struct hns_roce_context *ctx,
struct hns_roce_qp *qp)
{
struct hns_roce_dca_attach_attr attr = {};
+ bool enable_detach;
uint32_t idx;
int ret;
@@ -622,9 +623,16 @@ static int dca_attach_qp_buf(struct hns_roce_context *ctx,
attr.rq_offset = idx << qp->rq.wqe_shift;
}
+ enable_detach = check_dca_detach_enable(qp);
+ if (enable_detach &&
+ !hns_roce_dca_start_post(&ctx->dca_ctx, qp->dca_wqe.dcan))
+ /* Force attach if failed to sync dca status */
+ attr.force = true;
ret = hns_roce_attach_dca_mem(ctx, qp->verbs_qp.qp.handle, &attr,
- qp->buf_size, &qp->dca_wqe);
+ qp->buf_size, &qp->dca_wqe);
+ if (ret && enable_detach)
+ hns_roce_dca_stop_post(&ctx->dca_ctx, qp->dca_wqe.dcan);
hns_roce_spin_unlock(&qp->rq.hr_lock);
hns_roce_spin_unlock(&qp->sq.hr_lock);
@@ -1450,6 +1458,9 @@ out:
hns_roce_spin_unlock(&qp->sq.hr_lock);
+ if (check_dca_detach_enable(qp))
+ hns_roce_dca_stop_post(&ctx->dca_ctx, qp->dca_wqe.dcan);
+
if (ibvqp->state == IBV_QPS_ERR) {
attr.qp_state = IBV_QPS_ERR;
@@ -1582,6 +1593,9 @@ out:
hns_roce_spin_unlock(&qp->rq.hr_lock);
+ if (check_dca_detach_enable(qp))
+ hns_roce_dca_stop_post(&ctx->dca_ctx, qp->dca_wqe.dcan);
+
if (ibvqp->state == IBV_QPS_ERR) {
attr.qp_state = IBV_QPS_ERR;
hns_roce_u_v2_modify_qp(ibvqp, &attr, IBV_QP_STATE);
@@ -1693,6 +1707,7 @@ static int hns_roce_u_v2_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
if (attr->qp_state == IBV_QPS_RTR) {
hr_qp->tc_mode = resp_ex.drv_payload.tc_mode;
hr_qp->priority = resp_ex.drv_payload.priority;
+ hr_qp->dca_wqe.dcan = resp_ex.drv_payload.dcan;
}
}
@@ -2721,6 +2736,9 @@ static int wr_complete(struct ibv_qp_ex *ibv_qp)
out:
hns_roce_spin_unlock(&qp->sq.hr_lock);
+ if (check_dca_detach_enable(qp))
+ hns_roce_dca_stop_post(&ctx->dca_ctx, qp->dca_wqe.dcan);
+
if (ibv_qp->qp_base.state == IBV_QPS_ERR) {
attr.qp_state = IBV_QPS_ERR;
hns_roce_u_v2_modify_qp(&ibv_qp->qp_base, &attr, IBV_QP_STATE);
--
2.30.0


@@ -0,0 +1,766 @@
From 6aa5efb3059c66d3d0f49804551b38c5ed827ec1 Mon Sep 17 00:00:00 2001
From: Chengchang Tang <tangchengchang@huawei.com>
Date: Mon, 10 May 2021 17:13:49 +0800
Subject: libhns: Add direct verbs support to config DCA
driver inclusion
category: feature
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I63L1M
----------------------------------------------------------
Add two direct verbs to configure DCA:
1. hnsdv_open_device() is used to configure the DCA memory pool.
2. hnsdv_create_qp() is used to create a DCA QP.
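
A hedged usage sketch (not part of the patch) of how an application might
enable DCA through the new direct verbs; the attribute and flag names are the
ones referenced in the hunks below, but the exact struct hnsdv_context_attr
layout lives in hnsdv.h (not shown here) and error handling is minimal:

    #include <stdio.h>
    #include <infiniband/verbs.h>
    #include <infiniband/hnsdv.h>

    int main(void)
    {
            struct ibv_device **list = ibv_get_device_list(NULL);
            struct ibv_context *ctx;

            if (!list || !list[0] || !hnsdv_is_supported(list[0])) {
                    fprintf(stderr, "no hns device found\n");
                    return 1;
            }

            /* Opt in to DCA and pre-size the shared status table for 128 QPs.
             * Field names follow their usage in this patch; unit/max/min pool
             * sizes could also be set via the HNSDV_CONTEXT_MASK_DCA_* masks.
             */
            struct hnsdv_context_attr attr = {
                    .flags = HNSDV_CONTEXT_FLAGS_DCA,
                    .comp_mask = HNSDV_CONTEXT_MASK_DCA_PRIME_QPS,
                    .dca_prime_qps = 128,
            };

            ctx = hnsdv_open_device(list[0], &attr);
            if (!ctx) {
                    fprintf(stderr, "hnsdv_open_device failed\n");
                    return 1;
            }

            /* ... create DCA QPs with hnsdv_create_qp(), post WRs as usual ... */

            ibv_close_device(ctx);
            ibv_free_device_list(list);
            return 0;
    }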
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
Reviewed-by: Yangyang Li <liyangyang20@huawei.com>
---
debian/control | 2 +-
debian/ibverbs-providers.install | 1 +
debian/ibverbs-providers.lintian-overrides | 4 +-
debian/ibverbs-providers.symbols | 6 ++
debian/libibverbs-dev.install | 4 +
providers/hns/CMakeLists.txt | 9 ++-
providers/hns/hns_roce_u.c | 92 +++++++++++++++++++---
providers/hns/hns_roce_u.h | 2 +
providers/hns/hns_roce_u_abi.h | 1 +
providers/hns/hns_roce_u_buf.c | 3 +
providers/hns/hns_roce_u_hw_v2.c | 33 +++++++-
providers/hns/hns_roce_u_verbs.c | 58 ++++++++++++--
providers/hns/hnsdv.h | 65 +++++++++++++++
providers/hns/libhns.map | 9 +++
redhat/rdma-core.spec | 5 +-
suse/rdma-core.spec | 21 ++++-
16 files changed, 289 insertions(+), 26 deletions(-)
create mode 100644 providers/hns/hnsdv.h
create mode 100644 providers/hns/libhns.map
diff --git a/debian/control b/debian/control
index 7485ad3..22eb6cd 100644
--- a/debian/control
+++ b/debian/control
@@ -94,7 +94,7 @@ Description: User space provider drivers for libibverbs
- cxgb4: Chelsio T4 iWARP HCAs
- efa: Amazon Elastic Fabric Adapter
- hfi1verbs: Intel Omni-Path HFI
- - hns: HiSilicon Hip06 SoC
+ - hns: HiSilicon Hip08+ SoC
- ipathverbs: QLogic InfiniPath HCAs
- irdma: Intel Ethernet Connection RDMA
- mlx4: Mellanox ConnectX-3 InfiniBand HCAs
diff --git a/debian/ibverbs-providers.install b/debian/ibverbs-providers.install
index 4f971fb..c6ecbbc 100644
--- a/debian/ibverbs-providers.install
+++ b/debian/ibverbs-providers.install
@@ -1,5 +1,6 @@
etc/libibverbs.d/
usr/lib/*/libefa.so.*
usr/lib/*/libibverbs/lib*-rdmav*.so
+usr/lib/*/libhns.so.*
usr/lib/*/libmlx4.so.*
usr/lib/*/libmlx5.so.*
diff --git a/debian/ibverbs-providers.lintian-overrides b/debian/ibverbs-providers.lintian-overrides
index 8a44d54..f6afb70 100644
--- a/debian/ibverbs-providers.lintian-overrides
+++ b/debian/ibverbs-providers.lintian-overrides
@@ -1,2 +1,2 @@
-# libefa, libmlx4 and libmlx5 are ibverbs provider that provides more functions.
-ibverbs-providers: package-name-doesnt-match-sonames libefa1 libmlx4-1 libmlx5-1
+# libefa, libhns, libmlx4 and libmlx5 are ibverbs provider that provides more functions.
+ibverbs-providers: package-name-doesnt-match-sonames libefa1 libhns-1 libmlx4-1 libmlx5-1
diff --git a/debian/ibverbs-providers.symbols b/debian/ibverbs-providers.symbols
index 2c6b330..1844369 100644
--- a/debian/ibverbs-providers.symbols
+++ b/debian/ibverbs-providers.symbols
@@ -162,3 +162,9 @@ libefa.so.1 ibverbs-providers #MINVER#
efadv_create_qp_ex@EFA_1.1 26
efadv_query_device@EFA_1.1 26
efadv_query_ah@EFA_1.1 26
+libhns.so.1 ibverbs-providers #MINVER#
+* Build-Depends-Package: libibverbs-dev
+ HNS_1.0@HNS_1.0 36
+ hnsdv_is_supported@HNS_1.0 36
+ hnsdv_open_device@HNS_1.0 36
+ hnsdv_create_qp@HNS_1.0 36
diff --git a/debian/libibverbs-dev.install b/debian/libibverbs-dev.install
index bc8caa5..7d6e6a2 100644
--- a/debian/libibverbs-dev.install
+++ b/debian/libibverbs-dev.install
@@ -1,5 +1,6 @@
usr/include/infiniband/arch.h
usr/include/infiniband/efadv.h
+usr/include/infiniband/hnsdv.h
usr/include/infiniband/ib_user_ioctl_verbs.h
usr/include/infiniband/mlx4dv.h
usr/include/infiniband/mlx5_api.h
@@ -14,6 +15,8 @@ usr/include/infiniband/verbs_api.h
usr/lib/*/lib*-rdmav*.a
usr/lib/*/libefa.a
usr/lib/*/libefa.so
+usr/lib/*/libhns.a
+usr/lib/*/libhns.so
usr/lib/*/libibverbs*.so
usr/lib/*/libibverbs.a
usr/lib/*/libmlx4.a
@@ -21,6 +24,7 @@ usr/lib/*/libmlx4.so
usr/lib/*/libmlx5.a
usr/lib/*/libmlx5.so
usr/lib/*/pkgconfig/libefa.pc
+usr/lib/*/pkgconfig/libhns.pc
usr/lib/*/pkgconfig/libibverbs.pc
usr/lib/*/pkgconfig/libmlx4.pc
usr/lib/*/pkgconfig/libmlx5.pc
diff --git a/providers/hns/CMakeLists.txt b/providers/hns/CMakeLists.txt
index 7aaca75..160e1ff 100644
--- a/providers/hns/CMakeLists.txt
+++ b/providers/hns/CMakeLists.txt
@@ -1,7 +1,14 @@
-rdma_provider(hns
+rdma_shared_provider(hns libhns.map
+ 1 1.0.${PACKAGE_VERSION}
hns_roce_u.c
hns_roce_u_buf.c
hns_roce_u_db.c
hns_roce_u_hw_v2.c
hns_roce_u_verbs.c
)
+
+publish_headers(infiniband
+ hnsdv.h
+)
+
+rdma_pkg_config("hns" "libibverbs" "${CMAKE_THREAD_LIBS_INIT}")
diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c
index fe30cda..0cf6d4b 100644
--- a/providers/hns/hns_roce_u.c
+++ b/providers/hns/hns_roce_u.c
@@ -114,8 +114,60 @@ static int mmap_dca(struct hns_roce_context *ctx, int cmd_fd,
return 0;
}
+bool hnsdv_is_supported(struct ibv_device *device)
+{
+ return is_hns_dev(device);
+}
+
+struct ibv_context *hnsdv_open_device(struct ibv_device *device,
+ struct hnsdv_context_attr *attr)
+{
+ if (!is_hns_dev(device)) {
+ errno = EOPNOTSUPP;
+ return NULL;
+ }
+
+ return verbs_open_device(device, attr);
+}
+
+static void set_dca_pool_param(struct hns_roce_context *ctx,
+ struct hnsdv_context_attr *attr, int page_size)
+{
+ struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
+
+ if (attr->comp_mask & HNSDV_CONTEXT_MASK_DCA_UNIT_SIZE)
+ dca_ctx->unit_size = align(attr->dca_unit_size, page_size);
+ else
+ dca_ctx->unit_size = page_size * HNS_DCA_DEFAULT_UNIT_PAGES;
+
+ /* The memory pool cannot be expanded, only init the DCA context. */
+ if (dca_ctx->unit_size == 0)
+ return;
+
+ /* If not set, the memory pool can be expanded unlimitedly. */
+ if (attr->comp_mask & HNSDV_CONTEXT_MASK_DCA_MAX_SIZE)
+ dca_ctx->max_size = DIV_ROUND_UP(attr->dca_max_size,
+ dca_ctx->unit_size) *
+ dca_ctx->unit_size;
+ else
+ dca_ctx->max_size = HNS_DCA_MAX_MEM_SIZE;
+
+ /* If not set, the memory pool cannot be shrunk. */
+ if (attr->comp_mask & HNSDV_CONTEXT_MASK_DCA_MIN_SIZE)
+ dca_ctx->min_size = DIV_ROUND_UP(attr->dca_min_size,
+ dca_ctx->unit_size) *
+ dca_ctx->unit_size;
+ else
+ dca_ctx->min_size = HNS_DCA_MAX_MEM_SIZE;
+
+ verbs_debug(&ctx->ibv_ctx,
+ "Support DCA, unit %d, max %ld, min %ld Bytes.\n",
+ dca_ctx->unit_size, dca_ctx->max_size, dca_ctx->min_size);
+}
+
static int init_dca_context(struct hns_roce_context *ctx, int cmd_fd,
struct hns_roce_alloc_ucontext_resp *resp,
+ struct hnsdv_context_attr *attr,
int page_size)
{
struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
@@ -127,14 +179,18 @@ static int init_dca_context(struct hns_roce_context *ctx, int cmd_fd,
if (!(ctx->config & HNS_ROCE_UCTX_RSP_DCA_FLAGS))
return 0;
+ dca_ctx->unit_size = 0;
+ dca_ctx->mem_cnt = 0;
+
list_head_init(&dca_ctx->mem_list);
ret = pthread_spin_init(&dca_ctx->lock, PTHREAD_PROCESS_PRIVATE);
if (ret)
return ret;
- dca_ctx->unit_size = page_size * HNS_DCA_DEFAULT_UNIT_PAGES;
- dca_ctx->max_size = HNS_DCA_MAX_MEM_SIZE;
- dca_ctx->mem_cnt = 0;
+ if (!attr || !(attr->flags & HNSDV_CONTEXT_FLAGS_DCA))
+ return 0;
+
+ set_dca_pool_param(ctx, attr, page_size);
if (mmap_key) {
const unsigned int bits_per_qp = 2 * HNS_DCA_BITS_PER_STATUS;
@@ -185,18 +241,28 @@ static uint32_t calc_table_shift(uint32_t entry_count, uint32_t size_shift)
return count_shift > size_shift ? count_shift - size_shift : 0;
}
-static void ucontext_set_cmd(struct hns_roce_alloc_ucontext *cmd, int page_size)
+static void ucontext_set_cmd(struct hns_roce_alloc_ucontext *cmd,
+ struct hnsdv_context_attr *attr)
{
cmd->config |= HNS_ROCE_EXSGE_FLAGS | HNS_ROCE_RQ_INLINE_FLAGS |
- HNS_ROCE_CQE_INLINE_FLAGS | HNS_ROCE_UCTX_CONFIG_DCA;
- cmd->comp = HNS_ROCE_ALLOC_UCTX_COMP_DCA_MAX_QPS;
- cmd->dca_max_qps = page_size * 8 / 2 * HNS_DCA_BITS_PER_STATUS;
+ HNS_ROCE_CQE_INLINE_FLAGS;
+
+ if (!attr || !(attr->flags & HNSDV_CONTEXT_FLAGS_DCA))
+ return;
+
+ cmd->config |= HNS_ROCE_UCTX_CONFIG_DCA;
+
+ if (attr->comp_mask & HNSDV_CONTEXT_MASK_DCA_PRIME_QPS) {
+ cmd->comp |= HNS_ROCE_ALLOC_UCTX_COMP_DCA_MAX_QPS;
+ cmd->dca_max_qps = attr->dca_prime_qps;
+ }
}
static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
int cmd_fd,
void *private_data)
{
+ struct hnsdv_context_attr *ctx_attr = private_data;
struct hns_roce_device *hr_dev = to_hr_dev(ibdev);
struct hns_roce_alloc_ucontext_resp resp = {};
struct hns_roce_alloc_ucontext cmd = {};
@@ -209,7 +275,7 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
if (!context)
return NULL;
- ucontext_set_cmd(&cmd, hr_dev->page_size);
+ ucontext_set_cmd(&cmd, ctx_attr);
if (ibv_cmd_get_context(&context->ibv_ctx, &cmd.ibv_cmd, sizeof(cmd),
&resp.ibv_resp, sizeof(resp)))
goto err_free;
@@ -255,7 +321,8 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
context->max_srq_wr = dev_attrs.max_srq_wr;
context->max_srq_sge = dev_attrs.max_srq_sge;
- if (init_dca_context(context, cmd_fd, &resp, hr_dev->page_size))
+ if (init_dca_context(context, cmd_fd,
+ &resp, ctx_attr, hr_dev->page_size))
goto err_free;
if (hns_roce_mmap(hr_dev, context, cmd_fd))
@@ -317,4 +384,11 @@ static const struct verbs_device_ops hns_roce_dev_ops = {
.uninit_device = hns_uninit_device,
.alloc_context = hns_roce_alloc_context,
};
+
+bool is_hns_dev(struct ibv_device *device)
+{
+ struct verbs_device *verbs_device = verbs_get_device(device);
+
+ return verbs_device->ops == &hns_roce_dev_ops;
+}
PROVIDER_DRIVER(hns, hns_roce_dev_ops);
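
For context, the hnsdv entry points above only enable DCA when the caller asks for it at context-open time. Below is a minimal, non-authoritative sketch (not part of this patch) of how an application could do that; the helper name open_hns_dca() and the pool sizes/QP count are illustrative values only.

#include <infiniband/verbs.h>
#include <infiniband/hnsdv.h>

/* Sketch: open the first hns device with a DCA pool configured.
 * The unit/max sizes and prime QP count are example values only. */
static struct ibv_context *open_hns_dca(void)
{
	struct hnsdv_context_attr attr = {
		.flags = HNSDV_CONTEXT_FLAGS_DCA,
		.comp_mask = HNSDV_CONTEXT_MASK_DCA_PRIME_QPS |
			     HNSDV_CONTEXT_MASK_DCA_UNIT_SIZE |
			     HNSDV_CONTEXT_MASK_DCA_MAX_SIZE,
		.dca_prime_qps = 128,
		.dca_unit_size = 16 * 4096,      /* aligned up to the page size */
		.dca_max_size = 4 * 1024 * 1024, /* rounded up to whole units */
	};
	struct ibv_device **list = ibv_get_device_list(NULL);
	struct ibv_context *ctx = NULL;
	int i;

	for (i = 0; list && list[i]; i++) {
		if (!hnsdv_is_supported(list[i]))
			continue;
		ctx = hnsdv_open_device(list[i], &attr);
		break;
	}
	if (list)
		ibv_free_device_list(list);
	return ctx;
}

set_dca_pool_param() above then rounds dca_unit_size up to the page size and dca_max_size/dca_min_size up to whole units before the pool is used.
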
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index 91b0c8f..71c35c5 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -562,6 +562,8 @@ static inline void clear_bit_unlock(atomic_bitmap_t *p, uint32_t nr)
atomic_fetch_and(p, ~HNS_ROCE_BIT_MASK(nr));
}
+bool is_hns_dev(struct ibv_device *device);
+
int hns_roce_u_query_device(struct ibv_context *context,
const struct ibv_query_device_ex_input *input,
struct ibv_device_attr_ex *attr, size_t attr_size);
diff --git a/providers/hns/hns_roce_u_abi.h b/providers/hns/hns_roce_u_abi.h
index 0519ac7..1eaf62d 100644
--- a/providers/hns/hns_roce_u_abi.h
+++ b/providers/hns/hns_roce_u_abi.h
@@ -36,6 +36,7 @@
#include <infiniband/kern-abi.h>
#include <rdma/hns-abi.h>
#include <kernel-abi/hns-abi.h>
+#include "hnsdv.h"
DECLARE_DRV_CMD(hns_roce_alloc_pd, IB_USER_VERBS_CMD_ALLOC_PD,
empty, hns_roce_ib_alloc_pd_resp);
diff --git a/providers/hns/hns_roce_u_buf.c b/providers/hns/hns_roce_u_buf.c
index 08c0fbc..780683e 100644
--- a/providers/hns/hns_roce_u_buf.c
+++ b/providers/hns/hns_roce_u_buf.c
@@ -56,6 +56,9 @@ int hns_roce_alloc_buf(struct hns_roce_buf *buf, unsigned int size,
void hns_roce_free_buf(struct hns_roce_buf *buf)
{
+ if (!buf->buf)
+ return;
+
ibv_dofork_range(buf->buf, buf->length);
munmap(buf->buf, buf->length);
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index 028d20c..7661863 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -1473,6 +1473,7 @@ out:
static int check_qp_recv(struct hns_roce_qp *qp, struct hns_roce_context *ctx)
{
struct ibv_qp *ibvqp = &qp->verbs_qp.qp;
+ int ret = 0;
if (unlikely(ibvqp->qp_type != IBV_QPT_RC &&
ibvqp->qp_type != IBV_QPT_UD))
@@ -1481,10 +1482,15 @@ static int check_qp_recv(struct hns_roce_qp *qp, struct hns_roce_context *ctx)
if (ibvqp->state == IBV_QPS_RESET || ibvqp->srq)
return -EINVAL;
- if (check_dca_attach_enable(qp))
- return dca_attach_qp_buf(ctx, qp);
+ if (check_dca_attach_enable(qp)) {
+ ret = dca_attach_qp_buf(ctx, qp);
+ if (ret)
+ verbs_err_datapath(&ctx->ibv_ctx,
+ "failed to attach QP-%u recv, ret = %d.\n",
+ qp->verbs_qp.qp.qp_num, ret);
+ }
- return 0;
+ return ret;
}
static void fill_recv_sge_to_wqe(struct ibv_recv_wr *wr, void *wqe,
@@ -1951,6 +1957,9 @@ static int wc_start_poll_cq(struct ibv_cq_ex *current,
hns_roce_spin_lock(&cq->hr_lock);
err = hns_roce_poll_one(ctx, &qp, cq, NULL);
+ if (qp && check_dca_detach_enable(qp))
+ dca_detach_qp_buf(ctx, qp);
+
if (err != V2_CQ_OK)
hns_roce_spin_unlock(&cq->hr_lock);
@@ -1965,6 +1974,8 @@ static int wc_next_poll_cq(struct ibv_cq_ex *current)
int err;
err = hns_roce_poll_one(ctx, &qp, cq, NULL);
+ if (qp && check_dca_detach_enable(qp))
+ dca_detach_qp_buf(ctx, qp);
if (err != V2_CQ_OK)
return err;
@@ -2159,6 +2170,9 @@ init_rc_wqe(struct hns_roce_qp *qp, uint64_t wr_id, unsigned int opcode)
hr_reg_clear(wqe, RCWQE_INLINE);
hr_reg_clear(wqe, RCWQE_SO);
+ if (check_qp_dca_enable(qp))
+ fill_rc_dca_fields(qp->verbs_qp.qp.qp_num, wqe);
+
qp->sq.wrid[wqe_idx] = wr_id;
qp->cur_wqe = wqe;
qp->sq.head++;
@@ -2691,8 +2705,10 @@ static void wr_set_inline_data_list_ud(struct ibv_qp_ex *ibv_qp, size_t num_buf,
static void wr_start(struct ibv_qp_ex *ibv_qp)
{
+ struct hns_roce_context *ctx = to_hr_ctx(ibv_qp->qp_base.context);
struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base);
enum ibv_qp_state state = ibv_qp->qp_base.state;
+ int ret;
if (state == IBV_QPS_RESET ||
state == IBV_QPS_INIT ||
@@ -2701,6 +2717,17 @@ static void wr_start(struct ibv_qp_ex *ibv_qp)
return;
}
+ if (check_qp_dca_enable(qp)) {
+ ret = dca_attach_qp_buf(ctx, qp);
+ if (ret) {
+ verbs_err_datapath(&ctx->ibv_ctx,
+ "failed to attach QP-%u send, ret = %d.\n",
+ qp->verbs_qp.qp.qp_num, ret);
+ qp->err = ret;
+ return;
+ }
+ }
+
hns_roce_spin_lock(&qp->sq.hr_lock);
qp->sge_info.start_idx = qp->next_sge;
qp->rb_sq_head = qp->sq.head;
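
Taken together, the hunks above form the DCA data path: check_qp_recv() and wr_start() attach the QP's WQE buffer from the shared pool before work requests are built, and the extended CQ poll paths detach it once completions are reaped, so an idle QP holds no WQE memory. The following is a hedged application-side sketch using only the standard extended verbs API; post_and_poll() is a hypothetical helper, and qpx/cqx are assumed to be an extended QP (created with IBV_QP_EX_WITH_RDMA_WRITE in send_ops_flags) and an extended CQ created elsewhere.

#include <errno.h>
#include <stdint.h>
#include <infiniband/verbs.h>

/* Sketch: post one signaled RDMA write on a DCA QP and reap its CQE.
 * qpx/cqx and the keys/addresses are assumed to exist already. */
static int post_and_poll(struct ibv_qp_ex *qpx, struct ibv_cq_ex *cqx,
			 uint32_t lkey, uint64_t laddr, uint32_t len,
			 uint32_t rkey, uint64_t raddr)
{
	struct ibv_poll_cq_attr poll_attr = {};
	int ret;

	ibv_wr_start(qpx);              /* DCA attaches the WQE buffer here */
	qpx->wr_id = 1;
	qpx->wr_flags = IBV_SEND_SIGNALED;
	ibv_wr_rdma_write(qpx, rkey, raddr);
	ibv_wr_set_sge(qpx, lkey, laddr, len);
	ret = ibv_wr_complete(qpx);
	if (ret)
		return ret;

	do {
		ret = ibv_start_poll(cqx, &poll_attr);
	} while (ret == ENOENT);        /* busy-wait for the CQE (demo only) */
	if (ret)
		return ret;
	ret = cqx->status == IBV_WC_SUCCESS ? 0 : EIO;
	ibv_end_poll(cqx);              /* buffer may be detached after polling */
	return ret;
}

wr_start() above records a failed attach in qp->err, so the error is expected to surface when the work-request batch is finalized.
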
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index 749b01b..282ab74 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -961,6 +961,15 @@ enum {
IBV_QP_INIT_ATTR_SEND_OPS_FLAGS,
};
+enum {
+ SEND_OPS_FLAG_MASK =
+ IBV_QP_EX_WITH_RDMA_WRITE | IBV_QP_EX_WITH_RDMA_WRITE_WITH_IMM |
+ IBV_QP_EX_WITH_SEND | IBV_QP_EX_WITH_SEND_WITH_IMM |
+ IBV_QP_EX_WITH_RDMA_READ | IBV_QP_EX_WITH_ATOMIC_CMP_AND_SWP |
+ IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD | IBV_QP_EX_WITH_LOCAL_INV |
+ IBV_QP_EX_WITH_SEND_WITH_INV,
+};
+
static int check_qp_create_mask(struct hns_roce_context *ctx,
struct ibv_qp_init_attr_ex *attr)
{
@@ -969,6 +978,10 @@ static int check_qp_create_mask(struct hns_roce_context *ctx,
if (!check_comp_mask(attr->comp_mask, CREATE_QP_SUP_COMP_MASK))
return -EOPNOTSUPP;
+ if (attr->comp_mask & IBV_QP_INIT_ATTR_SEND_OPS_FLAGS &&
+ !check_comp_mask(attr->send_ops_flags, SEND_OPS_FLAG_MASK))
+ return -EOPNOTSUPP;
+
switch (attr->qp_type) {
case IBV_QPT_UD:
if (hr_dev->hw_version == HNS_ROCE_HW_VER2)
@@ -1165,9 +1178,21 @@ static int calc_qp_buff_size(struct hns_roce_device *hr_dev,
return 0;
}
-static inline bool check_qp_support_dca(bool pool_en, enum ibv_qp_type qp_type)
+static inline bool check_qp_support_dca(struct hns_roce_dca_ctx *dca_ctx,
+ struct ibv_qp_init_attr_ex *attr,
+ struct hnsdv_qp_init_attr *hns_attr)
{
- if (pool_en && (qp_type == IBV_QPT_RC || qp_type == IBV_QPT_XRC_SEND))
+ /* The DCA pool is disabled */
+ if (!dca_ctx->unit_size)
+ return false;
+
+ /* Unsupported QP type */
+ if (attr->qp_type != IBV_QPT_RC && attr->qp_type != IBV_QPT_XRC_SEND)
+ return false;
+
+ if (hns_attr &&
+ (hns_attr->comp_mask & HNSDV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS) &&
+ (hns_attr->create_flags & HNSDV_QP_CREATE_ENABLE_DCA_MODE))
return true;
return false;
@@ -1185,6 +1210,7 @@ static void qp_free_wqe(struct hns_roce_qp *qp)
}
static int qp_alloc_wqe(struct ibv_qp_init_attr_ex *attr,
+ struct hnsdv_qp_init_attr *hns_attr,
struct hns_roce_qp *qp, struct hns_roce_context *ctx)
{
struct hns_roce_device *hr_dev = to_hr_dev(ctx->ibv_ctx.context.device);
@@ -1208,7 +1234,8 @@ static int qp_alloc_wqe(struct ibv_qp_init_attr_ex *attr,
goto err_alloc;
}
- if (check_qp_support_dca(ctx->dca_ctx.max_size != 0, attr->qp_type)) {
+ if (check_qp_support_dca(&ctx->dca_ctx, attr, hns_attr) &&
+ ctx->dca_ctx.max_size > 0) {
/* when DCA is enabled, use a buffer list to store page addr */
qp->buf.buf = NULL;
qp->dca_wqe.max_cnt = hr_hw_page_count(qp->buf_size);
@@ -1216,6 +1243,7 @@ static int qp_alloc_wqe(struct ibv_qp_init_attr_ex *attr,
qp->dca_wqe.bufs = calloc(qp->dca_wqe.max_cnt, sizeof(void *));
if (!qp->dca_wqe.bufs)
goto err_alloc;
+ verbs_debug(&ctx->ibv_ctx, "alloc DCA buf.\n");
} else {
if (hns_roce_alloc_buf(&qp->buf, qp->buf_size,
HNS_HW_PAGE_SIZE))
@@ -1478,6 +1506,7 @@ void hns_roce_free_qp_buf(struct hns_roce_qp *qp, struct hns_roce_context *ctx)
}
static int hns_roce_alloc_qp_buf(struct ibv_qp_init_attr_ex *attr,
+ struct hnsdv_qp_init_attr *hns_attr,
struct hns_roce_qp *qp,
struct hns_roce_context *ctx)
{
@@ -1487,7 +1516,7 @@ static int hns_roce_alloc_qp_buf(struct ibv_qp_init_attr_ex *attr,
pthread_spin_init(&qp->rq.hr_lock.lock, PTHREAD_PROCESS_PRIVATE))
return -ENOMEM;
- ret = qp_alloc_wqe(attr, qp, ctx);
+ ret = qp_alloc_wqe(attr, hns_attr, qp, ctx);
if (ret)
return ret;
@@ -1510,7 +1539,8 @@ static int mmap_dwqe(struct ibv_context *ibv_ctx, struct hns_roce_qp *qp,
}
static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx,
- struct ibv_qp_init_attr_ex *attr)
+ struct ibv_qp_init_attr_ex *attr,
+ struct hnsdv_qp_init_attr *hns_attr)
{
struct hns_roce_context *context = to_hr_ctx(ibv_ctx);
struct hns_roce_qp *qp;
@@ -1533,7 +1563,7 @@ static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx,
if (ret)
goto err_spinlock;
- ret = hns_roce_alloc_qp_buf(attr, qp, context);
+ ret = hns_roce_alloc_qp_buf(attr, hns_attr, qp, context);
if (ret)
goto err_buf;
@@ -1587,7 +1617,7 @@ struct ibv_qp *hns_roce_u_create_qp(struct ibv_pd *pd,
attrx.comp_mask = IBV_QP_INIT_ATTR_PD;
attrx.pd = pd;
- qp = create_qp(pd->context, &attrx);
+ qp = create_qp(pd->context, &attrx, NULL);
if (qp)
memcpy(attr, &attrx, sizeof(*attr));
@@ -1597,7 +1627,19 @@ struct ibv_qp *hns_roce_u_create_qp(struct ibv_pd *pd,
struct ibv_qp *hns_roce_u_create_qp_ex(struct ibv_context *context,
struct ibv_qp_init_attr_ex *attr)
{
- return create_qp(context, attr);
+ return create_qp(context, attr, NULL);
+}
+
+struct ibv_qp *hnsdv_create_qp(struct ibv_context *context,
+ struct ibv_qp_init_attr_ex *qp_attr,
+ struct hnsdv_qp_init_attr *hns_attr)
+{
+ if (!is_hns_dev(context->device)) {
+ errno = EOPNOTSUPP;
+ return NULL;
+ }
+
+ return create_qp(context, qp_attr, hns_attr);
}
struct ibv_qp *hns_roce_u_open_qp(struct ibv_context *context,
diff --git a/providers/hns/hnsdv.h b/providers/hns/hnsdv.h
new file mode 100644
index 0000000..cfe1611
--- /dev/null
+++ b/providers/hns/hnsdv.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
+/*
+ * Copyright (c) 2021 HiSilicon Limited.
+ */
+
+#ifndef __HNSDV_H__
+#define __HNSDV_H__
+
+#include <stdio.h>
+#include <stdbool.h>
+
+#include <sys/types.h>
+
+#include <infiniband/verbs.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+enum hnsdv_context_attr_flags {
+ HNSDV_CONTEXT_FLAGS_DCA = 1 << 0,
+};
+
+enum hnsdv_context_comp_mask {
+ HNSDV_CONTEXT_MASK_DCA_PRIME_QPS = 1 << 0,
+ HNSDV_CONTEXT_MASK_DCA_UNIT_SIZE = 1 << 1,
+ HNSDV_CONTEXT_MASK_DCA_MAX_SIZE = 1 << 2,
+ HNSDV_CONTEXT_MASK_DCA_MIN_SIZE = 1 << 3,
+};
+
+struct hnsdv_context_attr {
+ uint64_t flags; /* Use enum hnsdv_context_attr_flags */
+ uint64_t comp_mask; /* Use enum hnsdv_context_comp_mask */
+ uint32_t dca_prime_qps;
+ uint32_t dca_unit_size;
+ uint64_t dca_max_size;
+ uint64_t dca_min_size;
+};
+
+bool hnsdv_is_supported(struct ibv_device *device);
+struct ibv_context *hnsdv_open_device(struct ibv_device *device,
+ struct hnsdv_context_attr *attr);
+
+enum hnsdv_qp_create_flags {
+ HNSDV_QP_CREATE_ENABLE_DCA_MODE = 1 << 0,
+};
+
+enum hnsdv_qp_init_attr_mask {
+ HNSDV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS = 1 << 0,
+};
+
+struct hnsdv_qp_init_attr {
+ uint64_t comp_mask; /* Use enum hnsdv_qp_init_attr_mask */
+ uint32_t create_flags; /* Use enum hnsdv_qp_create_flags */
+};
+
+struct ibv_qp *hnsdv_create_qp(struct ibv_context *context,
+ struct ibv_qp_init_attr_ex *qp_attr,
+ struct hnsdv_qp_init_attr *hns_qp_attr);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __HNSDV_H__ */
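
To show how the new header is meant to be consumed, here is a hedged sketch (not part of this patch) of creating an RC QP in DCA mode; create_dca_qp() is a hypothetical helper, and the pd/cq objects and capability numbers are placeholders.

#include <infiniband/verbs.h>
#include <infiniband/hnsdv.h>

/* Sketch: create an RC QP that uses the shared DCA pool instead of a
 * private WQE buffer. pd/cq are assumed to be created elsewhere. */
static struct ibv_qp *create_dca_qp(struct ibv_context *ctx,
				    struct ibv_pd *pd, struct ibv_cq *cq)
{
	struct ibv_qp_init_attr_ex attr = {
		.qp_type = IBV_QPT_RC,	/* DCA only applies to RC/XRC_SEND */
		.send_cq = cq,
		.recv_cq = cq,
		.pd = pd,
		.comp_mask = IBV_QP_INIT_ATTR_PD |
			     IBV_QP_INIT_ATTR_SEND_OPS_FLAGS,
		.send_ops_flags = IBV_QP_EX_WITH_RDMA_WRITE,
		.cap = {
			.max_send_wr = 64,	/* example sizes */
			.max_recv_wr = 64,
			.max_send_sge = 1,
			.max_recv_sge = 1,
		},
	};
	struct hnsdv_qp_init_attr hns_attr = {
		.comp_mask = HNSDV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS,
		.create_flags = HNSDV_QP_CREATE_ENABLE_DCA_MODE,
	};

	return hnsdv_create_qp(ctx, &attr, &hns_attr);
}

If the context was not opened with HNSDV_CONTEXT_FLAGS_DCA, the DCA pool stays disabled and such a QP simply falls back to a normal, privately allocated WQE buffer.
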
diff --git a/providers/hns/libhns.map b/providers/hns/libhns.map
new file mode 100644
index 0000000..aed491c
--- /dev/null
+++ b/providers/hns/libhns.map
@@ -0,0 +1,9 @@
+/* Export symbols should be added below according to
+ Documentation/versioning.md document. */
+HNS_1.0 {
+ global:
+ hnsdv_is_supported;
+ hnsdv_open_device;
+ hnsdv_create_qp;
+ local: *;
+};
diff --git a/redhat/rdma-core.spec b/redhat/rdma-core.spec
index f1b196a..321578c 100644
--- a/redhat/rdma-core.spec
+++ b/redhat/rdma-core.spec
@@ -150,6 +150,8 @@ Provides: libefa = %{version}-%{release}
Obsoletes: libefa < %{version}-%{release}
Provides: libhfi1 = %{version}-%{release}
Obsoletes: libhfi1 < %{version}-%{release}
+Provides: libhns = %{version}-%{release}
+Obsoletes: libhns < %{version}-%{release}
Provides: libipathverbs = %{version}-%{release}
Obsoletes: libipathverbs < %{version}-%{release}
Provides: libirdma = %{version}-%{release}
@@ -177,7 +179,7 @@ Device-specific plug-in ibverbs userspace drivers are included:
- libcxgb4: Chelsio T4 iWARP HCA
- libefa: Amazon Elastic Fabric Adapter
- libhfi1: Intel Omni-Path HFI
-- libhns: HiSilicon Hip06 SoC
+- libhns: HiSilicon Hip08+ SoC
- libipathverbs: QLogic InfiniPath HCA
- libirdma: Intel Ethernet Connection RDMA
- libmlx4: Mellanox ConnectX-3 InfiniBand HCA
@@ -562,6 +564,7 @@ fi
%dir %{_sysconfdir}/libibverbs.d
%dir %{_libdir}/libibverbs
%{_libdir}/libefa.so.*
+%{_libdir}/libhns.so.*
%{_libdir}/libibverbs*.so.*
%{_libdir}/libibverbs/*.so
%{_libdir}/libmlx5.so.*
diff --git a/suse/rdma-core.spec b/suse/rdma-core.spec
index bd1faec..ce19db1 100644
--- a/suse/rdma-core.spec
+++ b/suse/rdma-core.spec
@@ -35,6 +35,7 @@ License: BSD-2-Clause OR GPL-2.0-only
Group: Productivity/Networking/Other
%define efa_so_major 1
+%define hns_so_major 1
%define verbs_so_major 1
%define rdmacm_so_major 1
%define umad_so_major 3
@@ -44,6 +45,7 @@ Group: Productivity/Networking/Other
%define mad_major 5
%define efa_lname libefa%{efa_so_major}
+%define hns_lname libhns%{hns_so_major}
%define verbs_lname libibverbs%{verbs_so_major}
%define rdmacm_lname librdmacm%{rdmacm_so_major}
%define umad_lname libibumad%{umad_so_major}
@@ -157,6 +159,7 @@ Requires: %{umad_lname} = %{version}-%{release}
Requires: %{verbs_lname} = %{version}-%{release}
%if 0%{?dma_coherent}
Requires: %{efa_lname} = %{version}-%{release}
+Requires: %{hns_lname} = %{version}-%{release}
Requires: %{mlx4_lname} = %{version}-%{release}
Requires: %{mlx5_lname} = %{version}-%{release}
%endif
@@ -197,6 +200,7 @@ Requires: %{name}%{?_isa} = %{version}-%{release}
Obsoletes: libcxgb4-rdmav2 < %{version}-%{release}
Obsoletes: libefa-rdmav2 < %{version}-%{release}
Obsoletes: libhfi1verbs-rdmav2 < %{version}-%{release}
+Obsoletes: libhns-rdmav2 < %{version}-%{release}
Obsoletes: libipathverbs-rdmav2 < %{version}-%{release}
Obsoletes: libmlx4-rdmav2 < %{version}-%{release}
Obsoletes: libmlx5-rdmav2 < %{version}-%{release}
@@ -205,6 +209,7 @@ Obsoletes: libocrdma-rdmav2 < %{version}-%{release}
Obsoletes: librxe-rdmav2 < %{version}-%{release}
%if 0%{?dma_coherent}
Requires: %{efa_lname} = %{version}-%{release}
+Requires: %{hns_lname} = %{version}-%{release}
Requires: %{mlx4_lname} = %{version}-%{release}
Requires: %{mlx5_lname} = %{version}-%{release}
%endif
@@ -223,7 +228,7 @@ Device-specific plug-in ibverbs userspace drivers are included:
- libcxgb4: Chelsio T4 iWARP HCA
- libefa: Amazon Elastic Fabric Adapter
- libhfi1: Intel Omni-Path HFI
-- libhns: HiSilicon Hip06 SoC
+- libhns: HiSilicon Hip08+ SoC
- libipathverbs: QLogic InfiniPath HCA
- libirdma: Intel Ethernet Connection RDMA
- libmlx4: Mellanox ConnectX-3 InfiniBand HCA
@@ -250,6 +255,13 @@ Group: System/Libraries
%description -n %efa_lname
This package contains the efa runtime library.
+%package -n %hns_lname
+Summary: HNS runtime library
+Group: System/Libraries
+
+%description -n %hns_lname
+This package contains the hns runtime library.
+
%package -n %mlx4_lname
Summary: MLX4 runtime library
Group: System/Libraries
@@ -493,6 +505,9 @@ rm -rf %{buildroot}/%{_sbindir}/srp_daemon.sh
%post -n %efa_lname -p /sbin/ldconfig
%postun -n %efa_lname -p /sbin/ldconfig
+%post -n %hns_lname -p /sbin/ldconfig
+%postun -n %hns_lname -p /sbin/ldconfig
+
%post -n %mlx4_lname -p /sbin/ldconfig
%postun -n %mlx4_lname -p /sbin/ldconfig
@@ -689,6 +704,10 @@ done
%defattr(-,root,root)
%{_libdir}/libefa*.so.*
+%files -n %hns_lname
+%defattr(-,root,root)
+%{_libdir}/libhns*.so.*
+
%files -n %mlx4_lname
%defattr(-,root,root)
%{_libdir}/libmlx4*.so.*
--
2.30.0

View File

@@ -1,6 +1,6 @@
Name: rdma-core
Version: 41.0
-Release: 6
+Release: 7
Summary: RDMA core userspace libraries and daemons
License: GPLv2 or BSD
Url: https://github.com/linux-rdma/rdma-core
@@ -36,6 +36,13 @@ Patch26: 0027-libhns-Add-RoH-device-IDs.patch
Patch27: 0028-Update-kernel-headers.patch
Patch28: 0029-libhns-Add-the-parsing-of-mac-type-in-RoH-mode.patch
Patch29: 0030-libhns-Add-support-for-the-thread-domain-and-the-par.patch
+Patch30: 0031-Update-kernel-headers.patch
+Patch31: 0032-libhns-Introduce-DCA-for-RC-QP.patch
+Patch32: 0033-libhns-Add-support-for-shrinking-DCA-memory-pool.patch
+Patch33: 0034-libhns-Add-support-for-attaching-QP-s-WQE-buffer.patch
+Patch34: 0035-libhns-Use-shared-memory-to-sync-DCA-status.patch
+Patch35: 0036-libhns-Sync-DCA-status-by-shared-memory.patch
+Patch36: 0037-libhns-Add-direct-verbs-support-to-config-DCA.patch
BuildRequires: binutils cmake >= 2.8.11 gcc libudev-devel pkgconfig pkgconfig(libnl-3.0)
BuildRequires: pkgconfig(libnl-route-3.0) valgrind-devel systemd systemd-devel
@@ -59,6 +66,8 @@ Provides: libefa = %{version}-%{release}
Obsoletes: libefa < %{version}-%{release}
Provides: libhfi1 = %{version}-%{release}
Obsoletes: libhfi1 < %{version}-%{release}
+Provides: libhns = %{version}-%{release}
+Obsoletes: libhns < %{version}-%{release}
Provides: libi40iw = %{version}-%{release}
Obsoletes: libi40iw < %{version}-%{release}
Provides: libipathverbs = %{version}-%{release}
@@ -243,6 +252,7 @@ fi
%{_libdir}/libibmad*.so.*
%{_libdir}/libibnetdisc*.so.*
%{_libdir}/libefa.so.*
+%{_libdir}/libhns.so.*
%{_libdir}/libibverbs*.so.*
%{_libdir}/libibverbs/*.so
%{_libdir}/libmlx5.so.*
@@ -280,6 +290,12 @@ fi
%{_mandir}/*
%changelog
+* Wed Nov 30 2022 tangchengchang <tangchengchang@huawei.com> - 41.0-7
+- Type: requirement
+- ID: NA
+- SUG: NA
+- DESC: Add support for hns DCA
* Mon Nov 28 2022 Yixing Liu <liuyixing1@huawei.com> - 41.0-6
- Type: requirement
- ID: NA