From 58de0f69573e8b76affe401a261f17f1a5cedc01 Mon Sep 17 00:00:00 2001
From: Chengchang Tang <tangchengchang@huawei.com>
Date: Mon, 10 May 2021 17:13:09 +0800
Subject: libhns: Introduce DCA for RC QP

driver inclusion
category: feature
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I63L1M

----------------------------------------------------------

HIP09 introduces the DCA (Dynamic Context Attachment) feature, which
lets many RC QPs share WQE buffers in a memory pool. This reduces
memory consumption when many QPs are inactive.
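
As a worked example of the pool-sizing arithmetic implemented below
(the 4 KiB page size is only an assumption for illustration; the unit
and rounding logic come from init_dca_context() and
hns_roce_add_dca_mem() in this patch):

	/* unit_size = page_size * HNS_DCA_DEFAULT_UNIT_PAGES
	 *           = 4096 * 16 = 64 KiB per allocation unit.
	 * A 100 KiB request grows the pool by whole units:
	 * DIV_ROUND_UP(102400, 65536) * 65536 = 2 * 65536 = 128 KiB.
	 */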

Two functions are defined for adding buffers to and removing buffers
from the memory pool; both call the ib commands implemented in the
hns kernel-space driver.
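
A minimal usage sketch of those two entry points (the caller and
qp_buf_size shown here are hypothetical; the real QP attach path
arrives in later patches):

	/* Grow the shared pool so a QP's WQE buffer will fit; the
	 * request is rounded up to whole allocation units. */
	ret = hns_roce_add_dca_mem(ctx, qp_buf_size);
	if (ret)
		return ret;

	/* Teardown: uninit_dca_context() takes dca_ctx->lock and calls
	 * hns_roce_cleanup_dca_mem() to deregister each pool buffer. */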

If a QP enables the DCA feature, its WQE buffer is attached to the
memory pool when the user starts to post WRs and is detached once all
CQEs have been polled.
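
The attach and detach verbs themselves are not part of this patch; as
a hedged sketch of the intended lifecycle (all names below are
hypothetical placeholders, not APIs added here):

	/* post_send(qp, wr):
	 *     if the QP uses DCA and no WQE buffer is attached,
	 *         attach one from dca_ctx's pool (grown on demand
	 *         through hns_roce_add_dca_mem());
	 *     write WQEs into the attached buffer.
	 *
	 * poll_cq(cq):
	 *     once the QP's last outstanding CQE has been polled,
	 *         detach the WQE buffer back to the pool.
	 */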

Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
Reviewed-by: Yangyang Li <liyangyang20@huawei.com>
---
 providers/hns/hns_roce_u.c     |  59 ++++++++++++-
 providers/hns/hns_roce_u.h     |  21 +++++
 providers/hns/hns_roce_u_buf.c | 147 +++++++++++++++++++++++++++++++++
 3 files changed, 223 insertions(+), 4 deletions(-)

diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c
index 3428bda..bd2b251 100644
--- a/providers/hns/hns_roce_u.c
+++ b/providers/hns/hns_roce_u.c
@@ -95,6 +95,53 @@ static const struct verbs_context_ops hns_common_ops = {
 	.alloc_parent_domain = hns_roce_u_alloc_pad,
 };
 
+static int init_dca_context(struct hns_roce_context *ctx, int page_size)
+{
+	struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
+	int ret;
+
+	if (!(ctx->config & HNS_ROCE_UCTX_RSP_DCA_FLAGS))
+		return 0;
+
+	list_head_init(&dca_ctx->mem_list);
+	ret = pthread_spin_init(&dca_ctx->lock, PTHREAD_PROCESS_PRIVATE);
+	if (ret)
+		return ret;
+
+	dca_ctx->unit_size = page_size * HNS_DCA_DEFAULT_UNIT_PAGES;
+	dca_ctx->max_size = HNS_DCA_MAX_MEM_SIZE;
+	dca_ctx->mem_cnt = 0;
+
+	return 0;
+}
+
+static void uninit_dca_context(struct hns_roce_context *ctx)
+{
+	struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
+
+	if (!(ctx->config & HNS_ROCE_UCTX_RSP_DCA_FLAGS))
+		return;
+
+	pthread_spin_lock(&dca_ctx->lock);
+	hns_roce_cleanup_dca_mem(ctx);
+	pthread_spin_unlock(&dca_ctx->lock);
+
+	pthread_spin_destroy(&dca_ctx->lock);
+}
+
+static int hns_roce_mmap(struct hns_roce_device *hr_dev,
+			 struct hns_roce_context *context, int cmd_fd)
+{
+	int page_size = hr_dev->page_size;
+
+	context->uar = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
+			    MAP_SHARED, cmd_fd, 0);
+	if (context->uar == MAP_FAILED)
+		return -ENOMEM;
+
+	return 0;
+}
+
 static uint32_t calc_table_shift(uint32_t entry_count, uint32_t size_shift)
 {
 	uint32_t count_shift = hr_ilog32(entry_count);
@@ -119,7 +166,7 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
 		return NULL;
 
 	cmd.config |= HNS_ROCE_EXSGE_FLAGS | HNS_ROCE_RQ_INLINE_FLAGS |
-		      HNS_ROCE_CQE_INLINE_FLAGS;
+		      HNS_ROCE_CQE_INLINE_FLAGS | HNS_ROCE_UCTX_CONFIG_DCA;
 	if (ibv_cmd_get_context(&context->ibv_ctx, &cmd.ibv_cmd, sizeof(cmd),
 				&resp.ibv_resp, sizeof(resp)))
 		goto err_free;
@@ -165,11 +212,12 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
 	context->max_srq_wr = dev_attrs.max_srq_wr;
 	context->max_srq_sge = dev_attrs.max_srq_sge;
 
-	context->uar = mmap(NULL, hr_dev->page_size, PROT_READ | PROT_WRITE,
-			    MAP_SHARED, cmd_fd, 0);
-	if (context->uar == MAP_FAILED)
+	if (init_dca_context(context, hr_dev->page_size))
 		goto err_free;
 
+	if (hns_roce_mmap(hr_dev, context, cmd_fd))
+		goto dca_free;
+
 	pthread_spin_init(&context->uar_lock, PTHREAD_PROCESS_PRIVATE);
 
 	verbs_set_ops(&context->ibv_ctx, &hns_common_ops);
@@ -177,6 +225,8 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
 
 	return &context->ibv_ctx;
 
+dca_free:
+	uninit_dca_context(context);
 err_free:
 	verbs_uninit_context(&context->ibv_ctx);
 	free(context);
@@ -189,6 +239,7 @@ static void hns_roce_free_context(struct ibv_context *ibctx)
 	struct hns_roce_context *context = to_hr_ctx(ibctx);
 
 	munmap(context->uar, hr_dev->page_size);
+	uninit_dca_context(context);
 	verbs_uninit_context(&context->ibv_ctx);
 	free(context);
 }
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index 2edb07e..0e25ce5 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -147,6 +147,10 @@
 
 #define hr_reg_read(ptr, field) _hr_reg_read(ptr, field)
 
+enum {
+	HNS_ROCE_CAP_FLAG_DCA_MODE = BIT(15),
+};
+
 #define HNS_ROCE_QP_TABLE_BITS 8
 #define HNS_ROCE_QP_TABLE_SIZE BIT(HNS_ROCE_QP_TABLE_BITS)
 
@@ -199,6 +203,18 @@ struct hns_roce_spinlock {
 	int need_lock;
 };
 
+#define HNS_DCA_MAX_MEM_SIZE	~0UL
+#define HNS_DCA_DEFAULT_UNIT_PAGES	16
+
+struct hns_roce_dca_ctx {
+	struct list_head mem_list;
+	pthread_spinlock_t lock;
+	int mem_cnt;
+	unsigned int unit_size;
+	uint64_t max_size;
+	uint64_t curr_size;
+};
+
 struct hns_roce_context {
 	struct verbs_context ibv_ctx;
 	void *uar;
@@ -231,6 +247,8 @@ struct hns_roce_context {
 	unsigned int cqe_size;
 	uint32_t config;
 	unsigned int max_inline_data;
+
+	struct hns_roce_dca_ctx dca_ctx;
 };
 
 struct hns_roce_td {
@@ -562,6 +580,9 @@ void hns_roce_qp_spinlock_destroy(struct hns_roce_qp *qp);
 
 void hns_roce_free_qp_buf(struct hns_roce_qp *qp, struct hns_roce_context *ctx);
 
+void hns_roce_cleanup_dca_mem(struct hns_roce_context *ctx);
+int hns_roce_add_dca_mem(struct hns_roce_context *ctx, uint32_t size);
+
 void hns_roce_init_qp_indices(struct hns_roce_qp *qp);
 
 extern const struct hns_roce_u_hw hns_roce_u_hw_v2;
diff --git a/providers/hns/hns_roce_u_buf.c b/providers/hns/hns_roce_u_buf.c
index 471dd9c..02c43ae 100644
--- a/providers/hns/hns_roce_u_buf.c
+++ b/providers/hns/hns_roce_u_buf.c
@@ -60,3 +60,150 @@ void hns_roce_free_buf(struct hns_roce_buf *buf)
 
 	munmap(buf->buf, buf->length);
 }
+
+struct hns_roce_dca_mem {
+	uint32_t handle;
+	struct list_node entry;
+	struct hns_roce_buf buf;
+	struct hns_roce_context *ctx;
+};
+
+static void free_dca_mem(struct hns_roce_context *ctx,
+			 struct hns_roce_dca_mem *mem)
+{
+	hns_roce_free_buf(&mem->buf);
+	free(mem);
+}
+
+static struct hns_roce_dca_mem *alloc_dca_mem(uint32_t size)
+{
+	struct hns_roce_dca_mem *mem = NULL;
+	int ret;
+
+	mem = malloc(sizeof(struct hns_roce_dca_mem));
+	if (!mem) {
+		errno = ENOMEM;
+		return NULL;
+	}
+
+	ret = hns_roce_alloc_buf(&mem->buf, size, HNS_HW_PAGE_SIZE);
+	if (ret) {
+		errno = ENOMEM;
+		free(mem);
+		return NULL;
+	}
+
+	return mem;
+}
+
+static inline uint64_t dca_mem_to_key(struct hns_roce_dca_mem *dca_mem)
+{
+	return (uintptr_t)dca_mem;
+}
+
+static inline void *dca_mem_addr(struct hns_roce_dca_mem *dca_mem, int offset)
+{
+	return dca_mem->buf.buf + offset;
+}
+
+static int register_dca_mem(struct hns_roce_context *ctx, uint64_t key,
+			    void *addr, uint32_t size, uint32_t *handle)
+{
+	struct ib_uverbs_attr *attr;
+	int ret;
+
+	DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM,
+			       HNS_IB_METHOD_DCA_MEM_REG, 4);
+	fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_REG_LEN, size);
+	fill_attr_in_uint64(cmd, HNS_IB_ATTR_DCA_MEM_REG_ADDR,
+			    ioctl_ptr_to_u64(addr));
+	fill_attr_in_uint64(cmd, HNS_IB_ATTR_DCA_MEM_REG_KEY, key);
+	attr = fill_attr_out_obj(cmd, HNS_IB_ATTR_DCA_MEM_REG_HANDLE);
+
+	ret = execute_ioctl(&ctx->ibv_ctx.context, cmd);
+	if (ret) {
+		verbs_err(&ctx->ibv_ctx, "failed to reg DCA mem, ret = %d.\n",
+			  ret);
+		return ret;
+	}
+
+	*handle = read_attr_obj(HNS_IB_ATTR_DCA_MEM_REG_HANDLE, attr);
+
+	return 0;
+}
+
+static void deregister_dca_mem(struct hns_roce_context *ctx, uint32_t handle)
+{
+	int ret;
+
+	DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM,
+			       HNS_IB_METHOD_DCA_MEM_DEREG, 1);
+	fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_DEREG_HANDLE, handle);
+	ret = execute_ioctl(&ctx->ibv_ctx.context, cmd);
+	if (ret)
+		verbs_warn(&ctx->ibv_ctx,
+			   "failed to dereg DCA mem-%u, ret = %d.\n",
+			   handle, ret);
+}
+
+void hns_roce_cleanup_dca_mem(struct hns_roce_context *ctx)
+{
+	struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
+	struct hns_roce_dca_mem *mem;
+	struct hns_roce_dca_mem *tmp;
+
+	list_for_each_safe(&dca_ctx->mem_list, mem, tmp, entry)
+		deregister_dca_mem(ctx, mem->handle);
+}
+
+static bool add_dca_mem_enabled(struct hns_roce_dca_ctx *ctx,
+				uint32_t alloc_size)
+{
+	bool enable;
+
+	pthread_spin_lock(&ctx->lock);
+
+	if (ctx->unit_size == 0) /* Pool size can't be increased */
+		enable = false;
+	else if (ctx->max_size == HNS_DCA_MAX_MEM_SIZE) /* Pool size no limit */
+		enable = true;
+	else /* Pool size doesn't exceed max size */
+		enable = (ctx->curr_size + alloc_size) < ctx->max_size;
+
+	pthread_spin_unlock(&ctx->lock);
+
+	return enable;
+}
+
+int hns_roce_add_dca_mem(struct hns_roce_context *ctx, uint32_t size)
+{
+	struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
+	struct hns_roce_dca_mem *mem;
+	int ret;
+
+	if (!add_dca_mem_enabled(&ctx->dca_ctx, size))
+		return -ENOMEM;
+
+	/* Step 1: Alloc DCA mem address */
+	mem = alloc_dca_mem(
+		DIV_ROUND_UP(size, dca_ctx->unit_size) * dca_ctx->unit_size);
+	if (!mem)
+		return -ENOMEM;
+
+	/* Step 2: Register DCA mem uobject to pin user address */
+	ret = register_dca_mem(ctx, dca_mem_to_key(mem), dca_mem_addr(mem, 0),
+			       mem->buf.length, &mem->handle);
+	if (ret) {
+		free_dca_mem(ctx, mem);
+		return ret;
+	}
+
+	/* Step 3: Add DCA mem node to pool */
+	pthread_spin_lock(&dca_ctx->lock);
+	list_add_tail(&dca_ctx->mem_list, &mem->entry);
+	dca_ctx->mem_cnt++;
+	dca_ctx->curr_size += mem->buf.length;
+	pthread_spin_unlock(&dca_ctx->lock);
+
+	return 0;
+}
--
2.30.0