DCA (Dynamic Context Attachment) allows many RC QPs to share WQE buffers in a memory pool, which helps reduce memory consumption when many QPs are inactive.

Signed-off-by: Ran Zhou <zhouran10@h-partners.com>
(cherry picked from commit 994c08d7e68ba906b7f7c16e8528700508af94b1)
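
The core sizing rule the patch adds lives in hns_roce_add_dca_mem(): a request is rounded up to whole allocation units of HNS_DCA_DEFAULT_UNIT_PAGES pages, and the pool may only grow while it stays under max_size (or without limit when max_size is HNS_DCA_MAX_MEM_SIZE). A minimal standalone sketch of that arithmetic follows; dca_round_up_size(), dca_grow_allowed() and the 4 KiB page size are illustrative assumptions, not code from the patch:

/*
 * Standalone model of the DCA pool sizing policy; compile with
 * "gcc -o dca_size dca_size.c" and run directly.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define HNS_DCA_MAX_MEM_SIZE            (~0UL)
#define HNS_DCA_DEFAULT_UNIT_PAGES      16
#define DIV_ROUND_UP(n, d)              (((n) + (d) - 1) / (d))

/* Same rounding hns_roce_add_dca_mem() applies before alloc_dca_mem(). */
static uint32_t dca_round_up_size(uint32_t size, uint32_t unit_size)
{
        return DIV_ROUND_UP(size, unit_size) * unit_size;
}

/* Mirrors the checks in add_dca_mem_enabled(). */
static bool dca_grow_allowed(uint64_t curr_size, uint64_t max_size,
                             uint32_t alloc_size, uint32_t unit_size)
{
        if (unit_size == 0)                     /* pool cannot grow at all */
                return false;
        if (max_size == HNS_DCA_MAX_MEM_SIZE)   /* no size limit configured */
                return true;
        return curr_size + alloc_size < max_size;
}

int main(void)
{
        uint32_t unit = 4096 * HNS_DCA_DEFAULT_UNIT_PAGES; /* 64 KiB units */
        uint32_t want = 100000;         /* bytes of WQE space requested */
        uint32_t alloc = dca_round_up_size(want, unit);

        printf("request %u -> allocate %u bytes (%u units), allowed = %d\n",
               want, alloc, alloc / unit,
               dca_grow_allowed(0, HNS_DCA_MAX_MEM_SIZE, alloc, unit));
        return 0;
}

With a 4 KiB page, a 100000-byte request is rounded up to two 64 KiB units (131072 bytes), which is exactly what DIV_ROUND_UP(size, unit_size) * unit_size produces in the patch.
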
From f0d70762b8c69e735a1d15f8379b649bcad3929c Mon Sep 17 00:00:00 2001
From: Chengchang Tang <tangchengchang@huawei.com>
Date: Mon, 10 May 2021 17:13:09 +0800
Subject: [PATCH 20/25] libhns: Introduce DCA for RC QP

driver inclusion
category: feature
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I9C2AQ

------------------------------------------------------------------

The HIP09 introduces the DCA (Dynamic Context Attachment) feature, which
allows many RC QPs to share WQE buffers in a memory pool. This reduces
memory consumption when many QPs are inactive.

Two functions are defined for adding buffers to and removing buffers from
the memory pool by calling ib cmds implemented in the hns kernel-space
driver.

If a QP enables the DCA feature, its WQE buffer is attached to the memory
pool when the user starts to post WRs and is detached once all CQEs have
been polled.

Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
Reviewed-by: Yangyang Li <liyangyang20@huawei.com>
---
 providers/hns/hns_roce_u.c     |  61 +++++++++++++-
 providers/hns/hns_roce_u.h     |  21 ++++-
 providers/hns/hns_roce_u_buf.c | 147 +++++++++++++++++++++++++++++++++
 3 files changed, 226 insertions(+), 3 deletions(-)

diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c
index 810b650..2272431 100644
--- a/providers/hns/hns_roce_u.c
+++ b/providers/hns/hns_roce_u.c
@@ -100,6 +100,53 @@ static uint32_t calc_table_shift(uint32_t entry_count, uint32_t size_shift)
         return count_shift > size_shift ? count_shift - size_shift : 0;
 }
 
+static int hns_roce_mmap(struct hns_roce_device *hr_dev,
+                         struct hns_roce_context *context, int cmd_fd)
+{
+        int page_size = hr_dev->page_size;
+
+        context->uar = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
+                            MAP_SHARED, cmd_fd, 0);
+        if (context->uar == MAP_FAILED)
+                return -ENOMEM;
+
+        return 0;
+}
+
+static int init_dca_context(struct hns_roce_context *ctx, int page_size)
+{
+        struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
+        int ret;
+
+        if (!(ctx->config & HNS_ROCE_UCTX_RSP_DCA_FLAGS))
+                return 0;
+
+        list_head_init(&dca_ctx->mem_list);
+        ret = pthread_spin_init(&dca_ctx->lock, PTHREAD_PROCESS_PRIVATE);
+        if (ret)
+                return ret;
+
+        dca_ctx->unit_size = page_size * HNS_DCA_DEFAULT_UNIT_PAGES;
+        dca_ctx->max_size = HNS_DCA_MAX_MEM_SIZE;
+        dca_ctx->mem_cnt = 0;
+
+        return 0;
+}
+
+static void uninit_dca_context(struct hns_roce_context *ctx)
+{
+        struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
+
+        if (!(ctx->config & HNS_ROCE_UCTX_RSP_DCA_FLAGS))
+                return;
+
+        pthread_spin_lock(&dca_ctx->lock);
+        hns_roce_cleanup_dca_mem(ctx);
+        pthread_spin_unlock(&dca_ctx->lock);
+
+        pthread_spin_destroy(&dca_ctx->lock);
+}
+
 static int init_reset_context(struct hns_roce_context *ctx, int cmd_fd,
                               struct hns_roce_alloc_ucontext_resp *resp,
                               int page_size)
@@ -185,7 +232,7 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
                 return NULL;
 
         cmd.config |= HNS_ROCE_EXSGE_FLAGS | HNS_ROCE_RQ_INLINE_FLAGS |
-                      HNS_ROCE_CQE_INLINE_FLAGS;
+                      HNS_ROCE_CQE_INLINE_FLAGS | HNS_ROCE_UCTX_CONFIG_DCA;
         if (ibv_cmd_get_context(&context->ibv_ctx, &cmd.ibv_cmd, sizeof(cmd),
                                 &resp.ibv_resp, sizeof(resp)))
                 goto err_free;
@@ -198,9 +245,15 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
         if (context->uar == MAP_FAILED)
                 goto err_free;
 
+        if (init_dca_context(context, hr_dev->page_size))
+                goto err_free;
+
         if (init_reset_context(context, cmd_fd, &resp, hr_dev->page_size))
                 goto reset_free;
 
+        if (hns_roce_mmap(hr_dev, context, cmd_fd))
+                goto uar_free;
+
         pthread_mutex_init(&context->qp_table_mutex, NULL);
         pthread_mutex_init(&context->srq_table_mutex, NULL);
         pthread_spin_init(&context->uar_lock, PTHREAD_PROCESS_PRIVATE);
@@ -210,8 +263,11 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
 
         return &context->ibv_ctx;
 
+uar_free:
+        if (context->reset_state)
+                munmap(context->reset_state, hr_dev->page_size);
 reset_free:
-        munmap(context->uar, hr_dev->page_size);
+        uninit_dca_context(context);
 err_free:
         verbs_uninit_context(&context->ibv_ctx);
         free(context);
@@ -226,6 +282,7 @@ static void hns_roce_free_context(struct ibv_context *ibctx)
         munmap(context->uar, hr_dev->page_size);
         if (context->reset_state)
                 munmap(context->reset_state, hr_dev->page_size);
+        uninit_dca_context(context);
         verbs_uninit_context(&context->ibv_ctx);
         free(context);
 }
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index 024932a..90b2205 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -147,6 +147,10 @@
 
 #define hr_reg_read(ptr, field) _hr_reg_read(ptr, field)
 
+enum {
+        HNS_ROCE_CAP_FLAG_DCA_MODE = BIT(15),
+};
+
 #define HNS_ROCE_QP_TABLE_BITS 8
 #define HNS_ROCE_QP_TABLE_SIZE BIT(HNS_ROCE_QP_TABLE_BITS)
 
@@ -201,6 +205,18 @@ struct hns_roce_spinlock {
         int need_lock;
 };
 
+#define HNS_DCA_MAX_MEM_SIZE ~0UL
+#define HNS_DCA_DEFAULT_UNIT_PAGES 16
+
+struct hns_roce_dca_ctx {
+        struct list_head mem_list;
+        pthread_spinlock_t lock;
+        int mem_cnt;
+        unsigned int unit_size;
+        uint64_t max_size;
+        uint64_t curr_size;
+};
+
 struct hns_roce_v2_reset_state {
         uint32_t is_reset;
         uint32_t hw_ready;
@@ -239,7 +255,7 @@ struct hns_roce_context {
         unsigned int cqe_size;
         uint32_t config;
         unsigned int max_inline_data;
-
+        struct hns_roce_dca_ctx dca_ctx;
         bool use_new_reset_flag;
         bool reseted;
 };
@@ -586,6 +602,9 @@ void hns_roce_qp_spinlock_destroy(struct hns_roce_qp *qp);
 
 void hns_roce_free_qp_buf(struct hns_roce_qp *qp, struct hns_roce_context *ctx);
 
+void hns_roce_cleanup_dca_mem(struct hns_roce_context *ctx);
+int hns_roce_add_dca_mem(struct hns_roce_context *ctx, uint32_t size);
+
 void hns_roce_init_qp_indices(struct hns_roce_qp *qp);
 
 bool is_hns_dev(struct ibv_device *device);
diff --git a/providers/hns/hns_roce_u_buf.c b/providers/hns/hns_roce_u_buf.c
index 471dd9c..02c43ae 100644
--- a/providers/hns/hns_roce_u_buf.c
+++ b/providers/hns/hns_roce_u_buf.c
@@ -60,3 +60,150 @@ void hns_roce_free_buf(struct hns_roce_buf *buf)
 
         munmap(buf->buf, buf->length);
 }
+
+struct hns_roce_dca_mem {
+        uint32_t handle;
+        struct list_node entry;
+        struct hns_roce_buf buf;
+        struct hns_roce_context *ctx;
+};
+
+static void free_dca_mem(struct hns_roce_context *ctx,
+                         struct hns_roce_dca_mem *mem)
+{
+        hns_roce_free_buf(&mem->buf);
+        free(mem);
+}
+
+static struct hns_roce_dca_mem *alloc_dca_mem(uint32_t size)
+{
+        struct hns_roce_dca_mem *mem = NULL;
+        int ret;
+
+        mem = malloc(sizeof(struct hns_roce_dca_mem));
+        if (!mem) {
+                errno = ENOMEM;
+                return NULL;
+        }
+
+        ret = hns_roce_alloc_buf(&mem->buf, size, HNS_HW_PAGE_SIZE);
+        if (ret) {
+                errno = ENOMEM;
+                free(mem);
+                return NULL;
+        }
+
+        return mem;
+}
+
+static inline uint64_t dca_mem_to_key(struct hns_roce_dca_mem *dca_mem)
+{
+        return (uintptr_t)dca_mem;
+}
+
+static inline void *dca_mem_addr(struct hns_roce_dca_mem *dca_mem, int offset)
+{
+        return dca_mem->buf.buf + offset;
+}
+
+static int register_dca_mem(struct hns_roce_context *ctx, uint64_t key,
+                            void *addr, uint32_t size, uint32_t *handle)
+{
+        struct ib_uverbs_attr *attr;
+        int ret;
+
+        DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM,
+                               HNS_IB_METHOD_DCA_MEM_REG, 4);
+        fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_REG_LEN, size);
+        fill_attr_in_uint64(cmd, HNS_IB_ATTR_DCA_MEM_REG_ADDR,
+                            ioctl_ptr_to_u64(addr));
+        fill_attr_in_uint64(cmd, HNS_IB_ATTR_DCA_MEM_REG_KEY, key);
+        attr = fill_attr_out_obj(cmd, HNS_IB_ATTR_DCA_MEM_REG_HANDLE);
+
+        ret = execute_ioctl(&ctx->ibv_ctx.context, cmd);
+        if (ret) {
+                verbs_err(&ctx->ibv_ctx, "failed to reg DCA mem, ret = %d.\n",
+                          ret);
+                return ret;
+        }
+
+        *handle = read_attr_obj(HNS_IB_ATTR_DCA_MEM_REG_HANDLE, attr);
+
+        return 0;
+}
+
+static void deregister_dca_mem(struct hns_roce_context *ctx, uint32_t handle)
+{
+        int ret;
+
+        DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM,
+                               HNS_IB_METHOD_DCA_MEM_DEREG, 1);
+        fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_DEREG_HANDLE, handle);
+        ret = execute_ioctl(&ctx->ibv_ctx.context, cmd);
+        if (ret)
+                verbs_warn(&ctx->ibv_ctx,
+                           "failed to dereg DCA mem-%u, ret = %d.\n",
+                           handle, ret);
+}
+
+void hns_roce_cleanup_dca_mem(struct hns_roce_context *ctx)
+{
+        struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
+        struct hns_roce_dca_mem *mem;
+        struct hns_roce_dca_mem *tmp;
+
+        list_for_each_safe(&dca_ctx->mem_list, mem, tmp, entry)
+                deregister_dca_mem(ctx, mem->handle);
+}
+
+static bool add_dca_mem_enabled(struct hns_roce_dca_ctx *ctx,
+                                uint32_t alloc_size)
+{
+        bool enable;
+
+        pthread_spin_lock(&ctx->lock);
+
+        if (ctx->unit_size == 0) /* Pool size can't be increased */
+                enable = false;
+        else if (ctx->max_size == HNS_DCA_MAX_MEM_SIZE) /* Pool size no limit */
+                enable = true;
+        else /* Pool size doesn't exceed max size */
+                enable = (ctx->curr_size + alloc_size) < ctx->max_size;
+
+        pthread_spin_unlock(&ctx->lock);
+
+        return enable;
+}
+
+int hns_roce_add_dca_mem(struct hns_roce_context *ctx, uint32_t size)
+{
+        struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
+        struct hns_roce_dca_mem *mem;
+        int ret;
+
+        if (!add_dca_mem_enabled(&ctx->dca_ctx, size))
+                return -ENOMEM;
+
+        /* Step 1: Alloc DCA mem address */
+        mem = alloc_dca_mem(
+                DIV_ROUND_UP(size, dca_ctx->unit_size) * dca_ctx->unit_size);
+        if (!mem)
+                return -ENOMEM;
+
+        /* Step 2: Register DCA mem uobject to pin user address */
+        ret = register_dca_mem(ctx, dca_mem_to_key(mem), dca_mem_addr(mem, 0),
+                               mem->buf.length, &mem->handle);
+        if (ret) {
+                free_dca_mem(ctx, mem);
+                return ret;
+        }
+
+        /* Step 3: Add DCA mem node to pool */
+        pthread_spin_lock(&dca_ctx->lock);
+        list_add_tail(&dca_ctx->mem_list, &mem->entry);
+        dca_ctx->mem_cnt++;
+        dca_ctx->curr_size += mem->buf.length;
+        pthread_spin_unlock(&dca_ctx->lock);
+
+        return 0;
+}
--
2.33.0
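
This patch only wires DCA into ucontext setup (init_dca_context()) and teardown (uninit_dca_context()) and adds the pool helpers hns_roce_add_dca_mem() and hns_roce_cleanup_dca_mem(); the attach-on-post and detach-on-poll behaviour described in the commit message is built on top of them. Below is a toy, self-contained model of the bookkeeping those helpers perform: allocate a buffer, obtain a registration handle, and link the node into the pool list under a spinlock, with cleanup walking the list to release everything. The names pool, pool_add(), pool_cleanup() and fake_register() are illustrative stand-ins, not libhns or rdma-core APIs.

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct mem_node {
        uint32_t handle;        /* real code: handle returned by DCA_MEM_REG */
        size_t length;
        void *buf;
        struct mem_node *next;
};

struct pool {
        pthread_spinlock_t lock;
        struct mem_node *head;
        int mem_cnt;
        uint64_t curr_size;
};

/* Stand-in for register_dca_mem(): the kernel would pin addr/len and
 * hand back a uobject handle through the DCA_MEM_REG ioctl. */
static uint32_t fake_register(void *addr, size_t len)
{
        (void)addr;
        return (uint32_t)(len & 0xffff);
}

static int pool_add(struct pool *p, size_t size)
{
        struct mem_node *m = calloc(1, sizeof(*m));

        if (!m)
                return -1;
        m->buf = calloc(1, size);                 /* step 1: allocate buffer */
        if (!m->buf) {
                free(m);
                return -1;
        }
        m->length = size;
        m->handle = fake_register(m->buf, size);  /* step 2: register it */

        pthread_spin_lock(&p->lock);              /* step 3: link into pool */
        m->next = p->head;
        p->head = m;
        p->mem_cnt++;
        p->curr_size += size;
        pthread_spin_unlock(&p->lock);
        return 0;
}

static void pool_cleanup(struct pool *p)
{
        struct mem_node *m, *next;

        for (m = p->head; m; m = next) {
                next = m->next;
                /* real code: deregister_dca_mem(ctx, m->handle) */
                free(m->buf);
                free(m);
        }
        p->head = NULL;
        p->mem_cnt = 0;
        p->curr_size = 0;
}

int main(void)
{
        struct pool p = { .head = NULL };

        pthread_spin_init(&p.lock, PTHREAD_PROCESS_PRIVATE);
        pool_add(&p, 65536);
        pool_add(&p, 131072);
        printf("pool: %d chunks, %llu bytes\n",
               p.mem_cnt, (unsigned long long)p.curr_size);
        pool_cleanup(&p);
        pthread_spin_destroy(&p.lock);
        return 0;
}

In the real provider, step 2 is the HNS_IB_OBJECT_DCA_MEM / HNS_IB_METHOD_DCA_MEM_REG ioctl issued by register_dca_mem(), and cleanup calls HNS_IB_METHOD_DCA_MEM_DEREG for each handle before the buffers are freed.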