From a5e62921afc2fcc152e8b0584f2d04d1a4db4f10 Mon Sep 17 00:00:00 2001 From: Chengchang Tang Date: Tue, 29 Jun 2021 20:06:47 +0800 Subject: libhns: Use shared memory to sync DCA status driver inclusion category: feature bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I63L1M ---------------------------------------------------------- Before filling the WQE buffer, the userspace DCA code needs to check the QP's attach state, which it learns from the response to the uverbs method 'HNS_IB_METHOD_DCA_MEM_ATTACH'. This wastes too much time on system calls, so use a table shared between the user driver and the kernel driver to sync the DCA status. Signed-off-by: Chengchang Tang Reviewed-by: Yangyang Li --- providers/hns/hns_roce_u.c | 51 +++++++++++++++++++++++++++++++++++--- providers/hns/hns_roce_u.h | 10 ++++++++ 2 files changed, 57 insertions(+), 4 deletions(-) diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c index bd2b251..fe30cda 100644 --- a/providers/hns/hns_roce_u.c +++ b/providers/hns/hns_roce_u.c @@ -95,9 +95,33 @@ static const struct verbs_context_ops hns_common_ops = { .alloc_parent_domain = hns_roce_u_alloc_pad, }; -static int init_dca_context(struct hns_roce_context *ctx, int page_size) +static int mmap_dca(struct hns_roce_context *ctx, int cmd_fd, + int page_size, size_t size, uint64_t mmap_key) { struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx; + void *addr; + + addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, cmd_fd, + mmap_key); + if (addr == MAP_FAILED) { + verbs_err(&ctx->ibv_ctx, "failed to mmap() dca prime qp.\n"); + return -EINVAL; + } + + dca_ctx->buf_status = addr; + dca_ctx->sync_status = addr + size / 2; + + return 0; +} + +static int init_dca_context(struct hns_roce_context *ctx, int cmd_fd, + struct hns_roce_alloc_ucontext_resp *resp, + int page_size) +{ + struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx; + uint64_t mmap_key = resp->dca_mmap_key; + int mmap_size = resp->dca_mmap_size; + int max_qps = resp->dca_qps; int ret; if
(!(ctx->config & HNS_ROCE_UCTX_RSP_DCA_FLAGS)) @@ -112,6 +136,16 @@ static int init_dca_context(struct hns_roce_context *ctx, int page_size) dca_ctx->max_size = HNS_DCA_MAX_MEM_SIZE; dca_ctx->mem_cnt = 0; + if (mmap_key) { + const unsigned int bits_per_qp = 2 * HNS_DCA_BITS_PER_STATUS; + + if (!mmap_dca(ctx, cmd_fd, page_size, mmap_size, mmap_key)) { + dca_ctx->status_size = mmap_size; + dca_ctx->max_qps = min_t(int, max_qps, + mmap_size * 8 / bits_per_qp); + } + } + return 0; } @@ -125,6 +159,8 @@ static void uninit_dca_context(struct hns_roce_context *ctx) pthread_spin_lock(&dca_ctx->lock); hns_roce_cleanup_dca_mem(ctx); pthread_spin_unlock(&dca_ctx->lock); + if (dca_ctx->buf_status) + munmap(dca_ctx->buf_status, dca_ctx->status_size); pthread_spin_destroy(&dca_ctx->lock); } @@ -149,6 +185,14 @@ static uint32_t calc_table_shift(uint32_t entry_count, uint32_t size_shift) return count_shift > size_shift ? count_shift - size_shift : 0; } +static void ucontext_set_cmd(struct hns_roce_alloc_ucontext *cmd, int page_size) +{ + cmd->config |= HNS_ROCE_EXSGE_FLAGS | HNS_ROCE_RQ_INLINE_FLAGS | + HNS_ROCE_CQE_INLINE_FLAGS | HNS_ROCE_UCTX_CONFIG_DCA; + cmd->comp = HNS_ROCE_ALLOC_UCTX_COMP_DCA_MAX_QPS; + cmd->dca_max_qps = page_size * 8 / (2 * HNS_DCA_BITS_PER_STATUS); +} + static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, int cmd_fd, void *private_data) @@ -165,8 +209,7 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, if (!context) return NULL; - cmd.config |= HNS_ROCE_EXSGE_FLAGS | HNS_ROCE_RQ_INLINE_FLAGS | - HNS_ROCE_CQE_INLINE_FLAGS | HNS_ROCE_UCTX_CONFIG_DCA; + ucontext_set_cmd(&cmd, hr_dev->page_size); if (ibv_cmd_get_context(&context->ibv_ctx, &cmd.ibv_cmd, sizeof(cmd), &resp.ibv_resp, sizeof(resp))) goto err_free; @@ -212,7 +255,7 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, context->max_srq_wr = dev_attrs.max_srq_wr; context->max_srq_sge = dev_attrs.max_srq_sge; - if
(init_dca_context(context, hr_dev->page_size)) + if (init_dca_context(context, cmd_fd, &resp, hr_dev->page_size)) goto err_free; if (hns_roce_mmap(hr_dev, context, cmd_fd)) diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h index 44a733f..a8f811e 100644 --- a/providers/hns/hns_roce_u.h +++ b/providers/hns/hns_roce_u.h @@ -35,6 +35,7 @@ #include #include +#include #include #include @@ -44,6 +45,7 @@ #include #include #include +#include #include #include "hns_roce_u_abi.h" @@ -52,6 +54,8 @@ #define PFX "hns: " +typedef _Atomic(uint64_t) atomic_bitmap_t; + /* The minimum page size is 4K for hardware */ #define HNS_HW_PAGE_SHIFT 12 #define HNS_HW_PAGE_SIZE (1 << HNS_HW_PAGE_SHIFT) @@ -214,6 +218,12 @@ struct hns_roce_dca_ctx { uint64_t max_size; uint64_t min_size; uint64_t curr_size; + +#define HNS_DCA_BITS_PER_STATUS 1 + unsigned int max_qps; + unsigned int status_size; + atomic_bitmap_t *buf_status; + atomic_bitmap_t *sync_status; }; struct hns_roce_context { -- 2.30.0