DCA (Dynamic Context Attachment) allows many RC QPs to share WQE buffers from a common memory pool, which reduces memory consumption when many QPs are inactive.

Signed-off-by: Ran Zhou <zhouran10@h-partners.com>
(cherry picked from commit 994c08d7e68ba906b7f7c16e8528700508af94b1)
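To make the mechanism easier to follow before reading the patch, here is a small self-contained sketch (not taken from the patch; the names are illustrative stand-ins) of the address translation a DCA-mode QP relies on: instead of owning one contiguous WQE buffer, the QP keeps an array of per-page pointers that is filled when it attaches to the shared pool, and a WQE offset is resolved through that array, mirroring the get_wqe() helper added in hns_roce_u_hw_v2.c.

#include <stddef.h>

/* One pointer per attached hardware page; shift is log2 of the page size
 * (assumed 4 KiB here, matching HNS_HW_PAGE_SHIFT in the provider).
 */
struct dca_page_list {
        void **bufs;
        unsigned int shift;
};

/* Resolve the address of the WQE that starts at 'offset' within the queue. */
static inline void *dca_offset_to_addr(const struct dca_page_list *list,
                                       unsigned int offset)
{
        unsigned int page = offset >> list->shift;
        unsigned int off = offset & ((1U << list->shift) - 1);

        return list->bufs[page] ? (char *)list->bufs[page] + off : NULL;
}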
From a1a5d42a2c48660c040695bd8316538a9ce83ab2 Mon Sep 17 00:00:00 2001
From: Chengchang Tang <tangchengchang@huawei.com>
Date: Mon, 10 May 2021 17:13:17 +0800
Subject: [PATCH 22/25] libhns: Add support for attaching QP's WQE buffer

driver inclusion
category: feature
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I9C2AQ

------------------------------------------------------------------

If a uQP works in DCA mode, its WQE buffer is split into blocks that are
stored in a list. The blocks are allocated from the DCA memory pool before
posting WRs and are dropped once the QP's CI equals its PI after the CQ is
polled.

Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
Reviewed-by: Yangyang Li <liyangyang20@huawei.com>
---
 providers/hns/hns_roce_u.h       |  26 ++++-
 providers/hns/hns_roce_u_buf.c   | 173 ++++++++++++++++++++++++++++++-
 providers/hns/hns_roce_u_hw_v2.c | 125 +++++++++++++++++++++-
 providers/hns/hns_roce_u_hw_v2.h |   2 +
 providers/hns/hns_roce_u_verbs.c |  32 ++++--
 5 files changed, 345 insertions(+), 13 deletions(-)

diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index e3fa24d..ba646d3 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -365,11 +365,18 @@ struct hns_roce_sge_ex {
         unsigned int sge_shift;
 };
 
+struct hns_roce_dca_buf {
+        void **bufs;
+        unsigned int max_cnt;
+        unsigned int shift;
+};
+
 struct hns_roce_qp {
         struct verbs_qp verbs_qp;
         struct hns_roce_buf buf;
+        struct hns_roce_dca_buf dca_wqe;
         int max_inline_data;
-        int buf_size;
+        unsigned int buf_size;
         unsigned int sq_signal_bits;
         struct hns_roce_wq sq;
         struct hns_roce_wq rq;
@@ -423,11 +430,22 @@ struct hns_roce_u_hw {
         struct verbs_context_ops hw_ops;
 };
 
+struct hns_roce_dca_attach_attr {
+        uint32_t sq_offset;
+        uint32_t sge_offset;
+        uint32_t rq_offset;
+};
+
+struct hns_roce_dca_detach_attr {
+        uint32_t sq_index;
+};
+
 /*
  * The entries's buffer should be aligned to a multiple of the hardware's
  * minimum page size.
  */
 #define hr_hw_page_align(x) align(x, HNS_HW_PAGE_SIZE)
+#define hr_hw_page_count(x) (hr_hw_page_align(x) / HNS_HW_PAGE_SIZE)
 
 static inline unsigned int to_hr_hem_entries_size(int count, int buf_shift)
 {
@@ -603,9 +621,13 @@ void hns_roce_qp_spinlock_destroy(struct hns_roce_qp *qp);
 
 void hns_roce_free_qp_buf(struct hns_roce_qp *qp, struct hns_roce_context *ctx);
 
+int hns_roce_attach_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
+                            struct hns_roce_dca_attach_attr *attr,
+                            uint32_t size, struct hns_roce_dca_buf *buf);
+void hns_roce_detach_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
+                             struct hns_roce_dca_detach_attr *attr);
 void hns_roce_shrink_dca_mem(struct hns_roce_context *ctx);
 void hns_roce_cleanup_dca_mem(struct hns_roce_context *ctx);
-int hns_roce_add_dca_mem(struct hns_roce_context *ctx, uint32_t size);
 
 void hns_roce_init_qp_indices(struct hns_roce_qp *qp);
 
diff --git a/providers/hns/hns_roce_u_buf.c b/providers/hns/hns_roce_u_buf.c
index c0f86e9..3d41b89 100644
--- a/providers/hns/hns_roce_u_buf.c
+++ b/providers/hns/hns_roce_u_buf.c
@@ -196,6 +196,88 @@ static int shrink_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
 
         return ret;
 }
+
+struct hns_dca_mem_query_resp {
+        uint64_t key;
+        uint32_t offset;
+        uint32_t page_count;
+};
+
+static int query_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
+                         uint32_t index, struct hns_dca_mem_query_resp *resp)
+{
+        int ret;
+
+        DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM,
+                               HNS_IB_METHOD_DCA_MEM_QUERY, 5);
+        fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_QUERY_HANDLE, handle);
+        fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_QUERY_PAGE_INDEX, index);
+        fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_QUERY_OUT_KEY,
+                      &resp->key, sizeof(resp->key));
+        fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_QUERY_OUT_OFFSET,
+                      &resp->offset, sizeof(resp->offset));
+        fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_QUERY_OUT_PAGE_COUNT,
+                      &resp->page_count, sizeof(resp->page_count));
+        ret = execute_ioctl(&ctx->ibv_ctx.context, cmd);
+        if (ret)
+                verbs_err(&ctx->ibv_ctx,
+                          "failed to query DCA mem-%u, ret = %d.\n",
+                          handle, ret);
+
+        return ret;
+}
+
+void hns_roce_detach_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
+                             struct hns_roce_dca_detach_attr *attr)
+{
+        int ret;
+
+        DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM,
+                               HNS_IB_METHOD_DCA_MEM_DETACH, 4);
+        fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_DETACH_HANDLE, handle);
+        fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_DETACH_SQ_INDEX,
+                            attr->sq_index);
+        ret = execute_ioctl(&ctx->ibv_ctx.context, cmd);
+        if (ret)
+                verbs_warn(&ctx->ibv_ctx,
+                           "failed to detach DCA mem-%u, ret = %d.\n",
+                           handle, ret);
+}
+
+struct hns_dca_mem_attach_resp {
+#define HNS_DCA_ATTACH_OUT_FLAGS_NEW_BUFFER BIT(0)
+        uint32_t alloc_flags;
+        uint32_t alloc_pages;
+};
+
+static int attach_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
+                          struct hns_roce_dca_attach_attr *attr,
+                          struct hns_dca_mem_attach_resp *resp)
+{
+        int ret;
+
+        DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM,
+                               HNS_IB_METHOD_DCA_MEM_ATTACH, 6);
+        fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_HANDLE, handle);
+        fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_SQ_OFFSET,
+                            attr->sq_offset);
+        fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_SGE_OFFSET,
+                            attr->sge_offset);
+        fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_RQ_OFFSET,
+                            attr->rq_offset);
+        fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_OUT_ALLOC_FLAGS,
+                      &resp->alloc_flags, sizeof(resp->alloc_flags));
+        fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_OUT_ALLOC_PAGES,
+                      &resp->alloc_pages, sizeof(resp->alloc_pages));
+        ret = execute_ioctl(&ctx->ibv_ctx.context, cmd);
+        if (ret)
+                verbs_err(&ctx->ibv_ctx,
+                          "failed to attach DCA mem-%u, ret = %d.\n",
+                          handle, ret);
+
+        return ret;
+}
+
 static bool add_dca_mem_enabled(struct hns_roce_dca_ctx *ctx,
                                 uint32_t alloc_size)
 {
@@ -226,7 +308,7 @@ static bool shrink_dca_mem_enabled(struct hns_roce_dca_ctx *ctx)
         return enable;
 }
 
-int hns_roce_add_dca_mem(struct hns_roce_context *ctx, uint32_t size)
+static int add_dca_mem(struct hns_roce_context *ctx, uint32_t size)
 {
         struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
         struct hns_roce_dca_mem *mem;
@@ -310,3 +392,92 @@ void hns_roce_shrink_dca_mem(struct hns_roce_context *ctx)
                 dca_mem_cnt--;
         }
 }
+
+static void config_dca_pages(void *addr, struct hns_roce_dca_buf *buf,
+                             uint32_t page_index, int page_count)
+{
+        void **pages = &buf->bufs[page_index];
+        int page_size = 1 << buf->shift;
+        int i;
+
+        for (i = 0; i < page_count; i++) {
+                pages[i] = addr;
+                addr += page_size;
+        }
+}
+
+static int setup_dca_buf(struct hns_roce_context *ctx, uint32_t handle,
+                         struct hns_roce_dca_buf *buf, uint32_t page_count)
+{
+        struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
+        struct hns_dca_mem_query_resp resp = {};
+        struct hns_roce_dca_mem *mem;
+        uint32_t idx = 0;
+        int ret;
+
+        while (idx < page_count && idx < buf->max_cnt) {
+                resp.page_count = 0;
+                ret = query_dca_mem(ctx, handle, idx, &resp);
+                if (ret)
+                        return -ENOMEM;
+                if (resp.page_count < 1)
+                        break;
+
+                pthread_spin_lock(&dca_ctx->lock);
+                mem = key_to_dca_mem(dca_ctx, resp.key);
+                if (mem && resp.offset < mem->buf.length) {
+                        config_dca_pages(dca_mem_addr(mem, resp.offset),
+                                         buf, idx, resp.page_count);
+                } else {
+                        pthread_spin_unlock(&dca_ctx->lock);
+                        break;
+                }
+                pthread_spin_unlock(&dca_ctx->lock);
+
+                idx += resp.page_count;
+        }
+
+        return (idx >= page_count) ? 0 : -ENOMEM;
+}
+
+#define DCA_EXPAND_MEM_TRY_TIMES 3
+int hns_roce_attach_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
+                            struct hns_roce_dca_attach_attr *attr,
+                            uint32_t size, struct hns_roce_dca_buf *buf)
+{
+        uint32_t buf_pages = size >> buf->shift;
+        struct hns_dca_mem_attach_resp resp = {};
+        bool is_new_buf = true;
+        int try_times = 0;
+        int ret = 0;
+
+        do {
+                resp.alloc_pages = 0;
+                ret = attach_dca_mem(ctx, handle, attr, &resp);
+                if (ret)
+                        break;
+
+                if (resp.alloc_pages >= buf_pages) {
+                        is_new_buf = !!(resp.alloc_flags &
+                                        HNS_DCA_ATTACH_OUT_FLAGS_NEW_BUFFER);
+                        break;
+                }
+
+                ret = add_dca_mem(ctx, size);
+                if (ret)
+                        break;
+        } while (try_times++ < DCA_EXPAND_MEM_TRY_TIMES);
+
+        if (ret || resp.alloc_pages < buf_pages) {
+                verbs_err(&ctx->ibv_ctx,
+                          "failed to attach, size %u count %u != %u, ret = %d.\n",
+                          size, buf_pages, resp.alloc_pages, ret);
+                return -ENOMEM;
+        }
+
+        /* No need config user address if DCA config not changed */
+        if (!is_new_buf && buf->bufs[0])
+                return 0;
+
+        return setup_dca_buf(ctx, handle, buf, buf_pages);
+}
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index 0a100b8..7a93456 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -199,19 +199,35 @@ static struct hns_roce_v2_cqe *next_cqe_sw_v2(struct hns_roce_cq *cq)
         return get_sw_cqe_v2(cq, cq->cons_index);
 }
 
+static inline bool check_qp_dca_enable(struct hns_roce_qp *qp)
+{
+        return !!qp->dca_wqe.bufs;
+}
+
+static inline void *get_wqe(struct hns_roce_qp *qp, unsigned int offset)
+{
+        if (likely(qp->buf.buf))
+                return qp->buf.buf + offset;
+        else if (unlikely(check_qp_dca_enable(qp)))
+                return qp->dca_wqe.bufs[offset >> qp->dca_wqe.shift] +
+                       (offset & ((1 << qp->dca_wqe.shift) - 1));
+        else
+                return NULL;
+}
+
 static void *get_recv_wqe_v2(struct hns_roce_qp *qp, unsigned int n)
 {
-        return qp->buf.buf + qp->rq.offset + (n << qp->rq.wqe_shift);
+        return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift));
 }
 
 static void *get_send_wqe(struct hns_roce_qp *qp, unsigned int n)
 {
-        return qp->buf.buf + qp->sq.offset + (n << qp->sq.wqe_shift);
+        return get_wqe(qp, qp->sq.offset + (n << qp->sq.wqe_shift));
 }
 
 static void *get_send_sge_ex(struct hns_roce_qp *qp, unsigned int n)
 {
-        return qp->buf.buf + qp->ex_sge.offset + (n << qp->ex_sge.sge_shift);
+        return get_wqe(qp, qp->ex_sge.offset + (n << qp->ex_sge.sge_shift));
 }
 
 static void *get_srq_wqe(struct hns_roce_srq *srq, unsigned int n)
@@ -580,6 +596,73 @@ static void parse_cqe_for_req(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc,
         wc->opcode = wc_send_op_map[opcode];
 }
 
+static bool check_dca_attach_enable(struct hns_roce_qp *qp)
+{
+        return check_qp_dca_enable(qp) &&
+               (qp->flags & HNS_ROCE_QP_CAP_DYNAMIC_CTX_ATTACH);
+}
+
+static bool check_dca_detach_enable(struct hns_roce_qp *qp)
+{
+        return check_qp_dca_enable(qp) &&
+               (qp->flags & HNS_ROCE_QP_CAP_DYNAMIC_CTX_DETACH);
+}
+
+static int dca_attach_qp_buf(struct hns_roce_context *ctx,
+                             struct hns_roce_qp *qp)
+{
+        struct hns_roce_dca_attach_attr attr = {};
+        uint32_t idx;
+        int ret;
+
+        hns_roce_spin_lock(&qp->sq.hr_lock);
+        hns_roce_spin_lock(&qp->rq.hr_lock);
+
+        if (qp->sq.wqe_cnt > 0) {
+                idx = qp->sq.head & (qp->sq.wqe_cnt - 1);
+                attr.sq_offset = idx << qp->sq.wqe_shift;
+        }
+
+        if (qp->ex_sge.sge_cnt > 0) {
+                idx = qp->next_sge & (qp->ex_sge.sge_cnt - 1);
+                attr.sge_offset = idx << qp->ex_sge.sge_shift;
+        }
+
+        if (qp->rq.wqe_cnt > 0) {
+                idx = qp->rq.head & (qp->rq.wqe_cnt - 1);
+                attr.rq_offset = idx << qp->rq.wqe_shift;
+        }
+
+
+        ret = hns_roce_attach_dca_mem(ctx, qp->verbs_qp.qp.handle, &attr,
+                                      qp->buf_size, &qp->dca_wqe);
+
+        hns_roce_spin_unlock(&qp->rq.hr_lock);
+        hns_roce_spin_unlock(&qp->sq.hr_lock);
+
+        return ret;
+}
+
+static void dca_detach_qp_buf(struct hns_roce_context *ctx,
+                              struct hns_roce_qp *qp)
+{
+        struct hns_roce_dca_detach_attr attr;
+        bool is_empty;
+
+        hns_roce_spin_lock(&qp->sq.hr_lock);
+        hns_roce_spin_lock(&qp->rq.hr_lock);
+
+        is_empty = qp->sq.head == qp->sq.tail && qp->rq.head == qp->rq.tail;
+        if (is_empty && qp->sq.wqe_cnt > 0)
+                attr.sq_index = qp->sq.head & (qp->sq.wqe_cnt - 1);
+
+        hns_roce_spin_unlock(&qp->rq.hr_lock);
+        hns_roce_spin_unlock(&qp->sq.hr_lock);
+
+        if (is_empty && qp->sq.wqe_cnt > 0)
+                hns_roce_detach_dca_mem(ctx, qp->verbs_qp.qp.handle, &attr);
+}
+
 static void cqe_proc_sq(struct hns_roce_qp *hr_qp, uint32_t wqe_idx,
                         struct hns_roce_cq *cq)
 {
@@ -919,6 +1002,9 @@ static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne,
 
         for (npolled = 0; npolled < ne; ++npolled) {
                 err = hns_roce_poll_one(ctx, &qp, cq, wc + npolled);
+                if (qp && check_dca_detach_enable(qp))
+                        dca_detach_qp_buf(ctx, qp);
+
                 if (err != V2_CQ_OK)
                         break;
         }
@@ -970,7 +1056,7 @@ static int check_qp_send(struct hns_roce_qp *qp, struct hns_roce_context *ctx)
 
         if (unlikely(ibvqp->state == IBV_QPS_RESET ||
                      ibvqp->state == IBV_QPS_INIT ||
-                     ibvqp->state == IBV_QPS_RTR)){
+                     ibvqp->state == IBV_QPS_RTR)) {
                 verbs_err(verbs_get_ctx(qp->verbs_qp.qp.context),
                           "unsupported qp state, state = %d.\n", ibvqp->state);
                 return EINVAL;
@@ -980,6 +1066,14 @@ static int check_qp_send(struct hns_roce_qp *qp, struct hns_roce_context *ctx)
                 return EIO;
         }
 
+        if (check_dca_attach_enable(qp)) {
+                ret = dca_attach_qp_buf(ctx, qp);
+                if (ret)
+                        verbs_err_datapath(&ctx->ibv_ctx,
+                                           "failed to attach QP-%u send, ret = %d.\n",
+                                           qp->verbs_qp.qp.qp_num, ret);
+        }
+
         return ret;
 }
 
@@ -1347,6 +1441,13 @@ static int set_rc_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr,
         return 0;
 }
 
+static inline void fill_rc_dca_fields(uint32_t qp_num,
+                                      struct hns_roce_rc_sq_wqe *wqe)
+{
+        hr_reg_write(wqe, RCWQE_SQPN_L, qp_num);
+        hr_reg_write(wqe, RCWQE_SQPN_H, qp_num >> RCWQE_SQPN_L_WIDTH);
+}
+
 static void set_bind_mw_seg(struct hns_roce_rc_sq_wqe *wqe,
                             const struct ibv_send_wr *wr)
 {
@@ -1454,6 +1555,9 @@ static int set_rc_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr,
                 return ret;
 
 wqe_valid:
+        if (check_qp_dca_enable(qp))
+                fill_rc_dca_fields(qp->verbs_qp.qp.qp_num, rc_sq_wqe);
+
         enable_wqe(qp, rc_sq_wqe, qp->sq.head + nreq);
 
         return 0;
@@ -1563,6 +1667,14 @@ static int check_qp_recv(struct hns_roce_qp *qp, struct hns_roce_context *ctx)
                 return EIO;
         }
 
+        if (check_dca_attach_enable(qp)) {
+                ret = dca_attach_qp_buf(ctx, qp);
+                if (ret)
+                        verbs_err_datapath(&ctx->ibv_ctx,
+                                           "failed to attach QP-%u recv, ret = %d.\n",
+                                           qp->verbs_qp.qp.qp_num, ret);
+        }
+
         return ret;
 }
 
@@ -1758,6 +1870,7 @@ static void record_qp_attr(struct ibv_qp *qp, struct ibv_qp_attr *attr,
 static int hns_roce_u_v2_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
                                    int attr_mask)
 {
+        struct hns_roce_context *ctx = to_hr_ctx(qp->context);
         struct hns_roce_modify_qp_ex_resp resp_ex = {};
         struct hns_roce_modify_qp_ex cmd_ex = {};
         struct hns_roce_qp *hr_qp = to_hr_qp(qp);
@@ -1804,6 +1917,10 @@ static int hns_roce_u_v2_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
                 hns_roce_init_qp_indices(to_hr_qp(qp));
         }
 
+        /* Try to shrink the DCA mem */
+        if (ctx->dca_ctx.mem_cnt > 0)
+                hns_roce_shrink_dca_mem(ctx);
+
         record_qp_attr(qp, attr, attr_mask);
 
         return ret;
diff --git a/providers/hns/hns_roce_u_hw_v2.h b/providers/hns/hns_roce_u_hw_v2.h
index 1a7b828..50a920f 100644
--- a/providers/hns/hns_roce_u_hw_v2.h
+++ b/providers/hns/hns_roce_u_hw_v2.h
@@ -237,6 +237,8 @@ struct hns_roce_rc_sq_wqe {
 #define RCWQE_MW_RR_EN RCWQE_FIELD_LOC(259, 259)
 #define RCWQE_MW_RW_EN RCWQE_FIELD_LOC(260, 260)
 
+#define RCWQE_SQPN_L_WIDTH 2
+
 struct hns_roce_v2_wqe_data_seg {
         __le32 len;
         __le32 lkey;
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index 69bcc13..248d862 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -1311,6 +1311,14 @@ static int calc_qp_buff_size(struct hns_roce_device *hr_dev,
         return 0;
 }
 
+static inline bool check_qp_support_dca(bool pool_en, enum ibv_qp_type qp_type)
+{
+        if (pool_en && (qp_type == IBV_QPT_RC || qp_type == IBV_QPT_XRC_SEND))
+                return true;
+
+        return false;
+}
+
 static void qp_free_wqe(struct hns_roce_qp *qp)
 {
         free_recv_rinl_buf(&qp->rq_rinl_buf);
@@ -1322,8 +1330,8 @@ static void qp_free_wqe(struct hns_roce_qp *qp)
         hns_roce_free_buf(&qp->buf);
 }
 
-static int qp_alloc_wqe(struct ibv_qp_cap *cap, struct hns_roce_qp *qp,
-                        struct hns_roce_context *ctx)
+static int qp_alloc_wqe(struct ibv_qp_init_attr_ex *attr,
+                        struct hns_roce_qp *qp, struct hns_roce_context *ctx)
 {
         struct hns_roce_device *hr_dev = to_hr_dev(ctx->ibv_ctx.context.device);
 
@@ -1341,12 +1349,24 @@ static int qp_alloc_wqe(struct ibv_qp_cap *cap, struct hns_roce_qp *qp,
         }
 
         if (qp->rq_rinl_buf.wqe_cnt) {
-                if (alloc_recv_rinl_buf(cap->max_recv_sge, &qp->rq_rinl_buf))
+                if (alloc_recv_rinl_buf(attr->cap.max_recv_sge,
+                                        &qp->rq_rinl_buf))
                         goto err_alloc;
         }
 
-        if (hns_roce_alloc_buf(&qp->buf, qp->buf_size, 1 << qp->pageshift))
-                goto err_alloc;
+        if (check_qp_support_dca(ctx->dca_ctx.max_size != 0, attr->qp_type)) {
+                /* when DCA is enabled, use a buffer list to store page addr */
+                qp->buf.buf = NULL;
+                qp->dca_wqe.max_cnt = hr_hw_page_count(qp->buf_size);
+                qp->dca_wqe.shift = HNS_HW_PAGE_SHIFT;
+                qp->dca_wqe.bufs = calloc(qp->dca_wqe.max_cnt, sizeof(void *));
+                if (!qp->dca_wqe.bufs)
+                        goto err_alloc;
+        } else {
+                if (hns_roce_alloc_buf(&qp->buf, qp->buf_size,
+                                       HNS_HW_PAGE_SIZE))
+                        goto err_alloc;
+        }
 
         return 0;
 
@@ -1636,7 +1656,7 @@ static int hns_roce_alloc_qp_buf(struct ibv_qp_init_attr_ex *attr,
 {
         int ret;
 
-        ret = qp_alloc_wqe(&attr->cap, qp, ctx);
+        ret = qp_alloc_wqe(attr, qp, ctx);
         if (ret)
                 return ret;
 
--
2.33.0
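The sketch below (again illustrative only, with simplified stand-in types rather than the provider's structures) condenses the datapath lifecycle the commit message describes: WQE pages are attached before a work request is posted, and they are handed back to the shared pool only after polling the CQ shows the QP is idle, i.e. the consumer index has caught up with the producer index on both queues, which is the condition dca_detach_qp_buf() checks above.

#include <stdbool.h>
#include <stdint.h>

struct toy_wq {
        uint32_t head; /* producer index, advanced when WRs are posted */
        uint32_t tail; /* consumer index, advanced when CQEs are polled */
};

struct toy_qp {
        struct toy_wq sq;
        struct toy_wq rq;
        bool pages_attached; /* whether DCA pages are currently held */
};

/* A QP may release its DCA pages only once both queues are drained. */
static bool toy_qp_is_idle(const struct toy_qp *qp)
{
        return qp->sq.head == qp->sq.tail && qp->rq.head == qp->rq.tail;
}

/* Called after polling the CQ; the real code issues the DETACH ioctl here. */
static void toy_maybe_detach(struct toy_qp *qp)
{
        if (qp->pages_attached && toy_qp_is_idle(qp))
                qp->pages_attached = false;
}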