DCA (Dynamic Context Attachment) allows many RC QPs to share the WQE buffer in a memory pool, which helps reduce memory consumption when many QPs are inactive. Signed-off-by: Ran Zhou <zhouran10@h-partners.com> (cherry picked from commit 994c08d7e68ba906b7f7c16e8528700508af94b1)
224 lines
7.1 KiB
Diff
224 lines
7.1 KiB
Diff
From 5b151e86c6004c11913fc9a8086f0fc63902af45 Mon Sep 17 00:00:00 2001
|
|
From: Chengchang Tang <tangchengchang@huawei.com>
|
|
Date: Tue, 29 Jun 2021 21:01:27 +0800
|
|
Subject: [PATCH 24/25] libhns: Sync DCA status by shared memory
|
|
|
|
driver inclusion
|
|
category: feature
|
|
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I9C2AQ
|
|
|
|
------------------------------------------------------------------
|
|
|
|
Use the DCA num from the resp of modify_qp() to indicate the DCA status bit in
|
|
the shared memory. If the num is valid, the user DCA can get the DCA status
|
|
by testing the bit in the shared memory for each QP; otherwise, it invokes the
|
|
verb 'HNS_IB_METHOD_DCA_MEM_ATTACH' to check the DCA status.
|
|
|
|
Each QP has 2 bits in shared memory: 1 bit is used to lock the DCA status
|
|
against changes by the kernel driver or user driver, and the other bit is used
|
|
to indicate the DCA attaching status.
|
|
|
|
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
|
|
Reviewed-by: Yangyang Li <liyangyang20@huawei.com>
|
|
---
|
|
providers/hns/hns_roce_u.h | 31 +++++++++++++++++++++++
|
|
providers/hns/hns_roce_u_buf.c | 42 ++++++++++++++++++++++++++++++++
|
|
providers/hns/hns_roce_u_hw_v2.c | 21 +++++++++++++++-
|
|
3 files changed, 93 insertions(+), 1 deletion(-)
|
|
|
|
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
|
|
index e808ff3..5bddb00 100644
|
|
--- a/providers/hns/hns_roce_u.h
|
|
+++ b/providers/hns/hns_roce_u.h
|
|
@@ -379,6 +379,7 @@ struct hns_roce_dca_buf {
|
|
void **bufs;
|
|
unsigned int max_cnt;
|
|
unsigned int shift;
|
|
+ unsigned int dcan;
|
|
};
|
|
|
|
struct hns_roce_qp {
|
|
@@ -444,6 +445,7 @@ struct hns_roce_dca_attach_attr {
|
|
uint32_t sq_offset;
|
|
uint32_t sge_offset;
|
|
uint32_t rq_offset;
|
|
+ bool force;
|
|
};
|
|
|
|
struct hns_roce_dca_detach_attr {
|
|
@@ -556,6 +558,32 @@ static inline int hns_roce_spin_unlock(struct hns_roce_spinlock *hr_lock)
|
|
return 0;
|
|
}
|
|
|
|
+#define HNS_ROCE_BIT_MASK(nr) (1UL << ((nr) % 64))
|
|
+#define HNS_ROCE_BIT_WORD(nr) ((nr) / 64)
|
|
+
|
|
+static inline bool atomic_test_bit(atomic_bitmap_t *p, uint32_t nr)
|
|
+{
|
|
+ p += HNS_ROCE_BIT_WORD(nr);
|
|
+ return !!(atomic_load(p) & HNS_ROCE_BIT_MASK(nr));
|
|
+}
|
|
+
|
|
+static inline bool test_and_set_bit_lock(atomic_bitmap_t *p, uint32_t nr)
|
|
+{
|
|
+ uint64_t mask = HNS_ROCE_BIT_MASK(nr);
|
|
+
|
|
+ p += HNS_ROCE_BIT_WORD(nr);
|
|
+ if (atomic_load(p) & mask)
|
|
+ return true;
|
|
+
|
|
+ return (atomic_fetch_or(p, mask) & mask) != 0;
|
|
+}
|
|
+
|
|
+static inline void clear_bit_unlock(atomic_bitmap_t *p, uint32_t nr)
|
|
+{
|
|
+ p += HNS_ROCE_BIT_WORD(nr);
|
|
+ atomic_fetch_and(p, ~HNS_ROCE_BIT_MASK(nr));
|
|
+}
|
|
+
|
|
int hns_roce_u_query_device(struct ibv_context *context,
|
|
const struct ibv_query_device_ex_input *input,
|
|
struct ibv_device_attr_ex *attr, size_t attr_size);
|
|
@@ -636,6 +664,9 @@ int hns_roce_attach_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
|
|
uint32_t size, struct hns_roce_dca_buf *buf);
|
|
void hns_roce_detach_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
|
|
struct hns_roce_dca_detach_attr *attr);
|
|
+bool hns_roce_dca_start_post(struct hns_roce_dca_ctx *ctx, uint32_t dcan);
|
|
+void hns_roce_dca_stop_post(struct hns_roce_dca_ctx *ctx, uint32_t dcan);
|
|
+
|
|
void hns_roce_shrink_dca_mem(struct hns_roce_context *ctx);
|
|
void hns_roce_cleanup_dca_mem(struct hns_roce_context *ctx);
|
|
|
|
diff --git a/providers/hns/hns_roce_u_buf.c b/providers/hns/hns_roce_u_buf.c
|
|
index 3d41b89..08c0fbc 100644
|
|
--- a/providers/hns/hns_roce_u_buf.c
|
|
+++ b/providers/hns/hns_roce_u_buf.c
|
|
@@ -440,6 +440,45 @@ static int setup_dca_buf(struct hns_roce_context *ctx, uint32_t handle,
|
|
return (idx >= page_count) ? 0 : -ENOMEM;
|
|
}
|
|
|
|
+#define DCAN_TO_SYNC_BIT(n) ((n) * HNS_DCA_BITS_PER_STATUS)
|
|
+#define DCAN_TO_STAT_BIT(n) DCAN_TO_SYNC_BIT(n)
|
|
+
|
|
+#define MAX_DCA_TRY_LOCK_TIMES 10
|
|
+bool hns_roce_dca_start_post(struct hns_roce_dca_ctx *ctx, uint32_t dcan)
|
|
+{
|
|
+ atomic_bitmap_t *st = ctx->sync_status;
|
|
+ int try_times = 0;
|
|
+
|
|
+ if (!st || dcan >= ctx->max_qps)
|
|
+ return true;
|
|
+
|
|
+ while (test_and_set_bit_lock(st, DCAN_TO_SYNC_BIT(dcan)))
|
|
+ if (try_times++ > MAX_DCA_TRY_LOCK_TIMES)
|
|
+ return false;
|
|
+
|
|
+ return true;
|
|
+}
|
|
+
|
|
+void hns_roce_dca_stop_post(struct hns_roce_dca_ctx *ctx, uint32_t dcan)
|
|
+{
|
|
+ atomic_bitmap_t *st = ctx->sync_status;
|
|
+
|
|
+ if (!st || dcan >= ctx->max_qps)
|
|
+ return;
|
|
+
|
|
+ clear_bit_unlock(st, DCAN_TO_SYNC_BIT(dcan));
|
|
+}
|
|
+
|
|
+static bool check_dca_is_attached(struct hns_roce_dca_ctx *ctx, uint32_t dcan)
|
|
+{
|
|
+ atomic_bitmap_t *st = ctx->buf_status;
|
|
+
|
|
+ if (!st || dcan >= ctx->max_qps)
|
|
+ return false;
|
|
+
|
|
+ return atomic_test_bit(st, DCAN_TO_STAT_BIT(dcan));
|
|
+}
|
|
+
|
|
#define DCA_EXPAND_MEM_TRY_TIMES 3
|
|
int hns_roce_attach_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
|
|
struct hns_roce_dca_attach_attr *attr,
|
|
@@ -451,6 +490,9 @@ int hns_roce_attach_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
|
|
int try_times = 0;
|
|
int ret = 0;
|
|
|
|
+ if (!attr->force && check_dca_is_attached(&ctx->dca_ctx, buf->dcan))
|
|
+ return 0;
|
|
+
|
|
do {
|
|
resp.alloc_pages = 0;
|
|
ret = attach_dca_mem(ctx, handle, attr, &resp);
|
|
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
|
|
index 7a93456..15d9108 100644
|
|
--- a/providers/hns/hns_roce_u_hw_v2.c
|
|
+++ b/providers/hns/hns_roce_u_hw_v2.c
|
|
@@ -612,6 +612,7 @@ static int dca_attach_qp_buf(struct hns_roce_context *ctx,
|
|
struct hns_roce_qp *qp)
|
|
{
|
|
struct hns_roce_dca_attach_attr attr = {};
|
|
+ bool enable_detach;
|
|
uint32_t idx;
|
|
int ret;
|
|
|
|
@@ -633,9 +634,16 @@ static int dca_attach_qp_buf(struct hns_roce_context *ctx,
|
|
attr.rq_offset = idx << qp->rq.wqe_shift;
|
|
}
|
|
|
|
+ enable_detach = check_dca_detach_enable(qp);
|
|
+ if (enable_detach &&
|
|
+ !hns_roce_dca_start_post(&ctx->dca_ctx, qp->dca_wqe.dcan))
|
|
+ /* Force attach if failed to sync dca status */
|
|
+ attr.force = true;
|
|
|
|
ret = hns_roce_attach_dca_mem(ctx, qp->verbs_qp.qp.handle, &attr,
|
|
- qp->buf_size, &qp->dca_wqe);
|
|
+ qp->buf_size, &qp->dca_wqe);
|
|
+ if (ret && enable_detach)
|
|
+ hns_roce_dca_stop_post(&ctx->dca_ctx, qp->dca_wqe.dcan);
|
|
|
|
hns_roce_spin_unlock(&qp->rq.hr_lock);
|
|
hns_roce_spin_unlock(&qp->sq.hr_lock);
|
|
@@ -1643,6 +1651,9 @@ out:
|
|
|
|
hns_roce_spin_unlock(&qp->sq.hr_lock);
|
|
|
|
+ if (check_dca_detach_enable(qp))
|
|
+ hns_roce_dca_stop_post(&ctx->dca_ctx, qp->dca_wqe.dcan);
|
|
+
|
|
if (ibvqp->state == IBV_QPS_ERR) {
|
|
attr.qp_state = IBV_QPS_ERR;
|
|
|
|
@@ -1784,6 +1795,9 @@ out:
|
|
|
|
hns_roce_spin_unlock(&qp->rq.hr_lock);
|
|
|
|
+ if (check_dca_detach_enable(qp))
|
|
+ hns_roce_dca_stop_post(&ctx->dca_ctx, qp->dca_wqe.dcan);
|
|
+
|
|
if (ibvqp->state == IBV_QPS_ERR) {
|
|
attr.qp_state = IBV_QPS_ERR;
|
|
hns_roce_u_v2_modify_qp(ibvqp, &attr, IBV_QP_STATE);
|
|
@@ -1902,6 +1916,7 @@ static int hns_roce_u_v2_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
|
|
if (attr->qp_state == IBV_QPS_RTR) {
|
|
hr_qp->tc_mode = resp_ex.drv_payload.tc_mode;
|
|
hr_qp->priority = resp_ex.drv_payload.priority;
|
|
+ hr_qp->dca_wqe.dcan = resp_ex.drv_payload.dcan;
|
|
}
|
|
}
|
|
|
|
@@ -2951,6 +2966,10 @@ static int wr_complete(struct ibv_qp_ex *ibv_qp)
|
|
|
|
out:
|
|
hns_roce_spin_unlock(&qp->sq.hr_lock);
|
|
+
|
|
+ if (check_dca_detach_enable(qp))
|
|
+ hns_roce_dca_stop_post(&ctx->dca_ctx, qp->dca_wqe.dcan);
|
|
+
|
|
if (ibv_qp->qp_base.state == IBV_QPS_ERR) {
|
|
attr.qp_state = IBV_QPS_ERR;
|
|
hns_roce_u_v2_modify_qp(&ibv_qp->qp_base, &attr, IBV_QP_STATE);
|
|
--
|
|
2.33.0
|
|
|