When the HW is in the resetting stage, we cannot poll back all the expected work completions, as the HW will no longer generate CQEs. This patch allows the driver to compose the expected WCs in place of the HW during the reset stage. Once the hardware has finished resetting, we can poll the CQ from hardware again.

Signed-off-by: Ran Zhou <zhouran10@h-partners.com>
(cherry picked from commit 5494e44cf97e65d858c8f7376c0424a833dc8323)
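This behaviour is transparent to verbs applications: an ordinary completion-polling loop still terminates across a reset, because the provider composes the outstanding WCs itself once the HW stops generating CQEs. Below is a minimal sketch of such a loop using only standard libibverbs calls; drain_cq and expected are illustrative names, not part of this patch.

#include <infiniband/verbs.h>
#include <stdio.h>

/* Poll until 'expected' completions arrive. During a device reset the
 * provider composes the outstanding WCs in software, so this loop still
 * terminates instead of waiting for CQEs the HW will never write. */
static int drain_cq(struct ibv_cq *cq, int expected)
{
	struct ibv_wc wc;
	int got = 0;

	while (got < expected) {
		int n = ibv_poll_cq(cq, 1, &wc);

		if (n < 0)
			return n;	/* fatal provider error */
		if (n == 0)
			continue;	/* nothing yet, keep polling */

		if (wc.status != IBV_WC_SUCCESS)
			fprintf(stderr, "wr_id %llu failed: %s\n",
				(unsigned long long)wc.wr_id,
				ibv_wc_status_str(wc.status));
		got++;
	}

	return 0;
}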
From 13d5c1bd7192d75f27aba97e556fb83bd182c561 Mon Sep 17 00:00:00 2001
From: Guofeng Yue <yueguofeng@hisilicon.com>
Date: Mon, 9 May 2022 16:03:38 +0800
Subject: [PATCH 13/18] libhns: Add reset stop flow mechanism

driver inclusion
category: bugfix
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I65WI7

------------------------------------------------------------------

Add an interface to user space that is used to receive the
kernel reset state. After reading the reset flag, user space
stops sending doorbells.

Signed-off-by: Yixing Liu <liuyixing1@huawei.com>
Signed-off-by: Guofeng Yue <yueguofeng@hisilicon.com>
Reviewed-by: Yangyang Li <liyangyang20@huawei.com>
---
 providers/hns/hns_roce_u.c       | 25 +++++++++++++++++++++++++
 providers/hns/hns_roce_u.h       |  5 +++++
 providers/hns/hns_roce_u_db.h    |  8 +++++++-
 providers/hns/hns_roce_u_hw_v2.c | 19 ++++++++++++++-----
 4 files changed, 51 insertions(+), 6 deletions(-)
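
The mechanism in outline: the kernel exposes a read-only page whose is_reset flag it sets when a reset begins; the provider maps that page at context creation and checks the flag before every doorbell write. Here is a minimal user-space sketch of that pattern, with hypothetical names (map_reset_state, ring_doorbell) rather than the actual functions added in the diff below.

#include <stdint.h>
#include <sys/mman.h>

struct reset_state {
	uint32_t is_reset;	/* written by the kernel, read-only here */
};

static const struct reset_state *map_reset_state(int cmd_fd, long page_size,
						 uint64_t mmap_key)
{
	void *p;

	if (!mmap_key)		/* key of 0: kernel lacks the feature */
		return NULL;

	p = mmap(NULL, page_size, PROT_READ, MAP_SHARED, cmd_fd, mmap_key);
	return p == MAP_FAILED ? NULL : p;
}

static void ring_doorbell(const struct reset_state *state,
			  volatile uint64_t *db_reg, uint64_t db_val)
{
	/* During reset the HW ignores doorbells, so writing one is
	 * pointless; silently drop it instead. */
	if (state && state->is_reset)
		return;

	*db_reg = db_val;
}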

diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c
index e1c2659..0e4f4c1 100644
--- a/providers/hns/hns_roce_u.c
+++ b/providers/hns/hns_roce_u.c
@@ -100,6 +100,24 @@ static uint32_t calc_table_shift(uint32_t entry_count, uint32_t size_shift)
 	return count_shift > size_shift ? count_shift - size_shift : 0;
 }
 
+static int init_reset_context(struct hns_roce_context *ctx, int cmd_fd,
+			      struct hns_roce_alloc_ucontext_resp *resp,
+			      int page_size)
+{
+	uint64_t reset_mmap_key = resp->reset_mmap_key;
+
+	/* The reset mmap key is 0, which means it is not supported. */
+	if (reset_mmap_key == 0)
+		return 0;
+
+	ctx->reset_state = mmap(NULL, page_size, PROT_READ, MAP_SHARED,
+				cmd_fd, reset_mmap_key);
+	if (ctx->reset_state == MAP_FAILED)
+		return -ENOMEM;
+
+	return 0;
+}
+
 static int set_context_attr(struct hns_roce_device *hr_dev,
 			    struct hns_roce_context *context,
 			    struct hns_roce_alloc_ucontext_resp *resp)
@@ -176,6 +194,9 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
 	if (context->uar == MAP_FAILED)
 		goto err_free;
 
+	if (init_reset_context(context, cmd_fd, &resp, hr_dev->page_size))
+		goto reset_free;
+
 	pthread_mutex_init(&context->qp_table_mutex, NULL);
 	pthread_mutex_init(&context->srq_table_mutex, NULL);
 	pthread_spin_init(&context->uar_lock, PTHREAD_PROCESS_PRIVATE);
@@ -185,6 +206,8 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
 
 	return &context->ibv_ctx;
 
+reset_free:
+	munmap(context->uar, hr_dev->page_size);
 err_free:
 	verbs_uninit_context(&context->ibv_ctx);
 	free(context);
@@ -197,6 +220,8 @@ static void hns_roce_free_context(struct ibv_context *ibctx)
 	struct hns_roce_context *context = to_hr_ctx(ibctx);
 
 	munmap(context->uar, hr_dev->page_size);
+	if (context->reset_state)
+		munmap(context->reset_state, hr_dev->page_size);
 	verbs_uninit_context(&context->ibv_ctx);
 	free(context);
 }
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index 56851b0..49de0f9 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -201,9 +201,14 @@ struct hns_roce_spinlock {
 	int need_lock;
 };
 
+struct hns_roce_v2_reset_state {
+	uint32_t is_reset;
+};
+
 struct hns_roce_context {
 	struct verbs_context ibv_ctx;
 	void *uar;
+	void *reset_state;
 	pthread_spinlock_t uar_lock;
 
 	struct {
diff --git a/providers/hns/hns_roce_u_db.h b/providers/hns/hns_roce_u_db.h
index 8c47a53..de288de 100644
--- a/providers/hns/hns_roce_u_db.h
+++ b/providers/hns/hns_roce_u_db.h
@@ -40,8 +40,14 @@
 
 #define HNS_ROCE_WORD_NUM 2
 
-static inline void hns_roce_write64(void *dest, __le32 val[HNS_ROCE_WORD_NUM])
+static inline void hns_roce_write64(struct hns_roce_context *ctx, void *dest,
+				    __le32 val[HNS_ROCE_WORD_NUM])
 {
+	struct hns_roce_v2_reset_state *state = ctx->reset_state;
+
+	if (state && state->is_reset)
+		return;
+
 	mmio_write64_le(dest, *(__le64 *)val);
 }
 
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index 1d7a304..1855d83 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -284,7 +284,8 @@ static void hns_roce_update_rq_db(struct hns_roce_context *ctx,
 	hr_reg_write(&rq_db, DB_CMD, HNS_ROCE_V2_RQ_DB);
 	hr_reg_write(&rq_db, DB_PI, rq_head);
 
-	hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&rq_db);
+	hns_roce_write64(ctx, ctx->uar + ROCEE_VF_DB_CFG0_OFFSET,
+			 (__le32 *)&rq_db);
 }
 
 static void hns_roce_update_sq_db(struct hns_roce_context *ctx,
@@ -298,7 +299,7 @@ static void hns_roce_update_sq_db(struct hns_roce_context *ctx,
 	hr_reg_write(&sq_db, DB_PI, qp->sq.head);
 	hr_reg_write(&sq_db, DB_SL, qp->sl);
 
-	hns_roce_write64(qp->sq.db_reg, (__le32 *)&sq_db);
+	hns_roce_write64(ctx, qp->sq.db_reg, (__le32 *)&sq_db);
 }
 
 static void hns_roce_write512(uint64_t *dest, uint64_t *val)
@@ -309,6 +310,12 @@ static void hns_roce_write512(uint64_t *dest, uint64_t *val)
 static void hns_roce_write_dwqe(struct hns_roce_qp *qp, void *wqe)
 {
 	struct hns_roce_rc_sq_wqe *rc_sq_wqe = wqe;
+	struct ibv_qp *ibvqp = &qp->verbs_qp.qp;
+	struct hns_roce_context *ctx = to_hr_ctx(ibvqp->context);
+	struct hns_roce_v2_reset_state *state = ctx->reset_state;
+
+	if (state && state->is_reset)
+		return;
 
 	/* All kinds of DirectWQE have the same header field layout */
 	hr_reg_enable(rc_sq_wqe, RCWQE_FLAG);
@@ -328,7 +335,8 @@ static void update_cq_db(struct hns_roce_context *ctx, struct hns_roce_cq *cq)
 	hr_reg_write(&cq_db, DB_CQ_CI, cq->cons_index);
 	hr_reg_write(&cq_db, DB_CQ_CMD_SN, 1);
 
-	hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&cq_db);
+	hns_roce_write64(ctx, ctx->uar + ROCEE_VF_DB_CFG0_OFFSET,
+			 (__le32 *)&cq_db);
 }
 
 static struct hns_roce_qp *hns_roce_v2_find_qp(struct hns_roce_context *ctx,
@@ -762,7 +770,8 @@ static int hns_roce_u_v2_arm_cq(struct ibv_cq *ibvcq, int solicited)
 	hr_reg_write(&cq_db, DB_CQ_CMD_SN, cq->arm_sn);
 	hr_reg_write(&cq_db, DB_CQ_NOTIFY, solicited_flag);
 
-	hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&cq_db);
+	hns_roce_write64(ctx, ctx->uar + ROCEE_VF_DB_CFG0_OFFSET,
+			 (__le32 *)&cq_db);
 
 	return 0;
 }
@@ -1741,7 +1750,7 @@ static void update_srq_db(struct hns_roce_context *ctx, struct hns_roce_db *db,
 	hr_reg_write(db, DB_CMD, HNS_ROCE_V2_SRQ_DB);
 	hr_reg_write(db, DB_PI, srq->idx_que.head);
 
-	hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET,
+	hns_roce_write64(ctx, ctx->uar + ROCEE_VF_DB_CFG0_OFFSET,
 			 (__le32 *)db);
 }
 
-- 
2.33.0
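
A note on where the checks sit: the state page is mapped PROT_READ, so user space can only observe the flag and never clear it. The reset test appears both inside hns_roce_write64() and again at the top of hns_roce_write_dwqe(), because the DirectWQE path issues its doorbell through hns_roce_write512() and never reaches hns_roce_write64(). On kernels that do not expose the feature, reset_mmap_key is 0 and reset_state remains NULL, so every check short-circuits and the provider behaves exactly as before.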