rdma-core/0010-libhns-Add-support-for-lock-free-SRQ.patch
Ran Zhou e56042b4e2 Support reporting wc as software mode.
When the HW is in the resetting stage, we cannot poll back all the
expected work completions because the HW will not generate CQEs anymore.
This patch allows the driver to compose the expected WCs instead of the HW
during the resetting stage. Once the hardware has finished resetting, we
can poll the CQ from hardware again.

Signed-off-by: Ran Zhou <zhouran10@h-partners.com>
(cherry picked from commit 5494e44cf97e65d858c8f7376c0424a833dc8323)
2024-03-28 20:21:14 +08:00

158 lines
4.7 KiB
Diff

From 19f2857b3bb6b5b6992ae7314b52c7b84e08780d Mon Sep 17 00:00:00 2001
From: zzry <1245464216@qq.com>
Date: Fri, 8 Mar 2024 16:33:48 +0800
Subject: [PATCH 10/10] libhns: Add support for lock-free SRQ
driver inclusion
category: feature
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I97WST
------------------------------------------------------------------
Drop SRQ locks when the SRQ is associated with a PAD holding a TD.
Signed-off-by: Yixing Liu <liuyixing1@huawei.com>
Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com>
---
providers/hns/hns_roce_u.h | 2 +-
providers/hns/hns_roce_u_hw_v2.c | 8 ++++----
providers/hns/hns_roce_u_verbs.c | 31 +++++++++++++++++++++++++++++--
3 files changed, 34 insertions(+), 7 deletions(-)
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index 0035e36..21a6e28 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -292,7 +292,7 @@ struct hns_roce_srq {
struct hns_roce_idx_que idx_que;
struct hns_roce_buf wqe_buf;
struct hns_roce_rinl_buf srq_rinl_buf;
- pthread_spinlock_t lock;
+ struct hns_roce_spinlock hr_lock;
unsigned long *wrid;
unsigned int srqn;
unsigned int wqe_cnt;
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index 2fb4d72..1d7a304 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -229,14 +229,14 @@ static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, uint16_t ind)
uint32_t bitmap_num;
int bit_num;
- pthread_spin_lock(&srq->lock);
+ hns_roce_spin_lock(&srq->hr_lock);
bitmap_num = ind / BIT_CNT_PER_LONG;
bit_num = ind % BIT_CNT_PER_LONG;
srq->idx_que.bitmap[bitmap_num] |= (1ULL << bit_num);
srq->idx_que.tail++;
- pthread_spin_unlock(&srq->lock);
+ hns_roce_spin_unlock(&srq->hr_lock);
}
static int get_srq_from_cqe(struct hns_roce_v2_cqe *cqe,
@@ -1756,7 +1756,7 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
int ret = 0;
void *wqe;
- pthread_spin_lock(&srq->lock);
+ hns_roce_spin_lock(&srq->hr_lock);
max_sge = srq->max_gs - srq->rsv_sge;
for (nreq = 0; wr; ++nreq, wr = wr->next) {
@@ -1795,7 +1795,7 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
update_srq_db(ctx, &srq_db, srq);
}
- pthread_spin_unlock(&srq->lock);
+ hns_roce_spin_unlock(&srq->hr_lock);
return ret;
}
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index afde313..00e59dc 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -461,6 +461,19 @@ static int hns_roce_cq_spinlock_init(struct ibv_context *context,
return hns_roce_spinlock_init(&cq->hr_lock, need_lock);
}
+static int hns_roce_srq_spinlock_init(struct ibv_context *context,
+ struct hns_roce_srq *srq,
+ struct ibv_srq_init_attr_ex *attr)
+{
+ bool need_lock;
+
+ need_lock = hns_roce_whether_need_lock(attr->pd);
+ if (!need_lock)
+ verbs_info(verbs_get_ctx(context), "configure srq as no lock.\n");
+
+ return hns_roce_spinlock_init(&srq->hr_lock, need_lock);
+}
+
static int hns_roce_alloc_cq_buf(struct hns_roce_cq *cq)
{
int buf_size = hr_hw_page_align(cq->cq_depth * cq->cqe_size);
@@ -830,6 +843,7 @@ static struct ibv_srq *create_srq(struct ibv_context *context,
struct ibv_srq_init_attr_ex *init_attr)
{
struct hns_roce_context *hr_ctx = to_hr_ctx(context);
+ struct hns_roce_pad *pad = to_hr_pad(init_attr->pd);
struct hns_roce_srq *srq;
int ret;
@@ -843,12 +857,15 @@ static struct ibv_srq *create_srq(struct ibv_context *context,
goto err;
}
- if (pthread_spin_init(&srq->lock, PTHREAD_PROCESS_PRIVATE))
+ if (pad)
+ atomic_fetch_add(&pad->pd.refcount, 1);
+
+ if (hns_roce_srq_spinlock_init(context, srq, init_attr))
goto err_free_srq;
set_srq_param(context, srq, init_attr);
if (alloc_srq_buf(srq))
- goto err_free_srq;
+ goto err_destroy_lock;
srq->rdb = hns_roce_alloc_db(hr_ctx, HNS_ROCE_SRQ_TYPE_DB);
if (!srq->rdb)
@@ -879,6 +896,9 @@ err_srq_db:
err_srq_buf:
free_srq_buf(srq);
+err_destroy_lock:
+ hns_roce_spinlock_destroy(&srq->hr_lock);
+
err_free_srq:
free(srq);
@@ -943,6 +963,7 @@ int hns_roce_u_query_srq(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr)
int hns_roce_u_destroy_srq(struct ibv_srq *ibv_srq)
{
struct hns_roce_context *ctx = to_hr_ctx(ibv_srq->context);
+ struct hns_roce_pad *pad = to_hr_pad(ibv_srq->pd);
struct hns_roce_srq *srq = to_hr_srq(ibv_srq);
int ret;
@@ -954,6 +975,12 @@ int hns_roce_u_destroy_srq(struct ibv_srq *ibv_srq)
hns_roce_free_db(ctx, srq->rdb, HNS_ROCE_SRQ_TYPE_DB);
free_srq_buf(srq);
+
+ hns_roce_spinlock_destroy(&srq->hr_lock);
+
+ if (pad)
+ atomic_fetch_sub(&pad->pd.refcount, 1);
+
free(srq);
return 0;
--
2.33.0