Support thread domain and parent domain for lock-free mode
Add support for thread domain (TD) and parent domain (PAD). When a parent domain holds a thread domain, the associated data path will be set to lock-free mode to improve performance. Signed-off-by: Ran Zhou <zhouran10@h-partners.com> (cherry picked from commit 60b829d79704e6b4611d7040265a7cf852057931)
This commit is contained in:
parent
2e016d7323
commit
4e8e4d8653
363
0007-libhns-Add-support-for-thread-domain-and-parent-doma.patch
Normal file
363
0007-libhns-Add-support-for-thread-domain-and-parent-doma.patch
Normal file
@ -0,0 +1,363 @@
|
|||||||
|
From bb6a6264246a2a51680a2d4b104a296a9cdf4fab Mon Sep 17 00:00:00 2001
|
||||||
|
From: zzry <1245464216@qq.com>
|
||||||
|
Date: Fri, 8 Mar 2024 15:05:55 +0800
|
||||||
|
Subject: [PATCH 07/10] libhns: Add support for thread domain and parent domain
|
||||||
|
|
||||||
|
Add support for thread domain (TD) and parent domain (PAD).
|
||||||
|
Extend the original hns_roce_pd struct to hns_roce_pad by
|
||||||
|
adding the new hns_roce_td struct. When a parent domain
|
||||||
|
holds a thread domain, the associated data path will be set
|
||||||
|
to lock-free mode to improve performance.
|
||||||
|
---
|
||||||
|
providers/hns/hns_roce_u.c | 5 +-
|
||||||
|
providers/hns/hns_roce_u.h | 69 +++++++++++++-
|
||||||
|
providers/hns/hns_roce_u_verbs.c | 156 ++++++++++++++++++++++++++++---
|
||||||
|
3 files changed, 215 insertions(+), 15 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c
|
||||||
|
index 90f250e..e1c2659 100644
|
||||||
|
--- a/providers/hns/hns_roce_u.c
|
||||||
|
+++ b/providers/hns/hns_roce_u.c
|
||||||
|
@@ -67,7 +67,7 @@ static const struct verbs_context_ops hns_common_ops = {
|
||||||
|
.create_qp = hns_roce_u_create_qp,
|
||||||
|
.create_qp_ex = hns_roce_u_create_qp_ex,
|
||||||
|
.dealloc_mw = hns_roce_u_dealloc_mw,
|
||||||
|
- .dealloc_pd = hns_roce_u_free_pd,
|
||||||
|
+ .dealloc_pd = hns_roce_u_dealloc_pd,
|
||||||
|
.dereg_mr = hns_roce_u_dereg_mr,
|
||||||
|
.destroy_cq = hns_roce_u_destroy_cq,
|
||||||
|
.modify_cq = hns_roce_u_modify_cq,
|
||||||
|
@@ -88,6 +88,9 @@ static const struct verbs_context_ops hns_common_ops = {
|
||||||
|
.close_xrcd = hns_roce_u_close_xrcd,
|
||||||
|
.open_qp = hns_roce_u_open_qp,
|
||||||
|
.get_srq_num = hns_roce_u_get_srq_num,
|
||||||
|
+ .alloc_td = hns_roce_u_alloc_td,
|
||||||
|
+ .dealloc_td = hns_roce_u_dealloc_td,
|
||||||
|
+ .alloc_parent_domain = hns_roce_u_alloc_pad,
|
||||||
|
};
|
||||||
|
|
||||||
|
static uint32_t calc_table_shift(uint32_t entry_count, uint32_t size_shift)
|
||||||
|
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
|
||||||
|
index c73e5c0..5d3f480 100644
|
||||||
|
--- a/providers/hns/hns_roce_u.h
|
||||||
|
+++ b/providers/hns/hns_roce_u.h
|
||||||
|
@@ -196,6 +196,11 @@ struct hns_roce_db_page {
|
||||||
|
unsigned long *bitmap;
|
||||||
|
};
|
||||||
|
|
||||||
|
+struct hns_roce_spinlock {
|
||||||
|
+ pthread_spinlock_t lock;
|
||||||
|
+ int need_lock;
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
struct hns_roce_context {
|
||||||
|
struct verbs_context ibv_ctx;
|
||||||
|
void *uar;
|
||||||
|
@@ -230,9 +235,21 @@ struct hns_roce_context {
|
||||||
|
unsigned int max_inline_data;
|
||||||
|
};
|
||||||
|
|
||||||
|
+struct hns_roce_td {
|
||||||
|
+ struct ibv_td ibv_td;
|
||||||
|
+ atomic_int refcount;
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
struct hns_roce_pd {
|
||||||
|
struct ibv_pd ibv_pd;
|
||||||
|
unsigned int pdn;
|
||||||
|
+ atomic_int refcount;
|
||||||
|
+ struct hns_roce_pd *protection_domain;
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+struct hns_roce_pad {
|
||||||
|
+ struct hns_roce_pd pd;
|
||||||
|
+ struct hns_roce_td *td;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct hns_roce_cq {
|
||||||
|
@@ -406,9 +423,35 @@ static inline struct hns_roce_context *to_hr_ctx(struct ibv_context *ibv_ctx)
|
||||||
|
return container_of(ibv_ctx, struct hns_roce_context, ibv_ctx.context);
|
||||||
|
}
|
||||||
|
|
||||||
|
+static inline struct hns_roce_td *to_hr_td(struct ibv_td *ibv_td)
|
||||||
|
+{
|
||||||
|
+ return container_of(ibv_td, struct hns_roce_td, ibv_td);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/* to_hr_pd always returns the real hns_roce_pd obj. */
|
||||||
|
static inline struct hns_roce_pd *to_hr_pd(struct ibv_pd *ibv_pd)
|
||||||
|
{
|
||||||
|
- return container_of(ibv_pd, struct hns_roce_pd, ibv_pd);
|
||||||
|
+ struct hns_roce_pd *pd =
|
||||||
|
+ container_of(ibv_pd, struct hns_roce_pd, ibv_pd);
|
||||||
|
+
|
||||||
|
+ if (pd->protection_domain)
|
||||||
|
+ return pd->protection_domain;
|
||||||
|
+
|
||||||
|
+ return pd;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static inline struct hns_roce_pad *to_hr_pad(struct ibv_pd *ibv_pd)
|
||||||
|
+{
|
||||||
|
+ struct hns_roce_pad *pad =
|
||||||
|
+ ibv_pd ?
|
||||||
|
+ container_of(ibv_pd, struct hns_roce_pad, pd.ibv_pd) :
|
||||||
|
+ NULL;
|
||||||
|
+
|
||||||
|
+ if (pad && pad->pd.protection_domain)
|
||||||
|
+ return pad;
|
||||||
|
+
|
||||||
|
+ /* Otherwise ibv_pd isn't a parent_domain */
|
||||||
|
+ return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline struct hns_roce_cq *to_hr_cq(struct ibv_cq *ibv_cq)
|
||||||
|
@@ -431,14 +474,35 @@ static inline struct hns_roce_ah *to_hr_ah(struct ibv_ah *ibv_ah)
|
||||||
|
return container_of(ibv_ah, struct hns_roce_ah, ibv_ah);
|
||||||
|
}
|
||||||
|
|
||||||
|
+static inline int hns_roce_spin_lock(struct hns_roce_spinlock *hr_lock)
|
||||||
|
+{
|
||||||
|
+ if (hr_lock->need_lock)
|
||||||
|
+ return pthread_spin_lock(&hr_lock->lock);
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static inline int hns_roce_spin_unlock(struct hns_roce_spinlock *hr_lock)
|
||||||
|
+{
|
||||||
|
+ if (hr_lock->need_lock)
|
||||||
|
+ return pthread_spin_unlock(&hr_lock->lock);
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
int hns_roce_u_query_device(struct ibv_context *context,
|
||||||
|
const struct ibv_query_device_ex_input *input,
|
||||||
|
struct ibv_device_attr_ex *attr, size_t attr_size);
|
||||||
|
int hns_roce_u_query_port(struct ibv_context *context, uint8_t port,
|
||||||
|
struct ibv_port_attr *attr);
|
||||||
|
|
||||||
|
+struct ibv_td *hns_roce_u_alloc_td(struct ibv_context *context,
|
||||||
|
+ struct ibv_td_init_attr *attr);
|
||||||
|
+int hns_roce_u_dealloc_td(struct ibv_td *ibv_td);
|
||||||
|
+struct ibv_pd *hns_roce_u_alloc_pad(struct ibv_context *context,
|
||||||
|
+ struct ibv_parent_domain_init_attr *attr);
|
||||||
|
struct ibv_pd *hns_roce_u_alloc_pd(struct ibv_context *context);
|
||||||
|
-int hns_roce_u_free_pd(struct ibv_pd *pd);
|
||||||
|
+int hns_roce_u_dealloc_pd(struct ibv_pd *pd);
|
||||||
|
|
||||||
|
struct ibv_mr *hns_roce_u_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
|
||||||
|
uint64_t hca_va, int access);
|
||||||
|
@@ -497,6 +561,7 @@ int hns_roce_u_close_xrcd(struct ibv_xrcd *ibv_xrcd);
|
||||||
|
int hns_roce_alloc_buf(struct hns_roce_buf *buf, unsigned int size,
|
||||||
|
int page_size);
|
||||||
|
void hns_roce_free_buf(struct hns_roce_buf *buf);
|
||||||
|
+void hns_roce_qp_spinlock_destroy(struct hns_roce_qp *qp);
|
||||||
|
|
||||||
|
void hns_roce_free_qp_buf(struct hns_roce_qp *qp, struct hns_roce_context *ctx);
|
||||||
|
|
||||||
|
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
|
||||||
|
index dcdc722..ecf8666 100644
|
||||||
|
--- a/providers/hns/hns_roce_u_verbs.c
|
||||||
|
+++ b/providers/hns/hns_roce_u_verbs.c
|
||||||
|
@@ -33,6 +33,7 @@
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
+#include <math.h>
|
||||||
|
#include <errno.h>
|
||||||
|
#include <pthread.h>
|
||||||
|
#include <sys/mman.h>
|
||||||
|
@@ -42,6 +43,37 @@
|
||||||
|
#include "hns_roce_u_db.h"
|
||||||
|
#include "hns_roce_u_hw_v2.h"
|
||||||
|
|
||||||
|
+static bool hns_roce_whether_need_lock(struct ibv_pd *pd)
|
||||||
|
+{
|
||||||
|
+ struct hns_roce_pad *pad;
|
||||||
|
+
|
||||||
|
+ pad = to_hr_pad(pd);
|
||||||
|
+ if (pad && pad->td)
|
||||||
|
+ return false;
|
||||||
|
+
|
||||||
|
+ return true;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int hns_roce_spinlock_init(struct hns_roce_spinlock *hr_lock,
|
||||||
|
+ bool need_lock)
|
||||||
|
+{
|
||||||
|
+ hr_lock->need_lock = need_lock;
|
||||||
|
+
|
||||||
|
+ if (need_lock)
|
||||||
|
+ return pthread_spin_init(&hr_lock->lock,
|
||||||
|
+ PTHREAD_PROCESS_PRIVATE);
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int hns_roce_spinlock_destroy(struct hns_roce_spinlock *hr_lock)
|
||||||
|
+{
|
||||||
|
+ if (hr_lock->need_lock)
|
||||||
|
+ return pthread_spin_destroy(&hr_lock->lock);
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
void hns_roce_init_qp_indices(struct hns_roce_qp *qp)
|
||||||
|
{
|
||||||
|
qp->sq.head = 0;
|
||||||
|
@@ -85,38 +117,138 @@ int hns_roce_u_query_port(struct ibv_context *context, uint8_t port,
|
||||||
|
return ibv_cmd_query_port(context, port, attr, &cmd, sizeof(cmd));
|
||||||
|
}
|
||||||
|
|
||||||
|
+struct ibv_td *hns_roce_u_alloc_td(struct ibv_context *context,
|
||||||
|
+ struct ibv_td_init_attr *attr)
|
||||||
|
+{
|
||||||
|
+ struct hns_roce_td *td;
|
||||||
|
+
|
||||||
|
+ if (attr->comp_mask) {
|
||||||
|
+ errno = EOPNOTSUPP;
|
||||||
|
+ return NULL;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ td = calloc(1, sizeof(*td));
|
||||||
|
+ if (!td) {
|
||||||
|
+ errno = ENOMEM;
|
||||||
|
+ return NULL;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ td->ibv_td.context = context;
|
||||||
|
+ atomic_init(&td->refcount, 1);
|
||||||
|
+
|
||||||
|
+ return &td->ibv_td;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int hns_roce_u_dealloc_td(struct ibv_td *ibv_td)
|
||||||
|
+{
|
||||||
|
+ struct hns_roce_td *td;
|
||||||
|
+
|
||||||
|
+ td = to_hr_td(ibv_td);
|
||||||
|
+ if (atomic_load(&td->refcount) > 1)
|
||||||
|
+ return EBUSY;
|
||||||
|
+
|
||||||
|
+ free(td);
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
struct ibv_pd *hns_roce_u_alloc_pd(struct ibv_context *context)
|
||||||
|
{
|
||||||
|
+ struct hns_roce_alloc_pd_resp resp = {};
|
||||||
|
struct ibv_alloc_pd cmd;
|
||||||
|
struct hns_roce_pd *pd;
|
||||||
|
- struct hns_roce_alloc_pd_resp resp = {};
|
||||||
|
-
|
||||||
|
- pd = malloc(sizeof(*pd));
|
||||||
|
- if (!pd)
|
||||||
|
- return NULL;
|
||||||
|
|
||||||
|
- if (ibv_cmd_alloc_pd(context, &pd->ibv_pd, &cmd, sizeof(cmd),
|
||||||
|
- &resp.ibv_resp, sizeof(resp))) {
|
||||||
|
- free(pd);
|
||||||
|
+ pd = calloc(1, sizeof(*pd));
|
||||||
|
+ if (!pd) {
|
||||||
|
+ errno = ENOMEM;
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
+ errno = ibv_cmd_alloc_pd(context, &pd->ibv_pd, &cmd, sizeof(cmd),
|
||||||
|
+ &resp.ibv_resp, sizeof(resp));
|
||||||
|
|
||||||
|
+ if (errno)
|
||||||
|
+ goto err;
|
||||||
|
+
|
||||||
|
+ atomic_init(&pd->refcount, 1);
|
||||||
|
pd->pdn = resp.pdn;
|
||||||
|
|
||||||
|
return &pd->ibv_pd;
|
||||||
|
+
|
||||||
|
+err:
|
||||||
|
+ free(pd);
|
||||||
|
+ return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
-int hns_roce_u_free_pd(struct ibv_pd *pd)
|
||||||
|
+struct ibv_pd *hns_roce_u_alloc_pad(struct ibv_context *context,
|
||||||
|
+ struct ibv_parent_domain_init_attr *attr)
|
||||||
|
+{
|
||||||
|
+ struct hns_roce_pad *pad;
|
||||||
|
+
|
||||||
|
+ if (ibv_check_alloc_parent_domain(attr))
|
||||||
|
+ return NULL;
|
||||||
|
+
|
||||||
|
+ if (attr->comp_mask) {
|
||||||
|
+ errno = EOPNOTSUPP;
|
||||||
|
+ return NULL;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ pad = calloc(1, sizeof(*pad));
|
||||||
|
+ if (!pad) {
|
||||||
|
+ errno = ENOMEM;
|
||||||
|
+ return NULL;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (attr->td) {
|
||||||
|
+ pad->td = to_hr_td(attr->td);
|
||||||
|
+ atomic_fetch_add(&pad->td->refcount, 1);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ pad->pd.protection_domain = to_hr_pd(attr->pd);
|
||||||
|
+ atomic_fetch_add(&pad->pd.protection_domain->refcount, 1);
|
||||||
|
+
|
||||||
|
+ atomic_init(&pad->pd.refcount, 1);
|
||||||
|
+ ibv_initialize_parent_domain(&pad->pd.ibv_pd,
|
||||||
|
+ &pad->pd.protection_domain->ibv_pd);
|
||||||
|
+
|
||||||
|
+ return &pad->pd.ibv_pd;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void hns_roce_free_pad(struct hns_roce_pad *pad)
|
||||||
|
+{
|
||||||
|
+ atomic_fetch_sub(&pad->pd.protection_domain->refcount, 1);
|
||||||
|
+
|
||||||
|
+ if (pad->td)
|
||||||
|
+ atomic_fetch_sub(&pad->td->refcount, 1);
|
||||||
|
+
|
||||||
|
+ free(pad);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int hns_roce_free_pd(struct hns_roce_pd *pd)
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
- ret = ibv_cmd_dealloc_pd(pd);
|
||||||
|
+ if (atomic_load(&pd->refcount) > 1)
|
||||||
|
+ return EBUSY;
|
||||||
|
+
|
||||||
|
+ ret = ibv_cmd_dealloc_pd(&pd->ibv_pd);
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
- free(to_hr_pd(pd));
|
||||||
|
+ free(pd);
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
|
||||||
|
- return ret;
|
||||||
|
+int hns_roce_u_dealloc_pd(struct ibv_pd *ibv_pd)
|
||||||
|
+{
|
||||||
|
+ struct hns_roce_pad *pad = to_hr_pad(ibv_pd);
|
||||||
|
+ struct hns_roce_pd *pd = to_hr_pd(ibv_pd);
|
||||||
|
+
|
||||||
|
+ if (pad) {
|
||||||
|
+ hns_roce_free_pad(pad);
|
||||||
|
+ return 0;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return hns_roce_free_pd(pd);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct ibv_xrcd *hns_roce_u_open_xrcd(struct ibv_context *context,
|
||||||
|
--
|
||||||
|
2.30.0
|
||||||
|
|
||||||
223
0008-libhns-Add-support-for-lock-free-QP.patch
Normal file
223
0008-libhns-Add-support-for-lock-free-QP.patch
Normal file
@ -0,0 +1,223 @@
|
|||||||
|
From fc7cb76b5b56d67182e6fa1cb7a3c19aa09ef90a Mon Sep 17 00:00:00 2001
|
||||||
|
From: zzry <1245464216@qq.com>
|
||||||
|
Date: Fri, 8 Mar 2024 15:56:09 +0800
|
||||||
|
Subject: [PATCH 08/10] libhns: Add support for lock-free QP
|
||||||
|
|
||||||
|
Drop QP locks when associated to a PAD holding a TD.
|
||||||
|
---
|
||||||
|
providers/hns/hns_roce_u.h | 2 +-
|
||||||
|
providers/hns/hns_roce_u_hw_v2.c | 26 ++++++++++-------
|
||||||
|
providers/hns/hns_roce_u_verbs.c | 49 +++++++++++++++++++++++++++++---
|
||||||
|
3 files changed, 61 insertions(+), 16 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
|
||||||
|
index 5d3f480..5732e39 100644
|
||||||
|
--- a/providers/hns/hns_roce_u.h
|
||||||
|
+++ b/providers/hns/hns_roce_u.h
|
||||||
|
@@ -305,7 +305,7 @@ struct hns_roce_srq {
|
||||||
|
|
||||||
|
struct hns_roce_wq {
|
||||||
|
unsigned long *wrid;
|
||||||
|
- pthread_spinlock_t lock;
|
||||||
|
+ struct hns_roce_spinlock hr_lock;
|
||||||
|
unsigned int wqe_cnt;
|
||||||
|
int max_post;
|
||||||
|
unsigned int head;
|
||||||
|
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
|
||||||
|
index dd13049..90a76e2 100644
|
||||||
|
--- a/providers/hns/hns_roce_u_hw_v2.c
|
||||||
|
+++ b/providers/hns/hns_roce_u_hw_v2.c
|
||||||
|
@@ -1270,7 +1270,7 @@ int hns_roce_u_v2_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr,
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
- pthread_spin_lock(&qp->sq.lock);
|
||||||
|
+ hns_roce_spin_lock(&qp->sq.hr_lock);
|
||||||
|
|
||||||
|
sge_info.start_idx = qp->next_sge; /* start index of extend sge */
|
||||||
|
|
||||||
|
@@ -1331,7 +1331,7 @@ out:
|
||||||
|
*(qp->sdb) = qp->sq.head & 0xffff;
|
||||||
|
}
|
||||||
|
|
||||||
|
- pthread_spin_unlock(&qp->sq.lock);
|
||||||
|
+ hns_roce_spin_unlock(&qp->sq.hr_lock);
|
||||||
|
|
||||||
|
if (ibvqp->state == IBV_QPS_ERR) {
|
||||||
|
attr.qp_state = IBV_QPS_ERR;
|
||||||
|
@@ -1420,7 +1420,7 @@ static int hns_roce_u_v2_post_recv(struct ibv_qp *ibvqp, struct ibv_recv_wr *wr,
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
- pthread_spin_lock(&qp->rq.lock);
|
||||||
|
+ hns_roce_spin_lock(&qp->rq.hr_lock);
|
||||||
|
|
||||||
|
max_sge = qp->rq.max_gs - qp->rq.rsv_sge;
|
||||||
|
for (nreq = 0; wr; ++nreq, wr = wr->next) {
|
||||||
|
@@ -1454,7 +1454,7 @@ out:
|
||||||
|
hns_roce_update_rq_db(ctx, ibvqp->qp_num, qp->rq.head);
|
||||||
|
}
|
||||||
|
|
||||||
|
- pthread_spin_unlock(&qp->rq.lock);
|
||||||
|
+ hns_roce_spin_unlock(&qp->rq.hr_lock);
|
||||||
|
|
||||||
|
if (ibvqp->state == IBV_QPS_ERR) {
|
||||||
|
attr.qp_state = IBV_QPS_ERR;
|
||||||
|
@@ -1549,8 +1549,8 @@ static int hns_roce_u_v2_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
if ((attr_mask & IBV_QP_STATE) && (attr->qp_state == IBV_QPS_ERR)) {
|
||||||
|
- pthread_spin_lock(&hr_qp->sq.lock);
|
||||||
|
- pthread_spin_lock(&hr_qp->rq.lock);
|
||||||
|
+ hns_roce_spin_lock(&hr_qp->sq.hr_lock);
|
||||||
|
+ hns_roce_spin_lock(&hr_qp->rq.hr_lock);
|
||||||
|
flag = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -1561,8 +1561,8 @@ static int hns_roce_u_v2_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
|
||||||
|
if (flag) {
|
||||||
|
if (!ret)
|
||||||
|
qp->state = IBV_QPS_ERR;
|
||||||
|
- pthread_spin_unlock(&hr_qp->rq.lock);
|
||||||
|
- pthread_spin_unlock(&hr_qp->sq.lock);
|
||||||
|
+ hns_roce_spin_unlock(&hr_qp->sq.hr_lock);
|
||||||
|
+ hns_roce_spin_unlock(&hr_qp->rq.hr_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ret)
|
||||||
|
@@ -1640,6 +1640,7 @@ static void hns_roce_unlock_cqs(struct ibv_qp *qp)
|
||||||
|
static int hns_roce_u_v2_destroy_qp(struct ibv_qp *ibqp)
|
||||||
|
{
|
||||||
|
struct hns_roce_context *ctx = to_hr_ctx(ibqp->context);
|
||||||
|
+ struct hns_roce_pad *pad = to_hr_pad(ibqp->pd);
|
||||||
|
struct hns_roce_qp *qp = to_hr_qp(ibqp);
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
@@ -1666,6 +1667,9 @@ static int hns_roce_u_v2_destroy_qp(struct ibv_qp *ibqp)
|
||||||
|
|
||||||
|
hns_roce_free_qp_buf(qp, ctx);
|
||||||
|
|
||||||
|
+ if (pad)
|
||||||
|
+ atomic_fetch_sub(&pad->pd.refcount, 1);
|
||||||
|
+
|
||||||
|
free(qp);
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
@@ -2555,7 +2559,7 @@ static void wr_start(struct ibv_qp_ex *ibv_qp)
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
- pthread_spin_lock(&qp->sq.lock);
|
||||||
|
+ hns_roce_spin_lock(&qp->sq.hr_lock);
|
||||||
|
qp->sge_info.start_idx = qp->next_sge;
|
||||||
|
qp->rb_sq_head = qp->sq.head;
|
||||||
|
qp->err = 0;
|
||||||
|
@@ -2588,7 +2592,7 @@ static int wr_complete(struct ibv_qp_ex *ibv_qp)
|
||||||
|
}
|
||||||
|
|
||||||
|
out:
|
||||||
|
- pthread_spin_unlock(&qp->sq.lock);
|
||||||
|
+ hns_roce_spin_unlock(&qp->sq.hr_lock);
|
||||||
|
if (ibv_qp->qp_base.state == IBV_QPS_ERR) {
|
||||||
|
attr.qp_state = IBV_QPS_ERR;
|
||||||
|
hns_roce_u_v2_modify_qp(&ibv_qp->qp_base, &attr, IBV_QP_STATE);
|
||||||
|
@@ -2603,7 +2607,7 @@ static void wr_abort(struct ibv_qp_ex *ibv_qp)
|
||||||
|
|
||||||
|
qp->sq.head = qp->rb_sq_head;
|
||||||
|
|
||||||
|
- pthread_spin_unlock(&qp->sq.lock);
|
||||||
|
+ hns_roce_spin_unlock(&qp->sq.hr_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
enum {
|
||||||
|
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
|
||||||
|
index ecf8666..d503031 100644
|
||||||
|
--- a/providers/hns/hns_roce_u_verbs.c
|
||||||
|
+++ b/providers/hns/hns_roce_u_verbs.c
|
||||||
|
@@ -1033,6 +1033,41 @@ static int verify_qp_create_attr(struct hns_roce_context *ctx,
|
||||||
|
return verify_qp_create_cap(ctx, attr);
|
||||||
|
}
|
||||||
|
|
||||||
|
+static int hns_roce_qp_spinlock_init(struct hns_roce_context *ctx,
|
||||||
|
+ struct ibv_qp_init_attr_ex *attr,
|
||||||
|
+ struct hns_roce_qp *qp)
|
||||||
|
+{
|
||||||
|
+ bool sq_need_lock;
|
||||||
|
+ bool rq_need_lock;
|
||||||
|
+ int ret;
|
||||||
|
+
|
||||||
|
+ sq_need_lock = hns_roce_whether_need_lock(attr->pd);
|
||||||
|
+ if (!sq_need_lock)
|
||||||
|
+ verbs_info(&ctx->ibv_ctx, "configure sq as no lock.\n");
|
||||||
|
+
|
||||||
|
+ rq_need_lock = hns_roce_whether_need_lock(attr->pd);
|
||||||
|
+ if (!rq_need_lock)
|
||||||
|
+ verbs_info(&ctx->ibv_ctx, "configure rq as no lock.\n");
|
||||||
|
+
|
||||||
|
+ ret = hns_roce_spinlock_init(&qp->sq.hr_lock, sq_need_lock);
|
||||||
|
+ if (ret)
|
||||||
|
+ return ret;
|
||||||
|
+
|
||||||
|
+ ret = hns_roce_spinlock_init(&qp->rq.hr_lock, rq_need_lock);
|
||||||
|
+ if (ret) {
|
||||||
|
+ hns_roce_spinlock_destroy(&qp->sq.hr_lock);
|
||||||
|
+ return ret;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void hns_roce_qp_spinlock_destroy(struct hns_roce_qp *qp)
|
||||||
|
+{
|
||||||
|
+ hns_roce_spinlock_destroy(&qp->rq.hr_lock);
|
||||||
|
+ hns_roce_spinlock_destroy(&qp->sq.hr_lock);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
static int alloc_recv_rinl_buf(uint32_t max_sge,
|
||||||
|
struct hns_roce_rinl_buf *rinl_buf)
|
||||||
|
{
|
||||||
|
@@ -1435,10 +1470,6 @@ static int hns_roce_alloc_qp_buf(struct ibv_qp_init_attr_ex *attr,
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
- if (pthread_spin_init(&qp->sq.lock, PTHREAD_PROCESS_PRIVATE) ||
|
||||||
|
- pthread_spin_init(&qp->rq.lock, PTHREAD_PROCESS_PRIVATE))
|
||||||
|
- return -ENOMEM;
|
||||||
|
-
|
||||||
|
ret = qp_alloc_wqe(&attr->cap, qp, ctx);
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
@@ -1466,6 +1497,7 @@ static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx,
|
||||||
|
struct hnsdv_qp_init_attr *hns_attr)
|
||||||
|
{
|
||||||
|
struct hns_roce_context *context = to_hr_ctx(ibv_ctx);
|
||||||
|
+ struct hns_roce_pad *pad = to_hr_pad(attr->pd);
|
||||||
|
struct hns_roce_qp *qp;
|
||||||
|
uint64_t dwqe_mmap_key;
|
||||||
|
int ret;
|
||||||
|
@@ -1482,6 +1514,13 @@ static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx,
|
||||||
|
|
||||||
|
hns_roce_set_qp_params(attr, qp, context);
|
||||||
|
|
||||||
|
+ if (pad)
|
||||||
|
+ atomic_fetch_add(&pad->pd.refcount, 1);
|
||||||
|
+
|
||||||
|
+ ret = hns_roce_qp_spinlock_init(context, attr, qp);
|
||||||
|
+ if (ret)
|
||||||
|
+ goto err_spinlock;
|
||||||
|
+
|
||||||
|
ret = hns_roce_alloc_qp_buf(attr, qp, context);
|
||||||
|
if (ret)
|
||||||
|
goto err_buf;
|
||||||
|
@@ -1515,6 +1554,8 @@ err_ops:
|
||||||
|
err_cmd:
|
||||||
|
hns_roce_free_qp_buf(qp, context);
|
||||||
|
err_buf:
|
||||||
|
+ hns_roce_qp_spinlock_destroy(qp);
|
||||||
|
+err_spinlock:
|
||||||
|
free(qp);
|
||||||
|
err:
|
||||||
|
if (ret < 0)
|
||||||
|
--
|
||||||
|
2.30.0
|
||||||
|
|
||||||
273
0009-libhns-Add-support-for-lock-free-CQ.patch
Normal file
273
0009-libhns-Add-support-for-lock-free-CQ.patch
Normal file
@ -0,0 +1,273 @@
|
|||||||
|
From 41d0630d763bd39631331c76a9ecdbb245ce9502 Mon Sep 17 00:00:00 2001
|
||||||
|
From: zzry <1245464216@qq.com>
|
||||||
|
Date: Fri, 8 Mar 2024 16:29:34 +0800
|
||||||
|
Subject: [PATCH 09/10] libhns: Add support for lock-free CQ
|
||||||
|
|
||||||
|
Drop CQ locks when associated to a PAD holding a TD.
|
||||||
|
---
|
||||||
|
providers/hns/hns_roce_u.h | 3 +-
|
||||||
|
providers/hns/hns_roce_u_hw_v2.c | 46 +++++++++++++-------------
|
||||||
|
providers/hns/hns_roce_u_verbs.c | 56 ++++++++++++++++++++++++++++----
|
||||||
|
3 files changed, 74 insertions(+), 31 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
|
||||||
|
index 5732e39..0035e36 100644
|
||||||
|
--- a/providers/hns/hns_roce_u.h
|
||||||
|
+++ b/providers/hns/hns_roce_u.h
|
||||||
|
@@ -255,7 +255,7 @@ struct hns_roce_pad {
|
||||||
|
struct hns_roce_cq {
|
||||||
|
struct verbs_cq verbs_cq;
|
||||||
|
struct hns_roce_buf buf;
|
||||||
|
- pthread_spinlock_t lock;
|
||||||
|
+ struct hns_roce_spinlock hr_lock;
|
||||||
|
unsigned int cqn;
|
||||||
|
unsigned int cq_depth;
|
||||||
|
unsigned int cons_index;
|
||||||
|
@@ -265,6 +265,7 @@ struct hns_roce_cq {
|
||||||
|
unsigned long flags;
|
||||||
|
unsigned int cqe_size;
|
||||||
|
struct hns_roce_v2_cqe *cqe;
|
||||||
|
+ struct ibv_pd *parent_domain;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct hns_roce_idx_que {
|
||||||
|
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
|
||||||
|
index 90a76e2..2fb4d72 100644
|
||||||
|
--- a/providers/hns/hns_roce_u_hw_v2.c
|
||||||
|
+++ b/providers/hns/hns_roce_u_hw_v2.c
|
||||||
|
@@ -268,9 +268,9 @@ static int hns_roce_v2_wq_overflow(struct hns_roce_wq *wq, unsigned int nreq,
|
||||||
|
if (cur + nreq < wq->max_post)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
- pthread_spin_lock(&cq->lock);
|
||||||
|
+ hns_roce_spin_lock(&cq->hr_lock);
|
||||||
|
cur = wq->head - wq->tail;
|
||||||
|
- pthread_spin_unlock(&cq->lock);
|
||||||
|
+ hns_roce_spin_unlock(&cq->hr_lock);
|
||||||
|
|
||||||
|
return cur + nreq >= wq->max_post;
|
||||||
|
}
|
||||||
|
@@ -724,7 +724,7 @@ static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne,
|
||||||
|
int err = V2_CQ_OK;
|
||||||
|
int npolled;
|
||||||
|
|
||||||
|
- pthread_spin_lock(&cq->lock);
|
||||||
|
+ hns_roce_spin_lock(&cq->hr_lock);
|
||||||
|
|
||||||
|
for (npolled = 0; npolled < ne; ++npolled) {
|
||||||
|
err = hns_roce_poll_one(ctx, &qp, cq, wc + npolled);
|
||||||
|
@@ -739,7 +739,7 @@ static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne,
|
||||||
|
update_cq_db(ctx, cq);
|
||||||
|
}
|
||||||
|
|
||||||
|
- pthread_spin_unlock(&cq->lock);
|
||||||
|
+ hns_roce_spin_unlock(&cq->hr_lock);
|
||||||
|
|
||||||
|
return err == V2_CQ_POLL_ERR ? err : npolled;
|
||||||
|
}
|
||||||
|
@@ -1510,9 +1510,9 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *cq, uint32_t qpn,
|
||||||
|
static void hns_roce_v2_cq_clean(struct hns_roce_cq *cq, unsigned int qpn,
|
||||||
|
struct hns_roce_srq *srq)
|
||||||
|
{
|
||||||
|
- pthread_spin_lock(&cq->lock);
|
||||||
|
+ hns_roce_spin_lock(&cq->hr_lock);
|
||||||
|
__hns_roce_v2_cq_clean(cq, qpn, srq);
|
||||||
|
- pthread_spin_unlock(&cq->lock);
|
||||||
|
+ hns_roce_spin_unlock(&cq->hr_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void record_qp_attr(struct ibv_qp *qp, struct ibv_qp_attr *attr,
|
||||||
|
@@ -1600,18 +1600,18 @@ static void hns_roce_lock_cqs(struct ibv_qp *qp)
|
||||||
|
|
||||||
|
if (send_cq && recv_cq) {
|
||||||
|
if (send_cq == recv_cq) {
|
||||||
|
- pthread_spin_lock(&send_cq->lock);
|
||||||
|
+ hns_roce_spin_lock(&send_cq->hr_lock);
|
||||||
|
} else if (send_cq->cqn < recv_cq->cqn) {
|
||||||
|
- pthread_spin_lock(&send_cq->lock);
|
||||||
|
- pthread_spin_lock(&recv_cq->lock);
|
||||||
|
+ hns_roce_spin_lock(&send_cq->hr_lock);
|
||||||
|
+ hns_roce_spin_lock(&recv_cq->hr_lock);
|
||||||
|
} else {
|
||||||
|
- pthread_spin_lock(&recv_cq->lock);
|
||||||
|
- pthread_spin_lock(&send_cq->lock);
|
||||||
|
+ hns_roce_spin_lock(&recv_cq->hr_lock);
|
||||||
|
+ hns_roce_spin_lock(&send_cq->hr_lock);
|
||||||
|
}
|
||||||
|
} else if (send_cq) {
|
||||||
|
- pthread_spin_lock(&send_cq->lock);
|
||||||
|
+ hns_roce_spin_lock(&send_cq->hr_lock);
|
||||||
|
} else if (recv_cq) {
|
||||||
|
- pthread_spin_lock(&recv_cq->lock);
|
||||||
|
+ hns_roce_spin_lock(&recv_cq->hr_lock);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -1622,18 +1622,18 @@ static void hns_roce_unlock_cqs(struct ibv_qp *qp)
|
||||||
|
|
||||||
|
if (send_cq && recv_cq) {
|
||||||
|
if (send_cq == recv_cq) {
|
||||||
|
- pthread_spin_unlock(&send_cq->lock);
|
||||||
|
+ hns_roce_spin_unlock(&send_cq->hr_lock);
|
||||||
|
} else if (send_cq->cqn < recv_cq->cqn) {
|
||||||
|
- pthread_spin_unlock(&recv_cq->lock);
|
||||||
|
- pthread_spin_unlock(&send_cq->lock);
|
||||||
|
+ hns_roce_spin_unlock(&recv_cq->hr_lock);
|
||||||
|
+ hns_roce_spin_unlock(&send_cq->hr_lock);
|
||||||
|
} else {
|
||||||
|
- pthread_spin_unlock(&send_cq->lock);
|
||||||
|
- pthread_spin_unlock(&recv_cq->lock);
|
||||||
|
+ hns_roce_spin_unlock(&send_cq->hr_lock);
|
||||||
|
+ hns_roce_spin_unlock(&recv_cq->hr_lock);
|
||||||
|
}
|
||||||
|
} else if (send_cq) {
|
||||||
|
- pthread_spin_unlock(&send_cq->lock);
|
||||||
|
+ hns_roce_spin_unlock(&send_cq->hr_lock);
|
||||||
|
} else if (recv_cq) {
|
||||||
|
- pthread_spin_unlock(&recv_cq->lock);
|
||||||
|
+ hns_roce_spin_unlock(&recv_cq->hr_lock);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -1811,11 +1811,11 @@ static int wc_start_poll_cq(struct ibv_cq_ex *current,
|
||||||
|
if (attr->comp_mask)
|
||||||
|
return EINVAL;
|
||||||
|
|
||||||
|
- pthread_spin_lock(&cq->lock);
|
||||||
|
+ hns_roce_spin_lock(&cq->hr_lock);
|
||||||
|
|
||||||
|
err = hns_roce_poll_one(ctx, &qp, cq, NULL);
|
||||||
|
if (err != V2_CQ_OK)
|
||||||
|
- pthread_spin_unlock(&cq->lock);
|
||||||
|
+ hns_roce_spin_unlock(&cq->hr_lock);
|
||||||
|
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
@@ -1849,7 +1849,7 @@ static void wc_end_poll_cq(struct ibv_cq_ex *current)
|
||||||
|
else
|
||||||
|
update_cq_db(ctx, cq);
|
||||||
|
|
||||||
|
- pthread_spin_unlock(&cq->lock);
|
||||||
|
+ hns_roce_spin_unlock(&cq->hr_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
static enum ibv_wc_opcode wc_read_opcode(struct ibv_cq_ex *current)
|
||||||
|
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
|
||||||
|
index d503031..afde313 100644
|
||||||
|
--- a/providers/hns/hns_roce_u_verbs.c
|
||||||
|
+++ b/providers/hns/hns_roce_u_verbs.c
|
||||||
|
@@ -407,6 +407,11 @@ int hns_roce_u_dealloc_mw(struct ibv_mw *mw)
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
+enum {
|
||||||
|
+ CREATE_CQ_SUPPORTED_COMP_MASK = IBV_CQ_INIT_ATTR_MASK_FLAGS |
|
||||||
|
+ IBV_CQ_INIT_ATTR_MASK_PD,
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
enum {
|
||||||
|
CREATE_CQ_SUPPORTED_WC_FLAGS = IBV_WC_STANDARD_FLAGS |
|
||||||
|
IBV_WC_EX_WITH_CVLAN,
|
||||||
|
@@ -415,21 +420,47 @@ enum {
|
||||||
|
static int verify_cq_create_attr(struct ibv_cq_init_attr_ex *attr,
|
||||||
|
struct hns_roce_context *context)
|
||||||
|
{
|
||||||
|
+ struct hns_roce_pad *pad = to_hr_pad(attr->parent_domain);
|
||||||
|
+
|
||||||
|
if (!attr->cqe || attr->cqe > context->max_cqe)
|
||||||
|
return EINVAL;
|
||||||
|
|
||||||
|
- if (attr->comp_mask)
|
||||||
|
+ if (!check_comp_mask(attr->comp_mask, CREATE_CQ_SUPPORTED_COMP_MASK)) {
|
||||||
|
+ verbs_err(&context->ibv_ctx, "unsupported cq comps 0x%x\n",
|
||||||
|
+ attr->comp_mask);
|
||||||
|
return EOPNOTSUPP;
|
||||||
|
+ }
|
||||||
|
|
||||||
|
if (!check_comp_mask(attr->wc_flags, CREATE_CQ_SUPPORTED_WC_FLAGS))
|
||||||
|
return EOPNOTSUPP;
|
||||||
|
|
||||||
|
+ if (attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_PD) {
|
||||||
|
+ if (!pad) {
|
||||||
|
+ verbs_err(&context->ibv_ctx, "failed to check the pad of cq.\n");
|
||||||
|
+ return EINVAL;
|
||||||
|
+ }
|
||||||
|
+ atomic_fetch_add(&pad->pd.refcount, 1);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
attr->cqe = max_t(uint32_t, HNS_ROCE_MIN_CQE_NUM,
|
||||||
|
roundup_pow_of_two(attr->cqe));
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
+static int hns_roce_cq_spinlock_init(struct ibv_context *context,
|
||||||
|
+ struct hns_roce_cq *cq,
|
||||||
|
+ struct ibv_cq_init_attr_ex *attr)
|
||||||
|
+{
|
||||||
|
+ bool need_lock;
|
||||||
|
+
|
||||||
|
+ need_lock = hns_roce_whether_need_lock(attr->parent_domain);
|
||||||
|
+ if (!need_lock)
|
||||||
|
+ verbs_info(verbs_get_ctx(context), "configure cq as no lock.\n");
|
||||||
|
+
|
||||||
|
+ return hns_roce_spinlock_init(&cq->hr_lock, need_lock);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
static int hns_roce_alloc_cq_buf(struct hns_roce_cq *cq)
|
||||||
|
{
|
||||||
|
int buf_size = hr_hw_page_align(cq->cq_depth * cq->cqe_size);
|
||||||
|
@@ -486,7 +517,10 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *context,
|
||||||
|
goto err;
|
||||||
|
}
|
||||||
|
|
||||||
|
- ret = pthread_spin_init(&cq->lock, PTHREAD_PROCESS_PRIVATE);
|
||||||
|
+ if (attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_PD)
|
||||||
|
+ cq->parent_domain = attr->parent_domain;
|
||||||
|
+
|
||||||
|
+ ret = hns_roce_cq_spinlock_init(context, cq, attr);
|
||||||
|
if (ret)
|
||||||
|
goto err_lock;
|
||||||
|
|
||||||
|
@@ -517,8 +551,9 @@ err_cmd:
|
||||||
|
hns_roce_free_db(hr_ctx, cq->db, HNS_ROCE_CQ_TYPE_DB);
|
||||||
|
err_db:
|
||||||
|
hns_roce_free_buf(&cq->buf);
|
||||||
|
-err_lock:
|
||||||
|
err_buf:
|
||||||
|
+ hns_roce_spinlock_destroy(&cq->hr_lock);
|
||||||
|
+err_lock:
|
||||||
|
free(cq);
|
||||||
|
err:
|
||||||
|
if (ret < 0)
|
||||||
|
@@ -569,16 +604,23 @@ int hns_roce_u_modify_cq(struct ibv_cq *cq, struct ibv_modify_cq_attr *attr)
|
||||||
|
|
||||||
|
int hns_roce_u_destroy_cq(struct ibv_cq *cq)
|
||||||
|
{
|
||||||
|
+ struct hns_roce_cq *hr_cq = to_hr_cq(cq);
|
||||||
|
+ struct hns_roce_pad *pad = to_hr_pad(hr_cq->parent_domain);
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
ret = ibv_cmd_destroy_cq(cq);
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
- hns_roce_free_db(to_hr_ctx(cq->context), to_hr_cq(cq)->db,
|
||||||
|
- HNS_ROCE_CQ_TYPE_DB);
|
||||||
|
- hns_roce_free_buf(&to_hr_cq(cq)->buf);
|
||||||
|
- free(to_hr_cq(cq));
|
||||||
|
+ hns_roce_free_db(to_hr_ctx(cq->context), hr_cq->db, HNS_ROCE_CQ_TYPE_DB);
|
||||||
|
+ hns_roce_free_buf(&hr_cq->buf);
|
||||||
|
+
|
||||||
|
+ hns_roce_spinlock_destroy(&hr_cq->hr_lock);
|
||||||
|
+
|
||||||
|
+ if (pad)
|
||||||
|
+ atomic_fetch_sub(&pad->pd.refcount, 1);
|
||||||
|
+
|
||||||
|
+ free(hr_cq);
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
--
|
||||||
|
2.30.0
|
||||||
|
|
||||||
148
0010-libhns-Add-support-for-lock-free-SRQ.patch
Normal file
148
0010-libhns-Add-support-for-lock-free-SRQ.patch
Normal file
@ -0,0 +1,148 @@
|
|||||||
|
From c252a18578d12fd27b726b7b376fbebc3f2c98c3 Mon Sep 17 00:00:00 2001
|
||||||
|
From: zzry <1245464216@qq.com>
|
||||||
|
Date: Fri, 8 Mar 2024 16:33:48 +0800
|
||||||
|
Subject: [PATCH 10/10] libhns: Add support for lock-free SRQ
|
||||||
|
|
||||||
|
Drop SRQ locks when the SRQ is associated with a PAD holding a TD.
|
||||||
|
---
|
||||||
|
providers/hns/hns_roce_u.h | 2 +-
|
||||||
|
providers/hns/hns_roce_u_hw_v2.c | 8 ++++----
|
||||||
|
providers/hns/hns_roce_u_verbs.c | 31 +++++++++++++++++++++++++++++--
|
||||||
|
3 files changed, 34 insertions(+), 7 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
|
||||||
|
index 0035e36..21a6e28 100644
|
||||||
|
--- a/providers/hns/hns_roce_u.h
|
||||||
|
+++ b/providers/hns/hns_roce_u.h
|
||||||
|
@@ -292,7 +292,7 @@ struct hns_roce_srq {
|
||||||
|
struct hns_roce_idx_que idx_que;
|
||||||
|
struct hns_roce_buf wqe_buf;
|
||||||
|
struct hns_roce_rinl_buf srq_rinl_buf;
|
||||||
|
- pthread_spinlock_t lock;
|
||||||
|
+ struct hns_roce_spinlock hr_lock;
|
||||||
|
unsigned long *wrid;
|
||||||
|
unsigned int srqn;
|
||||||
|
unsigned int wqe_cnt;
|
||||||
|
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
|
||||||
|
index 2fb4d72..1d7a304 100644
|
||||||
|
--- a/providers/hns/hns_roce_u_hw_v2.c
|
||||||
|
+++ b/providers/hns/hns_roce_u_hw_v2.c
|
||||||
|
@@ -229,14 +229,14 @@ static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, uint16_t ind)
|
||||||
|
uint32_t bitmap_num;
|
||||||
|
int bit_num;
|
||||||
|
|
||||||
|
- pthread_spin_lock(&srq->lock);
|
||||||
|
+ hns_roce_spin_lock(&srq->hr_lock);
|
||||||
|
|
||||||
|
bitmap_num = ind / BIT_CNT_PER_LONG;
|
||||||
|
bit_num = ind % BIT_CNT_PER_LONG;
|
||||||
|
srq->idx_que.bitmap[bitmap_num] |= (1ULL << bit_num);
|
||||||
|
srq->idx_que.tail++;
|
||||||
|
|
||||||
|
- pthread_spin_unlock(&srq->lock);
|
||||||
|
+ hns_roce_spin_unlock(&srq->hr_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int get_srq_from_cqe(struct hns_roce_v2_cqe *cqe,
|
||||||
|
@@ -1756,7 +1756,7 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
|
||||||
|
int ret = 0;
|
||||||
|
void *wqe;
|
||||||
|
|
||||||
|
- pthread_spin_lock(&srq->lock);
|
||||||
|
+ hns_roce_spin_lock(&srq->hr_lock);
|
||||||
|
|
||||||
|
max_sge = srq->max_gs - srq->rsv_sge;
|
||||||
|
for (nreq = 0; wr; ++nreq, wr = wr->next) {
|
||||||
|
@@ -1795,7 +1795,7 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
|
||||||
|
update_srq_db(ctx, &srq_db, srq);
|
||||||
|
}
|
||||||
|
|
||||||
|
- pthread_spin_unlock(&srq->lock);
|
||||||
|
+ hns_roce_spin_unlock(&srq->hr_lock);
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
|
||||||
|
index afde313..00e59dc 100644
|
||||||
|
--- a/providers/hns/hns_roce_u_verbs.c
|
||||||
|
+++ b/providers/hns/hns_roce_u_verbs.c
|
||||||
|
@@ -461,6 +461,19 @@ static int hns_roce_cq_spinlock_init(struct ibv_context *context,
|
||||||
|
return hns_roce_spinlock_init(&cq->hr_lock, need_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
+static int hns_roce_srq_spinlock_init(struct ibv_context *context,
|
||||||
|
+ struct hns_roce_srq *srq,
|
||||||
|
+ struct ibv_srq_init_attr_ex *attr)
|
||||||
|
+{
|
||||||
|
+ bool need_lock;
|
||||||
|
+
|
||||||
|
+ need_lock = hns_roce_whether_need_lock(attr->pd);
|
||||||
|
+ if (!need_lock)
|
||||||
|
+ verbs_info(verbs_get_ctx(context), "configure srq as no lock.\n");
|
||||||
|
+
|
||||||
|
+ return hns_roce_spinlock_init(&srq->hr_lock, need_lock);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
static int hns_roce_alloc_cq_buf(struct hns_roce_cq *cq)
|
||||||
|
{
|
||||||
|
int buf_size = hr_hw_page_align(cq->cq_depth * cq->cqe_size);
|
||||||
|
@@ -830,6 +843,7 @@ static struct ibv_srq *create_srq(struct ibv_context *context,
|
||||||
|
struct ibv_srq_init_attr_ex *init_attr)
|
||||||
|
{
|
||||||
|
struct hns_roce_context *hr_ctx = to_hr_ctx(context);
|
||||||
|
+ struct hns_roce_pad *pad = to_hr_pad(init_attr->pd);
|
||||||
|
struct hns_roce_srq *srq;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
@@ -843,12 +857,15 @@ static struct ibv_srq *create_srq(struct ibv_context *context,
|
||||||
|
goto err;
|
||||||
|
}
|
||||||
|
|
||||||
|
- if (pthread_spin_init(&srq->lock, PTHREAD_PROCESS_PRIVATE))
|
||||||
|
+ if (pad)
|
||||||
|
+ atomic_fetch_add(&pad->pd.refcount, 1);
|
||||||
|
+
|
||||||
|
+ if (hns_roce_srq_spinlock_init(context, srq, init_attr))
|
||||||
|
goto err_free_srq;
|
||||||
|
|
||||||
|
set_srq_param(context, srq, init_attr);
|
||||||
|
if (alloc_srq_buf(srq))
|
||||||
|
- goto err_free_srq;
|
||||||
|
+ goto err_destroy_lock;
|
||||||
|
|
||||||
|
srq->rdb = hns_roce_alloc_db(hr_ctx, HNS_ROCE_SRQ_TYPE_DB);
|
||||||
|
if (!srq->rdb)
|
||||||
|
@@ -879,6 +896,9 @@ err_srq_db:
|
||||||
|
err_srq_buf:
|
||||||
|
free_srq_buf(srq);
|
||||||
|
|
||||||
|
+err_destroy_lock:
|
||||||
|
+ hns_roce_spinlock_destroy(&srq->hr_lock);
|
||||||
|
+
|
||||||
|
err_free_srq:
|
||||||
|
free(srq);
|
||||||
|
|
||||||
|
@@ -943,6 +963,7 @@ int hns_roce_u_query_srq(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr)
|
||||||
|
int hns_roce_u_destroy_srq(struct ibv_srq *ibv_srq)
|
||||||
|
{
|
||||||
|
struct hns_roce_context *ctx = to_hr_ctx(ibv_srq->context);
|
||||||
|
+ struct hns_roce_pad *pad = to_hr_pad(ibv_srq->pd);
|
||||||
|
struct hns_roce_srq *srq = to_hr_srq(ibv_srq);
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
@@ -954,6 +975,12 @@ int hns_roce_u_destroy_srq(struct ibv_srq *ibv_srq)
|
||||||
|
|
||||||
|
hns_roce_free_db(ctx, srq->rdb, HNS_ROCE_SRQ_TYPE_DB);
|
||||||
|
free_srq_buf(srq);
|
||||||
|
+
|
||||||
|
+ hns_roce_spinlock_destroy(&srq->hr_lock);
|
||||||
|
+
|
||||||
|
+ if (pad)
|
||||||
|
+ atomic_fetch_sub(&pad->pd.refcount, 1);
|
||||||
|
+
|
||||||
|
free(srq);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
--
|
||||||
|
2.30.0
|
||||||
|
|
||||||
@ -1,6 +1,6 @@
|
|||||||
Name: rdma-core
|
Name: rdma-core
|
||||||
Version: 50.0
|
Version: 50.0
|
||||||
Release: 3
|
Release: 4
|
||||||
Summary: RDMA core userspace libraries and daemons
|
Summary: RDMA core userspace libraries and daemons
|
||||||
License: GPLv2 or BSD
|
License: GPLv2 or BSD
|
||||||
Url: https://github.com/linux-rdma/rdma-core
|
Url: https://github.com/linux-rdma/rdma-core
|
||||||
@ -12,6 +12,10 @@ patch3: 0003-Update-kernel-headers.patch
|
|||||||
patch4: 0004-libhns-Introduce-hns-direct-verbs.patch
|
patch4: 0004-libhns-Introduce-hns-direct-verbs.patch
|
||||||
patch5: 0005-libhns-Encapsulate-context-attribute-setting-into-a-.patch
|
patch5: 0005-libhns-Encapsulate-context-attribute-setting-into-a-.patch
|
||||||
patch6: 0006-libhns-Support-congestion-control-algorithm-configur.patch
|
patch6: 0006-libhns-Support-congestion-control-algorithm-configur.patch
|
||||||
|
patch7: 0007-libhns-Add-support-for-thread-domain-and-parent-doma.patch
|
||||||
|
patch8: 0008-libhns-Add-support-for-lock-free-QP.patch
|
||||||
|
patch9: 0009-libhns-Add-support-for-lock-free-CQ.patch
|
||||||
|
patch10: 0010-libhns-Add-support-for-lock-free-SRQ.patch
|
||||||
|
|
||||||
BuildRequires: binutils cmake >= 2.8.11 gcc libudev-devel pkgconfig pkgconfig(libnl-3.0)
|
BuildRequires: binutils cmake >= 2.8.11 gcc libudev-devel pkgconfig pkgconfig(libnl-3.0)
|
||||||
BuildRequires: pkgconfig(libnl-route-3.0) valgrind-devel systemd systemd-devel
|
BuildRequires: pkgconfig(libnl-route-3.0) valgrind-devel systemd systemd-devel
|
||||||
@ -590,6 +594,12 @@ fi
|
|||||||
%{_mandir}/*
|
%{_mandir}/*
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
|
* Thu Mar 21 2024 Ran Zhou <zhouran10@h-partners.com> - 50.0-4
|
||||||
|
- Type: requirement
|
||||||
|
- ID: NA
|
||||||
|
- SUG: NA
|
||||||
|
- DESC: Support thread domain (TD) lock-free mode
|
||||||
|
|
||||||
* Tue Mar 12 2024 Ran Zhou <zhouran10@h-partners.com> - 50.0-3
|
* Tue Mar 12 2024 Ran Zhou <zhouran10@h-partners.com> - 50.0-3
|
||||||
- Type: requirement
|
- Type: requirement
|
||||||
- ID: NA
|
- ID: NA
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user