!38 Update to 41.0
From: @hellotcc Reviewed-by: @li-yangyang20 Signed-off-by: @li-yangyang20
This commit is contained in:
commit
1a0fe62ace
@ -1,41 +0,0 @@
|
||||
From 693d55e80976217215844258e5b78bc115382689 Mon Sep 17 00:00:00 2001
|
||||
From: Guofeng Yue <yueguofeng@hisilicon.com>
|
||||
Date: Mon, 10 Jan 2022 10:44:23 +0800
|
||||
Subject: [PATCH 1/8] Update kernel headers
|
||||
|
||||
To commit 62c4d8878d13 ("RDMA/hns: Remove support for HIP06").
|
||||
|
||||
Signed-off-by: Guofeng Yue <yueguofeng@hisilicon.com>
|
||||
---
|
||||
kernel-headers/rdma/hns-abi.h | 6 ++++--
|
||||
1 file changed, 4 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/kernel-headers/rdma/hns-abi.h b/kernel-headers/rdma/hns-abi.h
|
||||
index 42b17765..abfd36e2 100644
|
||||
--- a/kernel-headers/rdma/hns-abi.h
|
||||
+++ b/kernel-headers/rdma/hns-abi.h
|
||||
@@ -77,17 +77,19 @@ enum hns_roce_qp_cap_flags {
|
||||
HNS_ROCE_QP_CAP_RQ_RECORD_DB = 1 << 0,
|
||||
HNS_ROCE_QP_CAP_SQ_RECORD_DB = 1 << 1,
|
||||
HNS_ROCE_QP_CAP_OWNER_DB = 1 << 2,
|
||||
+ HNS_ROCE_QP_CAP_DIRECT_WQE = 1 << 5,
|
||||
};
|
||||
|
||||
struct hns_roce_ib_create_qp_resp {
|
||||
__aligned_u64 cap_flags;
|
||||
+ __aligned_u64 dwqe_mmap_key;
|
||||
};
|
||||
|
||||
struct hns_roce_ib_alloc_ucontext_resp {
|
||||
__u32 qp_tab_size;
|
||||
__u32 cqe_size;
|
||||
- __u32 srq_tab_size;
|
||||
- __u32 reserved;
|
||||
+ __u32 srq_tab_size;
|
||||
+ __u32 reserved;
|
||||
};
|
||||
|
||||
struct hns_roce_ib_alloc_pd_resp {
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -1,120 +0,0 @@
|
||||
From 08ec3c43bf9710fdf3ca664f7cd63436e67339d7 Mon Sep 17 00:00:00 2001
|
||||
From: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
Date: Tue, 11 May 2021 19:06:34 +0800
|
||||
Subject: [PATCH 2/8] libhns: Fix the ownership of the head/tail pointer of SRQ
|
||||
WQE
|
||||
|
||||
The CQE of SRQ is not generated in the order of wqe, so the wqe_idx
|
||||
corresponding to the idle WQE should be placed in a FIFO, then the hardware
|
||||
will be instructed to obtain the corresponding WQE. Therefore, the WQ
|
||||
of SRQ has no concept of head pointer and tail pointer, but the queue of
|
||||
wqe_idx does.
|
||||
|
||||
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
Signed-off-by: Weihang Li <liweihang@huawei.com>
|
||||
---
|
||||
providers/hns/hns_roce_u.h | 4 ++--
|
||||
providers/hns/hns_roce_u_hw_v2.c | 12 ++++++------
|
||||
providers/hns/hns_roce_u_verbs.c | 6 +++---
|
||||
3 files changed, 11 insertions(+), 11 deletions(-)
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
|
||||
index 8f805dd1..b3f48113 100644
|
||||
--- a/providers/hns/hns_roce_u.h
|
||||
+++ b/providers/hns/hns_roce_u.h
|
||||
@@ -205,6 +205,8 @@ struct hns_roce_idx_que {
|
||||
int entry_shift;
|
||||
unsigned long *bitmap;
|
||||
int bitmap_cnt;
|
||||
+ unsigned int head;
|
||||
+ unsigned int tail;
|
||||
};
|
||||
|
||||
struct hns_roce_srq {
|
||||
@@ -217,8 +219,6 @@ struct hns_roce_srq {
|
||||
unsigned int max_gs;
|
||||
unsigned int rsv_sge;
|
||||
unsigned int wqe_shift;
|
||||
- int head;
|
||||
- int tail;
|
||||
unsigned int *db;
|
||||
unsigned short counter;
|
||||
struct hns_roce_idx_que idx_que;
|
||||
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
|
||||
index 4988943a..f947dbd7 100644
|
||||
--- a/providers/hns/hns_roce_u_hw_v2.c
|
||||
+++ b/providers/hns/hns_roce_u_hw_v2.c
|
||||
@@ -262,7 +262,7 @@ static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, uint16_t ind)
|
||||
bitmap_num = ind / BIT_CNT_PER_LONG;
|
||||
bit_num = ind % BIT_CNT_PER_LONG;
|
||||
srq->idx_que.bitmap[bitmap_num] |= (1ULL << bit_num);
|
||||
- srq->tail++;
|
||||
+ srq->idx_que.tail++;
|
||||
|
||||
pthread_spin_unlock(&srq->lock);
|
||||
}
|
||||
@@ -1564,7 +1564,7 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
|
||||
pthread_spin_lock(&srq->lock);
|
||||
|
||||
/* current idx of srqwq */
|
||||
- ind = srq->head & (srq->wqe_cnt - 1);
|
||||
+ ind = srq->idx_que.head & (srq->wqe_cnt - 1);
|
||||
|
||||
max_sge = srq->max_gs - srq->rsv_sge;
|
||||
for (nreq = 0; wr; ++nreq, wr = wr->next) {
|
||||
@@ -1574,7 +1574,7 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
|
||||
break;
|
||||
}
|
||||
|
||||
- if (srq->head == srq->tail) {
|
||||
+ if (srq->idx_que.head == srq->idx_que.tail) {
|
||||
ret = -ENOMEM;
|
||||
*bad_wr = wr;
|
||||
break;
|
||||
@@ -1607,7 +1607,7 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
|
||||
}
|
||||
|
||||
if (nreq) {
|
||||
- srq->head += nreq;
|
||||
+ srq->idx_que.head += nreq;
|
||||
|
||||
/*
|
||||
* Make sure that descriptors are written before
|
||||
@@ -1617,8 +1617,8 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
|
||||
|
||||
srq_db.byte_4 = htole32(HNS_ROCE_V2_SRQ_DB << DB_BYTE_4_CMD_S |
|
||||
srq->srqn);
|
||||
- srq_db.parameter =
|
||||
- htole32(srq->head & DB_PARAM_SRQ_PRODUCER_COUNTER_M);
|
||||
+ srq_db.parameter = htole32(srq->idx_que.head &
|
||||
+ DB_PARAM_SRQ_PRODUCER_COUNTER_M);
|
||||
|
||||
hns_roce_write64((uint32_t *)&srq_db, ctx,
|
||||
ROCEE_VF_DB_CFG0_OFFSET);
|
||||
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
|
||||
index 30ab072a..9b4934b9 100644
|
||||
--- a/providers/hns/hns_roce_u_verbs.c
|
||||
+++ b/providers/hns/hns_roce_u_verbs.c
|
||||
@@ -491,6 +491,9 @@ static int hns_roce_create_idx_que(struct hns_roce_srq *srq)
|
||||
for (i = 0; i < idx_que->bitmap_cnt; ++i)
|
||||
idx_que->bitmap[i] = ~(0UL);
|
||||
|
||||
+ idx_que->head = 0;
|
||||
+ idx_que->tail = srq->wqe_cnt - 1;
|
||||
+
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -512,9 +515,6 @@ static int hns_roce_alloc_srq_buf(struct hns_roce_srq *srq)
|
||||
return ENOMEM;
|
||||
}
|
||||
|
||||
- srq->head = 0;
|
||||
- srq->tail = srq->wqe_cnt - 1;
|
||||
-
|
||||
return 0;
|
||||
}
|
||||
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -1,180 +0,0 @@
|
||||
From 9cc4c4b8d31b35428859ef626d4428fc393aace4 Mon Sep 17 00:00:00 2001
|
||||
From: Lang Cheng <chenglang@huawei.com>
|
||||
Date: Thu, 11 Nov 2021 21:08:35 +0800
|
||||
Subject: [PATCH 3/8] libhns: Fix wrong data type when writing doorbell
|
||||
|
||||
The DB data is a __le32[] value instead of uint32_t[], and the DB register
|
||||
should be written with a little-endian data instead of uint64_t.
|
||||
|
||||
Fixes: 1523fbb1ea8e ("libhns: Add verbs of cq support")
|
||||
Signed-off-by: Lang Cheng <chenglang@huawei.com>
|
||||
Signed-off-by: Yixing Liu <liuyixing1@huawei.com>
|
||||
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
---
|
||||
providers/hns/hns_roce_u_db.h | 14 ++++----------
|
||||
providers/hns/hns_roce_u_hw_v1.c | 17 +++++++++--------
|
||||
providers/hns/hns_roce_u_hw_v2.c | 23 ++++++++++++-----------
|
||||
3 files changed, 25 insertions(+), 29 deletions(-)
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u_db.h b/providers/hns/hns_roce_u_db.h
|
||||
index b44e64d4..13df9b52 100644
|
||||
--- a/providers/hns/hns_roce_u_db.h
|
||||
+++ b/providers/hns/hns_roce_u_db.h
|
||||
@@ -32,23 +32,17 @@
|
||||
|
||||
#include <linux/types.h>
|
||||
|
||||
+#include <util/mmio.h>
|
||||
#include "hns_roce_u.h"
|
||||
|
||||
#ifndef _HNS_ROCE_U_DB_H
|
||||
#define _HNS_ROCE_U_DB_H
|
||||
|
||||
-#if __BYTE_ORDER == __LITTLE_ENDIAN
|
||||
-#define HNS_ROCE_PAIR_TO_64(val) ((uint64_t) val[1] << 32 | val[0])
|
||||
-#elif __BYTE_ORDER == __BIG_ENDIAN
|
||||
-#define HNS_ROCE_PAIR_TO_64(val) ((uint64_t) val[0] << 32 | val[1])
|
||||
-#else
|
||||
-#error __BYTE_ORDER not defined
|
||||
-#endif
|
||||
+#define HNS_ROCE_WORD_NUM 2
|
||||
|
||||
-static inline void hns_roce_write64(uint32_t val[2],
|
||||
- struct hns_roce_context *ctx, int offset)
|
||||
+static inline void hns_roce_write64(void *dest, __le32 val[HNS_ROCE_WORD_NUM])
|
||||
{
|
||||
- *(volatile uint64_t *) (ctx->uar + offset) = HNS_ROCE_PAIR_TO_64(val);
|
||||
+ mmio_write64_le(dest, *(__le64 *)val);
|
||||
}
|
||||
|
||||
void *hns_roce_alloc_db(struct hns_roce_context *ctx,
|
||||
diff --git a/providers/hns/hns_roce_u_hw_v1.c b/providers/hns/hns_roce_u_hw_v1.c
|
||||
index 8f0a71aa..14ee4817 100644
|
||||
--- a/providers/hns/hns_roce_u_hw_v1.c
|
||||
+++ b/providers/hns/hns_roce_u_hw_v1.c
|
||||
@@ -65,7 +65,7 @@ static void hns_roce_update_rq_head(struct hns_roce_context *ctx,
|
||||
|
||||
udma_to_device_barrier();
|
||||
|
||||
- hns_roce_write64((uint32_t *)&rq_db, ctx, ROCEE_DB_OTHERS_L_0_REG);
|
||||
+ hns_roce_write64(ctx->uar + ROCEE_DB_OTHERS_L_0_REG, (__le32 *)&rq_db);
|
||||
}
|
||||
|
||||
static void hns_roce_update_sq_head(struct hns_roce_context *ctx,
|
||||
@@ -84,7 +84,7 @@ static void hns_roce_update_sq_head(struct hns_roce_context *ctx,
|
||||
|
||||
udma_to_device_barrier();
|
||||
|
||||
- hns_roce_write64((uint32_t *)&sq_db, ctx, ROCEE_DB_SQ_L_0_REG);
|
||||
+ hns_roce_write64(ctx->uar + ROCEE_DB_SQ_L_0_REG, (__le32 *)&sq_db);
|
||||
}
|
||||
|
||||
static void hns_roce_update_cq_cons_index(struct hns_roce_context *ctx,
|
||||
@@ -102,7 +102,7 @@ static void hns_roce_update_cq_cons_index(struct hns_roce_context *ctx,
|
||||
CQ_DB_U32_4_CONS_IDX_S,
|
||||
cq->cons_index & ((cq->cq_depth << 1) - 1));
|
||||
|
||||
- hns_roce_write64((uint32_t *)&cq_db, ctx, ROCEE_DB_OTHERS_L_0_REG);
|
||||
+ hns_roce_write64(ctx->uar + ROCEE_DB_OTHERS_L_0_REG, (__le32 *)&cq_db);
|
||||
}
|
||||
|
||||
static void hns_roce_handle_error_cqe(struct hns_roce_cqe *cqe,
|
||||
@@ -422,10 +422,11 @@ static int hns_roce_u_v1_poll_cq(struct ibv_cq *ibvcq, int ne,
|
||||
*/
|
||||
static int hns_roce_u_v1_arm_cq(struct ibv_cq *ibvcq, int solicited)
|
||||
{
|
||||
- uint32_t ci;
|
||||
- uint32_t solicited_flag;
|
||||
- struct hns_roce_cq_db cq_db = {};
|
||||
+ struct hns_roce_context *ctx = to_hr_ctx(ibvcq->context);
|
||||
struct hns_roce_cq *cq = to_hr_cq(ibvcq);
|
||||
+ struct hns_roce_cq_db cq_db = {};
|
||||
+ uint32_t solicited_flag;
|
||||
+ uint32_t ci;
|
||||
|
||||
ci = cq->cons_index & ((cq->cq_depth << 1) - 1);
|
||||
solicited_flag = solicited ? HNS_ROCE_CQ_DB_REQ_SOL :
|
||||
@@ -441,8 +442,8 @@ static int hns_roce_u_v1_arm_cq(struct ibv_cq *ibvcq, int solicited)
|
||||
roce_set_field(cq_db.u32_4, CQ_DB_U32_4_CONS_IDX_M,
|
||||
CQ_DB_U32_4_CONS_IDX_S, ci);
|
||||
|
||||
- hns_roce_write64((uint32_t *)&cq_db, to_hr_ctx(ibvcq->context),
|
||||
- ROCEE_DB_OTHERS_L_0_REG);
|
||||
+ hns_roce_write64(ctx->uar + ROCEE_DB_OTHERS_L_0_REG, (__le32 *)&cq_db);
|
||||
+
|
||||
return 0;
|
||||
}
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
|
||||
index f947dbd7..efd949f4 100644
|
||||
--- a/providers/hns/hns_roce_u_hw_v2.c
|
||||
+++ b/providers/hns/hns_roce_u_hw_v2.c
|
||||
@@ -293,7 +293,7 @@ static void hns_roce_update_rq_db(struct hns_roce_context *ctx,
|
||||
HNS_ROCE_V2_RQ_DB);
|
||||
rq_db.parameter = htole32(rq_head);
|
||||
|
||||
- hns_roce_write64((uint32_t *)&rq_db, ctx, ROCEE_VF_DB_CFG0_OFFSET);
|
||||
+ hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&rq_db);
|
||||
}
|
||||
|
||||
static void hns_roce_update_sq_db(struct hns_roce_context *ctx,
|
||||
@@ -308,7 +308,7 @@ static void hns_roce_update_sq_db(struct hns_roce_context *ctx,
|
||||
sq_db.parameter = htole32(sq_head);
|
||||
roce_set_field(sq_db.parameter, DB_PARAM_SL_M, DB_PARAM_SL_S, sl);
|
||||
|
||||
- hns_roce_write64((uint32_t *)&sq_db, ctx, ROCEE_VF_DB_CFG0_OFFSET);
|
||||
+ hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&sq_db);
|
||||
}
|
||||
|
||||
static void hns_roce_v2_update_cq_cons_index(struct hns_roce_context *ctx,
|
||||
@@ -325,7 +325,7 @@ static void hns_roce_v2_update_cq_cons_index(struct hns_roce_context *ctx,
|
||||
roce_set_field(cq_db.parameter, DB_PARAM_CQ_CMD_SN_M,
|
||||
DB_PARAM_CQ_CMD_SN_S, 1);
|
||||
|
||||
- hns_roce_write64((uint32_t *)&cq_db, ctx, ROCEE_VF_DB_CFG0_OFFSET);
|
||||
+ hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&cq_db);
|
||||
}
|
||||
|
||||
static struct hns_roce_qp *hns_roce_v2_find_qp(struct hns_roce_context *ctx,
|
||||
@@ -659,11 +659,12 @@ static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne,
|
||||
|
||||
static int hns_roce_u_v2_arm_cq(struct ibv_cq *ibvcq, int solicited)
|
||||
{
|
||||
- uint32_t ci;
|
||||
- uint32_t cmd_sn;
|
||||
- uint32_t solicited_flag;
|
||||
- struct hns_roce_db cq_db = {};
|
||||
+ struct hns_roce_context *ctx = to_hr_ctx(ibvcq->context);
|
||||
struct hns_roce_cq *cq = to_hr_cq(ibvcq);
|
||||
+ struct hns_roce_db cq_db = {};
|
||||
+ uint32_t solicited_flag;
|
||||
+ uint32_t cmd_sn;
|
||||
+ uint32_t ci;
|
||||
|
||||
ci = cq->cons_index & ((cq->cq_depth << 1) - 1);
|
||||
cmd_sn = cq->arm_sn & HNS_ROCE_CMDSN_MASK;
|
||||
@@ -681,8 +682,8 @@ static int hns_roce_u_v2_arm_cq(struct ibv_cq *ibvcq, int solicited)
|
||||
DB_PARAM_CQ_CMD_SN_S, cmd_sn);
|
||||
roce_set_bit(cq_db.parameter, DB_PARAM_CQ_NOTIFY_S, solicited_flag);
|
||||
|
||||
- hns_roce_write64((uint32_t *)&cq_db, to_hr_ctx(ibvcq->context),
|
||||
- ROCEE_VF_DB_CFG0_OFFSET);
|
||||
+ hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&cq_db);
|
||||
+
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -1620,8 +1621,8 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
|
||||
srq_db.parameter = htole32(srq->idx_que.head &
|
||||
DB_PARAM_SRQ_PRODUCER_COUNTER_M);
|
||||
|
||||
- hns_roce_write64((uint32_t *)&srq_db, ctx,
|
||||
- ROCEE_VF_DB_CFG0_OFFSET);
|
||||
+ hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET,
|
||||
+ (__le32 *)&srq_db);
|
||||
}
|
||||
|
||||
pthread_spin_unlock(&srq->lock);
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -1,43 +0,0 @@
|
||||
From 60d82566fc94b11280be26733bc306e6af3d2697 Mon Sep 17 00:00:00 2001
|
||||
From: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
Date: Tue, 9 Nov 2021 20:40:58 +0800
|
||||
Subject: [PATCH 4/8] libhns: Remove unsupported QP type
|
||||
|
||||
Currently, user space does not support UC type QP.
|
||||
|
||||
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
|
||||
---
|
||||
providers/hns/hns_roce_u_hw_v1.c | 1 -
|
||||
providers/hns/hns_roce_u_hw_v2.c | 3 +--
|
||||
2 files changed, 1 insertion(+), 3 deletions(-)
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u_hw_v1.c b/providers/hns/hns_roce_u_hw_v1.c
|
||||
index 14ee4817..279c9b0f 100644
|
||||
--- a/providers/hns/hns_roce_u_hw_v1.c
|
||||
+++ b/providers/hns/hns_roce_u_hw_v1.c
|
||||
@@ -532,7 +532,6 @@ static int hns_roce_u_v1_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr,
|
||||
ctrl->flag |= htole32(ps_opcode);
|
||||
wqe += sizeof(struct hns_roce_wqe_raddr_seg);
|
||||
break;
|
||||
- case IBV_QPT_UC:
|
||||
case IBV_QPT_UD:
|
||||
default:
|
||||
break;
|
||||
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
|
||||
index efd949f4..c62f74b5 100644
|
||||
--- a/providers/hns/hns_roce_u_hw_v2.c
|
||||
+++ b/providers/hns/hns_roce_u_hw_v2.c
|
||||
@@ -460,8 +460,7 @@ static int hns_roce_handle_recv_inl_wqe(struct hns_roce_v2_cqe *cqe,
|
||||
struct hns_roce_qp **cur_qp,
|
||||
struct ibv_wc *wc, uint32_t opcode)
|
||||
{
|
||||
- if (((*cur_qp)->verbs_qp.qp.qp_type == IBV_QPT_RC ||
|
||||
- (*cur_qp)->verbs_qp.qp.qp_type == IBV_QPT_UC) &&
|
||||
+ if (((*cur_qp)->verbs_qp.qp.qp_type == IBV_QPT_RC) &&
|
||||
(opcode == HNS_ROCE_RECV_OP_SEND ||
|
||||
opcode == HNS_ROCE_RECV_OP_SEND_WITH_IMM ||
|
||||
opcode == HNS_ROCE_RECV_OP_SEND_WITH_INV) &&
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -1,67 +0,0 @@
|
||||
From e460a4208d1821b1477e621ad5a7b72068e844f9 Mon Sep 17 00:00:00 2001
|
||||
From: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
Date: Tue, 11 May 2021 19:06:32 +0800
|
||||
Subject: [PATCH 5/8] libhns: Avoid using WQE indexes that exceed the SRQ size
|
||||
|
||||
The index of SRQ WQE got from bitmap may be greater than the capability,
|
||||
so a check for that should be added.
|
||||
|
||||
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
Signed-off-by: Weihang Li <liweihang@huawei.com>
|
||||
---
|
||||
providers/hns/hns_roce_u_hw_v2.c | 20 ++++++++++++++------
|
||||
1 file changed, 14 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
|
||||
index c62f74b5..1169b64b 100644
|
||||
--- a/providers/hns/hns_roce_u_hw_v2.c
|
||||
+++ b/providers/hns/hns_roce_u_hw_v2.c
|
||||
@@ -1527,8 +1527,9 @@ static int hns_roce_u_v2_destroy_qp(struct ibv_qp *ibqp)
|
||||
return ret;
|
||||
}
|
||||
|
||||
-static int find_empty_entry(struct hns_roce_idx_que *idx_que)
|
||||
+static int get_wqe_idx(struct hns_roce_srq *srq, int *wqe_idx)
|
||||
{
|
||||
+ struct hns_roce_idx_que *idx_que = &srq->idx_que;
|
||||
int bit_num;
|
||||
int i;
|
||||
|
||||
@@ -1536,12 +1537,20 @@ static int find_empty_entry(struct hns_roce_idx_que *idx_que)
|
||||
for (i = 0; i < idx_que->bitmap_cnt && idx_que->bitmap[i] == 0; ++i)
|
||||
;
|
||||
if (i == idx_que->bitmap_cnt)
|
||||
- return ENOMEM;
|
||||
+ return -ENOMEM;
|
||||
|
||||
bit_num = ffsl(idx_que->bitmap[i]);
|
||||
idx_que->bitmap[i] &= ~(1ULL << (bit_num - 1));
|
||||
|
||||
- return i * BIT_CNT_PER_LONG + (bit_num - 1);
|
||||
+ *wqe_idx = i * BIT_CNT_PER_LONG + (bit_num - 1);
|
||||
+
|
||||
+ /* If wqe_cnt is less than BIT_CNT_PER_LONG, wqe_idx may be greater
|
||||
+ * than wqe_cnt.
|
||||
+ */
|
||||
+ if (*wqe_idx >= srq->wqe_cnt)
|
||||
+ return -ENOMEM;
|
||||
+
|
||||
+ return 0;
|
||||
}
|
||||
|
||||
static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
|
||||
@@ -1580,9 +1589,8 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
|
||||
break;
|
||||
}
|
||||
|
||||
- wqe_idx = find_empty_entry(&srq->idx_que);
|
||||
- if (wqe_idx < 0 || wqe_idx >= srq->wqe_cnt) {
|
||||
- ret = -ENOMEM;
|
||||
+ ret = get_wqe_idx(srq, &wqe_idx);
|
||||
+ if (ret) {
|
||||
*bad_wr = wr;
|
||||
break;
|
||||
}
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -1,33 +0,0 @@
|
||||
From 91034654bdb2fd6e1fce81b4c1aea41bb4b6bf98 Mon Sep 17 00:00:00 2001
|
||||
From: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
Date: Tue, 11 May 2021 19:06:33 +0800
|
||||
Subject: [PATCH 6/8] libhns: Don't create RQ for a QP that associated with a
|
||||
SRQ
|
||||
|
||||
If a QP is associated with a SRQ, it's RQ should not be created.
|
||||
|
||||
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
Signed-off-by: Weihang Li <liweihang@huawei.com>
|
||||
---
|
||||
providers/hns/hns_roce_u_verbs.c | 5 +++++
|
||||
1 file changed, 5 insertions(+)
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
|
||||
index 9b4934b9..125858d2 100644
|
||||
--- a/providers/hns/hns_roce_u_verbs.c
|
||||
+++ b/providers/hns/hns_roce_u_verbs.c
|
||||
@@ -760,6 +760,11 @@ static int verify_qp_create_cap(struct hns_roce_context *ctx,
|
||||
cap->max_recv_sge > ctx->max_sge)
|
||||
return -EINVAL;
|
||||
|
||||
+ if (attr->srq) {
|
||||
+ cap->max_recv_wr = 0;
|
||||
+ cap->max_recv_sge = 0;
|
||||
+ }
|
||||
+
|
||||
min_wqe_num = hr_dev->hw_version == HNS_ROCE_HW_VER1 ?
|
||||
HNS_ROCE_V1_MIN_WQE_NUM : HNS_ROCE_V2_MIN_WQE_NUM;
|
||||
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -1,368 +0,0 @@
|
||||
From 64c66455fef1c908cc8f06a2b71aa2fd71806218 Mon Sep 17 00:00:00 2001
|
||||
From: Yixing Liu <liuyixing1@huawei.com>
|
||||
Date: Wed, 15 Dec 2021 16:42:30 +0800
|
||||
Subject: [PATCH 7/8] libhns: Add support for direct wqe
|
||||
|
||||
The current write wqe mechanism is to write to DDR first, and then notify
|
||||
the hardware through doorbell to read the data. Direct wqe is a mechanism
|
||||
to fill wqe directly into the hardware. In the case of light load, the wqe
|
||||
will be filled into pcie bar space of the hardware, this will reduce one
|
||||
memory access operation and therefore reduce the latency. SIMD instructions
|
||||
allows cpu to write the 512 bits at one time to device memory, thus it can
|
||||
be used for posting direct wqe.
|
||||
|
||||
The process of post send of HIP08/09:
|
||||
|
||||
+-----------+
|
||||
| post send |
|
||||
+-----+-----+
|
||||
|
|
||||
+-----+-----+
|
||||
| write WQE |
|
||||
+-----+-----+
|
||||
|
|
||||
| udma_to_device_barrier()
|
||||
|
|
||||
+-----+-----+ Y +-----------+ N
|
||||
| HIP09 ? +------+ multi WR ?+-------------+
|
||||
+-----+-----+ +-----+-----+ |
|
||||
| N | Y |
|
||||
+-----+-----+ +-----+-----+ +--------+--------+
|
||||
| ring DB | | ring DB | |direct WQE (ST4) |
|
||||
+-----------+ +-----------+ +-----------------+
|
||||
|
||||
Signed-off-by: Yixing Liu <liuyixing1@huawei.com>
|
||||
Signed-off-by: Lang Cheng <chenglang@huawei.com>
|
||||
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
---
|
||||
providers/hns/hns_roce_u.h | 5 +++-
|
||||
providers/hns/hns_roce_u_hw_v2.c | 43 ++++++++++++++++++++++++++------
|
||||
providers/hns/hns_roce_u_hw_v2.h | 31 +++++++++++++----------
|
||||
providers/hns/hns_roce_u_verbs.c | 26 +++++++++++++++++--
|
||||
util/mmio.h | 27 +++++++++++++++++++-
|
||||
5 files changed, 107 insertions(+), 25 deletions(-)
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
|
||||
index b3f48113..37711363 100644
|
||||
--- a/providers/hns/hns_roce_u.h
|
||||
+++ b/providers/hns/hns_roce_u.h
|
||||
@@ -80,6 +80,8 @@
|
||||
|
||||
#define INVALID_SGE_LENGTH 0x80000000
|
||||
|
||||
+#define HNS_ROCE_DWQE_PAGE_SIZE 65536
|
||||
+
|
||||
#define HNS_ROCE_ADDRESS_MASK 0xFFFFFFFF
|
||||
#define HNS_ROCE_ADDRESS_SHIFT 32
|
||||
|
||||
@@ -279,13 +281,14 @@ struct hns_roce_qp {
|
||||
struct hns_roce_sge_ex ex_sge;
|
||||
unsigned int next_sge;
|
||||
int port_num;
|
||||
- int sl;
|
||||
+ uint8_t sl;
|
||||
unsigned int qkey;
|
||||
enum ibv_mtu path_mtu;
|
||||
|
||||
struct hns_roce_rinl_buf rq_rinl_buf;
|
||||
unsigned long flags;
|
||||
int refcnt; /* specially used for XRC */
|
||||
+ void *dwqe_page;
|
||||
};
|
||||
|
||||
struct hns_roce_av {
|
||||
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
|
||||
index 1169b64b..f102fd61 100644
|
||||
--- a/providers/hns/hns_roce_u_hw_v2.c
|
||||
+++ b/providers/hns/hns_roce_u_hw_v2.c
|
||||
@@ -33,6 +33,7 @@
|
||||
#define _GNU_SOURCE
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
+#include <sys/mman.h>
|
||||
#include "hns_roce_u.h"
|
||||
#include "hns_roce_u_db.h"
|
||||
#include "hns_roce_u_hw_v2.h"
|
||||
@@ -297,20 +298,40 @@ static void hns_roce_update_rq_db(struct hns_roce_context *ctx,
|
||||
}
|
||||
|
||||
static void hns_roce_update_sq_db(struct hns_roce_context *ctx,
|
||||
- unsigned int qpn, unsigned int sl,
|
||||
- unsigned int sq_head)
|
||||
+ struct hns_roce_qp *qp)
|
||||
{
|
||||
struct hns_roce_db sq_db = {};
|
||||
|
||||
- sq_db.byte_4 = htole32(qpn);
|
||||
+ sq_db.byte_4 = htole32(qp->verbs_qp.qp.qp_num);
|
||||
roce_set_field(sq_db.byte_4, DB_BYTE_4_CMD_M, DB_BYTE_4_CMD_S,
|
||||
HNS_ROCE_V2_SQ_DB);
|
||||
- sq_db.parameter = htole32(sq_head);
|
||||
- roce_set_field(sq_db.parameter, DB_PARAM_SL_M, DB_PARAM_SL_S, sl);
|
||||
|
||||
+ sq_db.parameter = htole32(qp->sq.head);
|
||||
+ roce_set_field(sq_db.parameter, DB_PARAM_SL_M, DB_PARAM_SL_S, qp->sl);
|
||||
hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&sq_db);
|
||||
}
|
||||
|
||||
+static void hns_roce_write512(uint64_t *dest, uint64_t *val)
|
||||
+{
|
||||
+ mmio_memcpy_x64(dest, val, sizeof(struct hns_roce_rc_sq_wqe));
|
||||
+}
|
||||
+
|
||||
+static void hns_roce_write_dwqe(struct hns_roce_qp *qp, void *wqe)
|
||||
+{
|
||||
+ struct hns_roce_rc_sq_wqe *rc_sq_wqe = wqe;
|
||||
+
|
||||
+ /* All kinds of DirectWQE have the same header field layout */
|
||||
+ roce_set_bit(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_FLAG_S, 1);
|
||||
+ roce_set_field(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_DB_SL_L_M,
|
||||
+ RC_SQ_WQE_BYTE_4_DB_SL_L_S, qp->sl);
|
||||
+ roce_set_field(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_DB_SL_H_M,
|
||||
+ RC_SQ_WQE_BYTE_4_DB_SL_H_S, qp->sl >> HNS_ROCE_SL_SHIFT);
|
||||
+ roce_set_field(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_WQE_INDEX_M,
|
||||
+ RC_SQ_WQE_BYTE_4_WQE_INDEX_S, qp->sq.head);
|
||||
+
|
||||
+ hns_roce_write512(qp->dwqe_page, wqe);
|
||||
+}
|
||||
+
|
||||
static void hns_roce_v2_update_cq_cons_index(struct hns_roce_context *ctx,
|
||||
struct hns_roce_cq *cq)
|
||||
{
|
||||
@@ -339,8 +360,7 @@ static struct hns_roce_qp *hns_roce_v2_find_qp(struct hns_roce_context *ctx,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
-static void hns_roce_v2_clear_qp(struct hns_roce_context *ctx,
|
||||
- struct hns_roce_qp *qp)
|
||||
+void hns_roce_v2_clear_qp(struct hns_roce_context *ctx, struct hns_roce_qp *qp)
|
||||
{
|
||||
uint32_t qpn = qp->verbs_qp.qp.qp_num;
|
||||
uint32_t tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
|
||||
@@ -1196,6 +1216,7 @@ int hns_roce_u_v2_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr,
|
||||
break;
|
||||
case IBV_QPT_UD:
|
||||
ret = set_ud_wqe(wqe, qp, wr, nreq, &sge_info);
|
||||
+ qp->sl = to_hr_ah(wr->wr.ud.ah)->av.sl;
|
||||
break;
|
||||
default:
|
||||
ret = EINVAL;
|
||||
@@ -1214,7 +1235,10 @@ out:
|
||||
|
||||
udma_to_device_barrier();
|
||||
|
||||
- hns_roce_update_sq_db(ctx, ibvqp->qp_num, qp->sl, qp->sq.head);
|
||||
+ if (nreq == 1 && (qp->flags & HNS_ROCE_QP_CAP_DIRECT_WQE))
|
||||
+ hns_roce_write_dwqe(qp, wqe);
|
||||
+ else
|
||||
+ hns_roce_update_sq_db(ctx, qp);
|
||||
|
||||
if (qp->flags & HNS_ROCE_QP_CAP_SQ_RECORD_DB)
|
||||
*(qp->sdb) = qp->sq.head & 0xffff;
|
||||
@@ -1506,6 +1530,9 @@ static int hns_roce_u_v2_destroy_qp(struct ibv_qp *ibqp)
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
+ if (qp->flags & HNS_ROCE_QP_CAP_DIRECT_WQE)
|
||||
+ munmap(qp->dwqe_page, HNS_ROCE_DWQE_PAGE_SIZE);
|
||||
+
|
||||
hns_roce_v2_clear_qp(ctx, qp);
|
||||
|
||||
hns_roce_lock_cqs(ibqp);
|
||||
diff --git a/providers/hns/hns_roce_u_hw_v2.h b/providers/hns/hns_roce_u_hw_v2.h
|
||||
index c13d82e3..af72cd70 100644
|
||||
--- a/providers/hns/hns_roce_u_hw_v2.h
|
||||
+++ b/providers/hns/hns_roce_u_hw_v2.h
|
||||
@@ -40,6 +40,8 @@
|
||||
|
||||
#define HNS_ROCE_CMDSN_MASK 0x3
|
||||
|
||||
+#define HNS_ROCE_SL_SHIFT 2
|
||||
+
|
||||
/* V2 REG DEFINITION */
|
||||
#define ROCEE_VF_DB_CFG0_OFFSET 0x0230
|
||||
|
||||
@@ -133,6 +135,8 @@ struct hns_roce_db {
|
||||
#define DB_BYTE_4_CMD_S 24
|
||||
#define DB_BYTE_4_CMD_M GENMASK(27, 24)
|
||||
|
||||
+#define DB_BYTE_4_FLAG_S 31
|
||||
+
|
||||
#define DB_PARAM_SRQ_PRODUCER_COUNTER_S 0
|
||||
#define DB_PARAM_SRQ_PRODUCER_COUNTER_M GENMASK(15, 0)
|
||||
|
||||
@@ -216,8 +220,16 @@ struct hns_roce_rc_sq_wqe {
|
||||
};
|
||||
|
||||
#define RC_SQ_WQE_BYTE_4_OPCODE_S 0
|
||||
-#define RC_SQ_WQE_BYTE_4_OPCODE_M \
|
||||
- (((1UL << 5) - 1) << RC_SQ_WQE_BYTE_4_OPCODE_S)
|
||||
+#define RC_SQ_WQE_BYTE_4_OPCODE_M GENMASK(4, 0)
|
||||
+
|
||||
+#define RC_SQ_WQE_BYTE_4_DB_SL_L_S 5
|
||||
+#define RC_SQ_WQE_BYTE_4_DB_SL_L_M GENMASK(6, 5)
|
||||
+
|
||||
+#define RC_SQ_WQE_BYTE_4_DB_SL_H_S 13
|
||||
+#define RC_SQ_WQE_BYTE_4_DB_SL_H_M GENMASK(14, 13)
|
||||
+
|
||||
+#define RC_SQ_WQE_BYTE_4_WQE_INDEX_S 15
|
||||
+#define RC_SQ_WQE_BYTE_4_WQE_INDEX_M GENMASK(30, 15)
|
||||
|
||||
#define RC_SQ_WQE_BYTE_4_OWNER_S 7
|
||||
|
||||
@@ -239,6 +251,8 @@ struct hns_roce_rc_sq_wqe {
|
||||
|
||||
#define RC_SQ_WQE_BYTE_4_RDMA_WRITE_S 22
|
||||
|
||||
+#define RC_SQ_WQE_BYTE_4_FLAG_S 31
|
||||
+
|
||||
#define RC_SQ_WQE_BYTE_16_XRC_SRQN_S 0
|
||||
#define RC_SQ_WQE_BYTE_16_XRC_SRQN_M \
|
||||
(((1UL << 24) - 1) << RC_SQ_WQE_BYTE_16_XRC_SRQN_S)
|
||||
@@ -311,23 +325,12 @@ struct hns_roce_ud_sq_wqe {
|
||||
#define UD_SQ_WQE_OPCODE_S 0
|
||||
#define UD_SQ_WQE_OPCODE_M GENMASK(4, 0)
|
||||
|
||||
-#define UD_SQ_WQE_DB_SL_L_S 5
|
||||
-#define UD_SQ_WQE_DB_SL_L_M GENMASK(6, 5)
|
||||
-
|
||||
-#define UD_SQ_WQE_DB_SL_H_S 13
|
||||
-#define UD_SQ_WQE_DB_SL_H_M GENMASK(14, 13)
|
||||
-
|
||||
-#define UD_SQ_WQE_INDEX_S 15
|
||||
-#define UD_SQ_WQE_INDEX_M GENMASK(30, 15)
|
||||
-
|
||||
#define UD_SQ_WQE_OWNER_S 7
|
||||
|
||||
#define UD_SQ_WQE_CQE_S 8
|
||||
|
||||
#define UD_SQ_WQE_SE_S 11
|
||||
|
||||
-#define UD_SQ_WQE_FLAG_S 31
|
||||
-
|
||||
#define UD_SQ_WQE_PD_S 0
|
||||
#define UD_SQ_WQE_PD_M GENMASK(23, 0)
|
||||
|
||||
@@ -376,4 +379,6 @@ struct hns_roce_ud_sq_wqe {
|
||||
|
||||
#define MAX_SERVICE_LEVEL 0x7
|
||||
|
||||
+void hns_roce_v2_clear_qp(struct hns_roce_context *ctx, struct hns_roce_qp *qp);
|
||||
+
|
||||
#endif /* _HNS_ROCE_U_HW_V2_H */
|
||||
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
|
||||
index 125858d2..fc902815 100644
|
||||
--- a/providers/hns/hns_roce_u_verbs.c
|
||||
+++ b/providers/hns/hns_roce_u_verbs.c
|
||||
@@ -1076,7 +1076,8 @@ static int hns_roce_store_qp(struct hns_roce_context *ctx,
|
||||
|
||||
static int qp_exec_create_cmd(struct ibv_qp_init_attr_ex *attr,
|
||||
struct hns_roce_qp *qp,
|
||||
- struct hns_roce_context *ctx)
|
||||
+ struct hns_roce_context *ctx,
|
||||
+ uint64_t *dwqe_mmap_key)
|
||||
{
|
||||
struct hns_roce_create_qp_ex_resp resp_ex = {};
|
||||
struct hns_roce_create_qp_ex cmd_ex = {};
|
||||
@@ -1093,6 +1094,7 @@ static int qp_exec_create_cmd(struct ibv_qp_init_attr_ex *attr,
|
||||
&resp_ex.ibv_resp, sizeof(resp_ex));
|
||||
|
||||
qp->flags = resp_ex.drv_payload.cap_flags;
|
||||
+ *dwqe_mmap_key = resp_ex.drv_payload.dwqe_mmap_key;
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -1144,11 +1146,23 @@ static int hns_roce_alloc_qp_buf(struct ibv_qp_init_attr_ex *attr,
|
||||
return ret;
|
||||
}
|
||||
|
||||
+static int mmap_dwqe(struct ibv_context *ibv_ctx, struct hns_roce_qp *qp,
|
||||
+ uint64_t dwqe_mmap_key)
|
||||
+{
|
||||
+ qp->dwqe_page = mmap(NULL, HNS_ROCE_DWQE_PAGE_SIZE, PROT_WRITE,
|
||||
+ MAP_SHARED, ibv_ctx->cmd_fd, dwqe_mmap_key);
|
||||
+ if (qp->dwqe_page == MAP_FAILED)
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx,
|
||||
struct ibv_qp_init_attr_ex *attr)
|
||||
{
|
||||
struct hns_roce_context *context = to_hr_ctx(ibv_ctx);
|
||||
struct hns_roce_qp *qp;
|
||||
+ uint64_t dwqe_mmap_key;
|
||||
int ret;
|
||||
|
||||
ret = verify_qp_create_attr(context, attr);
|
||||
@@ -1167,7 +1181,7 @@ static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx,
|
||||
if (ret)
|
||||
goto err_buf;
|
||||
|
||||
- ret = qp_exec_create_cmd(attr, qp, context);
|
||||
+ ret = qp_exec_create_cmd(attr, qp, context, &dwqe_mmap_key);
|
||||
if (ret)
|
||||
goto err_cmd;
|
||||
|
||||
@@ -1175,10 +1189,18 @@ static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx,
|
||||
if (ret)
|
||||
goto err_store;
|
||||
|
||||
+ if (qp->flags & HNS_ROCE_QP_CAP_DIRECT_WQE) {
|
||||
+ ret = mmap_dwqe(ibv_ctx, qp, dwqe_mmap_key);
|
||||
+ if (ret)
|
||||
+ goto err_dwqe;
|
||||
+ }
|
||||
+
|
||||
qp_setup_config(attr, qp, context);
|
||||
|
||||
return &qp->verbs_qp.qp;
|
||||
|
||||
+err_dwqe:
|
||||
+ hns_roce_v2_clear_qp(context, qp);
|
||||
err_store:
|
||||
ibv_cmd_destroy_qp(&qp->verbs_qp.qp);
|
||||
err_cmd:
|
||||
diff --git a/util/mmio.h b/util/mmio.h
|
||||
index 101af9dd..01d1455e 100644
|
||||
--- a/util/mmio.h
|
||||
+++ b/util/mmio.h
|
||||
@@ -210,8 +210,33 @@ static inline void mmio_memcpy_x64(void *dest, const void *src, size_t bytecnt)
|
||||
{
|
||||
s390_mmio_write(dest, src, bytecnt);
|
||||
}
|
||||
-#else
|
||||
|
||||
+#elif defined(__aarch64__) || defined(__arm__)
|
||||
+#include <arm_neon.h>
|
||||
+
|
||||
+static inline void _mmio_memcpy_x64_64b(void *dest, const void *src)
|
||||
+{
|
||||
+ vst4q_u64(dest, vld4q_u64(src));
|
||||
+}
|
||||
+
|
||||
+static inline void _mmio_memcpy_x64(void *dest, const void *src, size_t bytecnt)
|
||||
+{
|
||||
+ do {
|
||||
+ _mmio_memcpy_x64_64b(dest, src);
|
||||
+ bytecnt -= sizeof(uint64x2x4_t);
|
||||
+ src += sizeof(uint64x2x4_t);
|
||||
+ } while (bytecnt > 0);
|
||||
+}
|
||||
+
|
||||
+#define mmio_memcpy_x64(dest, src, bytecount) \
|
||||
+ ({ \
|
||||
+ if (__builtin_constant_p((bytecount) == 64)) \
|
||||
+ _mmio_memcpy_x64_64b((dest), (src)); \
|
||||
+ else \
|
||||
+ _mmio_memcpy_x64((dest), (src), (bytecount)); \
|
||||
+ })
|
||||
+
|
||||
+#else
|
||||
/* Transfer is some multiple of 64 bytes */
|
||||
static inline void mmio_memcpy_x64(void *dest, const void *src, size_t bytecnt)
|
||||
{
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -1,70 +0,0 @@
|
||||
From 608c142e7cbac2a6c02071022fe87b081a6ddc4f Mon Sep 17 00:00:00 2001
|
||||
From: Yixing Liu <liuyixing1@huawei.com>
|
||||
Date: Tue, 21 Dec 2021 21:38:08 +0800
|
||||
Subject: [PATCH 8/8] libhns: Use new SQ doorbell register for HIP09
|
||||
|
||||
HIP09 set a new BAR space for SQ doorbell. Each SQ doorbell has an
|
||||
independent BAR space and the size is 64KB. SQ doorbell share
|
||||
the same BAR space with direct WQE.
|
||||
|
||||
Signed-off-by: Yixing Liu <liuyixing1@huawei.com>
|
||||
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
---
|
||||
providers/hns/hns_roce_u.h | 1 +
|
||||
providers/hns/hns_roce_u_hw_v2.c | 4 ++--
|
||||
providers/hns/hns_roce_u_verbs.c | 5 +++++
|
||||
3 files changed, 8 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
|
||||
index 37711363..460363b7 100644
|
||||
--- a/providers/hns/hns_roce_u.h
|
||||
+++ b/providers/hns/hns_roce_u.h
|
||||
@@ -238,6 +238,7 @@ struct hns_roce_wq {
|
||||
unsigned int wqe_shift;
|
||||
unsigned int shift; /* wq size is 2^shift */
|
||||
int offset;
|
||||
+ void *db_reg;
|
||||
};
|
||||
|
||||
/* record the result of sge process */
|
||||
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
|
||||
index f102fd61..9cbc0aac 100644
|
||||
--- a/providers/hns/hns_roce_u_hw_v2.c
|
||||
+++ b/providers/hns/hns_roce_u_hw_v2.c
|
||||
@@ -308,7 +308,7 @@ static void hns_roce_update_sq_db(struct hns_roce_context *ctx,
|
||||
|
||||
sq_db.parameter = htole32(qp->sq.head);
|
||||
roce_set_field(sq_db.parameter, DB_PARAM_SL_M, DB_PARAM_SL_S, qp->sl);
|
||||
- hns_roce_write64(ctx->uar + ROCEE_VF_DB_CFG0_OFFSET, (__le32 *)&sq_db);
|
||||
+ hns_roce_write64(qp->sq.db_reg, (__le32 *)&sq_db);
|
||||
}
|
||||
|
||||
static void hns_roce_write512(uint64_t *dest, uint64_t *val)
|
||||
@@ -329,7 +329,7 @@ static void hns_roce_write_dwqe(struct hns_roce_qp *qp, void *wqe)
|
||||
roce_set_field(rc_sq_wqe->byte_4, RC_SQ_WQE_BYTE_4_WQE_INDEX_M,
|
||||
RC_SQ_WQE_BYTE_4_WQE_INDEX_S, qp->sq.head);
|
||||
|
||||
- hns_roce_write512(qp->dwqe_page, wqe);
|
||||
+ hns_roce_write512(qp->sq.db_reg, wqe);
|
||||
}
|
||||
|
||||
static void hns_roce_v2_update_cq_cons_index(struct hns_roce_context *ctx,
|
||||
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
|
||||
index fc902815..c5022c83 100644
|
||||
--- a/providers/hns/hns_roce_u_verbs.c
|
||||
+++ b/providers/hns/hns_roce_u_verbs.c
|
||||
@@ -1117,6 +1117,11 @@ static void qp_setup_config(struct ibv_qp_init_attr_ex *attr,
|
||||
}
|
||||
|
||||
qp->max_inline_data = attr->cap.max_inline_data;
|
||||
+
|
||||
+ if (qp->flags & HNS_ROCE_QP_CAP_DIRECT_WQE)
|
||||
+ qp->sq.db_reg = qp->dwqe_page;
|
||||
+ else
|
||||
+ qp->sq.db_reg = ctx->uar + ROCEE_VF_DB_CFG0_OFFSET;
|
||||
}
|
||||
|
||||
void hns_roce_free_qp_buf(struct hns_roce_qp *qp, struct hns_roce_context *ctx)
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -1,70 +0,0 @@
|
||||
From 5cc1a047c4d71ced86b0f71f66adf12475a3c788 Mon Sep 17 00:00:00 2001
|
||||
From: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
Date: Tue, 11 May 2021 19:06:35 +0800
|
||||
Subject: libhns: Bugfix for checking whether the SRQ is full when posting WR
|
||||
|
||||
If the user post a list of WRs, the head in the for loop is not updated in
|
||||
time, and the judgment of if (head == tail) becomes invalid.
|
||||
|
||||
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
Signed-off-by: Weihang Li <liweihang@huawei.com>
|
||||
---
|
||||
providers/hns/hns_roce_u_hw_v2.c | 17 +++++++++++++----
|
||||
providers/hns/hns_roce_u_verbs.c | 2 +-
|
||||
2 files changed, 14 insertions(+), 5 deletions(-)
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
|
||||
index 82124082..0c15bdbe 100644
|
||||
--- a/providers/hns/hns_roce_u_hw_v2.c
|
||||
+++ b/providers/hns/hns_roce_u_hw_v2.c
|
||||
@@ -1527,6 +1527,15 @@ static int hns_roce_u_v2_destroy_qp(struct ibv_qp *ibqp)
|
||||
return ret;
|
||||
}
|
||||
|
||||
+static int hns_roce_v2_srqwq_overflow(struct hns_roce_srq *srq)
|
||||
+{
|
||||
+ struct hns_roce_idx_que *idx_que = &srq->idx_que;
|
||||
+ unsigned int cur;
|
||||
+
|
||||
+ cur = idx_que->head - idx_que->tail;
|
||||
+ return cur >= srq->wqe_cnt - 1;
|
||||
+}
|
||||
+
|
||||
static int get_wqe_idx(struct hns_roce_srq *srq, int *wqe_idx)
|
||||
{
|
||||
struct hns_roce_idx_que *idx_que = &srq->idx_que;
|
||||
@@ -1577,14 +1586,14 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
|
||||
|
||||
max_sge = srq->max_gs - srq->rsv_sge;
|
||||
for (nreq = 0; wr; ++nreq, wr = wr->next) {
|
||||
- if (wr->num_sge > max_sge) {
|
||||
- ret = -EINVAL;
|
||||
+ if (hns_roce_v2_srqwq_overflow(srq)) {
|
||||
+ ret = -ENOMEM;
|
||||
*bad_wr = wr;
|
||||
break;
|
||||
}
|
||||
|
||||
- if (srq->idx_que.head == srq->idx_que.tail) {
|
||||
- ret = -ENOMEM;
|
||||
+ if (wr->num_sge > max_sge) {
|
||||
+ ret = -EINVAL;
|
||||
*bad_wr = wr;
|
||||
break;
|
||||
}
|
||||
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
|
||||
index 3abf7b48..dace35fd 100644
|
||||
--- a/providers/hns/hns_roce_u_verbs.c
|
||||
+++ b/providers/hns/hns_roce_u_verbs.c
|
||||
@@ -492,7 +492,7 @@ static int hns_roce_create_idx_que(struct hns_roce_srq *srq)
|
||||
idx_que->bitmap[i] = ~(0UL);
|
||||
|
||||
idx_que->head = 0;
|
||||
- idx_que->tail = srq->wqe_cnt - 1;
|
||||
+ idx_que->tail = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
--
|
||||
2.30.0
|
||||
|
||||
@ -1,30 +0,0 @@
|
||||
From a79800afbbc48e5c5274bf3fc0e890705b3a596d Mon Sep 17 00:00:00 2001
|
||||
From: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
Date: Tue, 11 May 2021 19:06:36 +0800
|
||||
Subject: libhns: Allow users to create a 0-depth SRQs
|
||||
|
||||
Users is allowed to create 0-depth SRQs, so the judgement about whether
|
||||
max_wr is zero should be removed.
|
||||
|
||||
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
Signed-off-by: Weihang Li <liweihang@huawei.com>
|
||||
---
|
||||
providers/hns/hns_roce_u_verbs.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
|
||||
index dace35fd..2d1a6de3 100644
|
||||
--- a/providers/hns/hns_roce_u_verbs.c
|
||||
+++ b/providers/hns/hns_roce_u_verbs.c
|
||||
@@ -525,7 +525,7 @@ static int hns_roce_verify_srq(struct hns_roce_context *context,
|
||||
init_attr->srq_type != IBV_SRQT_XRC)
|
||||
return -EINVAL;
|
||||
|
||||
- if (!init_attr->attr.max_wr || !init_attr->attr.max_sge ||
|
||||
+ if (!init_attr->attr.max_sge ||
|
||||
init_attr->attr.max_wr > context->max_srq_wr ||
|
||||
init_attr->attr.max_sge > context->max_srq_sge)
|
||||
return -EINVAL;
|
||||
--
|
||||
2.30.0
|
||||
|
||||
@ -1,176 +0,0 @@
|
||||
From f46d1f312984bdb372d2f86ac7dd7c2dcaa8c721 Mon Sep 17 00:00:00 2001
|
||||
From: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
Date: Tue, 11 May 2021 19:06:37 +0800
|
||||
Subject: libhns: Refactor the process of post_srq_recv
|
||||
|
||||
SRQ is a shared queue, it mainly consists of four parts:
|
||||
|
||||
1. wqe buf: wqe buf is used to store wqe data.
|
||||
|
||||
2. wqe_idx buf: the cqe of SRQ is not generated in the order of wqe, so
|
||||
the wqe_idx corresponding to the idle WQE needs to be placed in an FIFO
|
||||
queue, it can instruct the hardware to obtain the corresponding WQE.
|
||||
|
||||
3.bitmap: bitmap is used to generate and release wqe_idx. When the user
|
||||
has a new WR, the driver finds the idx of the idle wqe in bitmap. When the
|
||||
CQE of wqe is generated, the driver releases the idx.
|
||||
|
||||
4. wr_id buf: wr_id buf is used to store the user's wr_id, then return it
|
||||
to the user when ibv_poll_cq() is invoked.
|
||||
|
||||
After refactor, the functions of the four parts are more clearer.
|
||||
|
||||
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
Signed-off-by: Weihang Li <liweihang@huawei.com>
|
||||
---
|
||||
providers/hns/hns_roce_u_hw_v2.c | 95 +++++++++++++++++++-------------
|
||||
1 file changed, 57 insertions(+), 38 deletions(-)
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
|
||||
index 0c15bdbe..b622eaef 100644
|
||||
--- a/providers/hns/hns_roce_u_hw_v2.c
|
||||
+++ b/providers/hns/hns_roce_u_hw_v2.c
|
||||
@@ -242,7 +242,7 @@ static void *get_send_sge_ex(struct hns_roce_qp *qp, unsigned int n)
|
||||
return qp->buf.buf + qp->ex_sge.offset + (n << qp->ex_sge.sge_shift);
|
||||
}
|
||||
|
||||
-static void *get_srq_wqe(struct hns_roce_srq *srq, int n)
|
||||
+static void *get_srq_wqe(struct hns_roce_srq *srq, unsigned int n)
|
||||
{
|
||||
return srq->buf.buf + (n << srq->wqe_shift);
|
||||
}
|
||||
@@ -1536,7 +1536,21 @@ static int hns_roce_v2_srqwq_overflow(struct hns_roce_srq *srq)
|
||||
return cur >= srq->wqe_cnt - 1;
|
||||
}
|
||||
|
||||
-static int get_wqe_idx(struct hns_roce_srq *srq, int *wqe_idx)
|
||||
+static int check_post_srq_valid(struct hns_roce_srq *srq,
|
||||
+ struct ibv_recv_wr *wr)
|
||||
+{
|
||||
+ unsigned int max_sge = srq->max_gs - srq->rsv_sge;
|
||||
+
|
||||
+ if (hns_roce_v2_srqwq_overflow(srq))
|
||||
+ return -ENOMEM;
|
||||
+
|
||||
+ if (wr->num_sge > max_sge)
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int get_wqe_idx(struct hns_roce_srq *srq, unsigned int *wqe_idx)
|
||||
{
|
||||
struct hns_roce_idx_que *idx_que = &srq->idx_que;
|
||||
int bit_num;
|
||||
@@ -1562,38 +1576,58 @@ static int get_wqe_idx(struct hns_roce_srq *srq, int *wqe_idx)
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static void fill_srq_wqe(struct hns_roce_srq *srq, unsigned int wqe_idx,
|
||||
+ struct ibv_recv_wr *wr)
|
||||
+{
|
||||
+ struct hns_roce_v2_wqe_data_seg *dseg;
|
||||
+ int i;
|
||||
+
|
||||
+ dseg = get_srq_wqe(srq, wqe_idx);
|
||||
+
|
||||
+ for (i = 0; i < wr->num_sge; ++i) {
|
||||
+ dseg[i].len = htole32(wr->sg_list[i].length);
|
||||
+ dseg[i].lkey = htole32(wr->sg_list[i].lkey);
|
||||
+ dseg[i].addr = htole64(wr->sg_list[i].addr);
|
||||
+ }
|
||||
+
|
||||
+ /* hw stop reading when identify the last one */
|
||||
+ if (srq->rsv_sge) {
|
||||
+ dseg[i].len = htole32(INVALID_SGE_LENGTH);
|
||||
+ dseg[i].lkey = htole32(0x0);
|
||||
+ dseg[i].addr = 0;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static void fill_wqe_idx(struct hns_roce_srq *srq, unsigned int wqe_idx)
|
||||
+{
|
||||
+ struct hns_roce_idx_que *idx_que = &srq->idx_que;
|
||||
+ unsigned int head;
|
||||
+ __le32 *idx_buf;
|
||||
+
|
||||
+ head = idx_que->head & (srq->wqe_cnt - 1);
|
||||
+
|
||||
+ idx_buf = get_idx_buf(idx_que, head);
|
||||
+ *idx_buf = htole32(wqe_idx);
|
||||
+
|
||||
+ idx_que->head++;
|
||||
+}
|
||||
+
|
||||
static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
|
||||
struct ibv_recv_wr *wr,
|
||||
struct ibv_recv_wr **bad_wr)
|
||||
{
|
||||
struct hns_roce_context *ctx = to_hr_ctx(ib_srq->context);
|
||||
struct hns_roce_srq *srq = to_hr_srq(ib_srq);
|
||||
- struct hns_roce_v2_wqe_data_seg *dseg;
|
||||
struct hns_roce_db srq_db;
|
||||
- unsigned int max_sge;
|
||||
- __le32 *srq_idx;
|
||||
+ unsigned int wqe_idx;
|
||||
int ret = 0;
|
||||
- int wqe_idx;
|
||||
- void *wqe;
|
||||
int nreq;
|
||||
- int ind;
|
||||
- int i;
|
||||
|
||||
pthread_spin_lock(&srq->lock);
|
||||
|
||||
- /* current idx of srqwq */
|
||||
- ind = srq->idx_que.head & (srq->wqe_cnt - 1);
|
||||
-
|
||||
- max_sge = srq->max_gs - srq->rsv_sge;
|
||||
for (nreq = 0; wr; ++nreq, wr = wr->next) {
|
||||
- if (hns_roce_v2_srqwq_overflow(srq)) {
|
||||
- ret = -ENOMEM;
|
||||
- *bad_wr = wr;
|
||||
- break;
|
||||
- }
|
||||
-
|
||||
- if (wr->num_sge > max_sge) {
|
||||
- ret = -EINVAL;
|
||||
+ ret = check_post_srq_valid(srq, wr);
|
||||
+ if (ret) {
|
||||
*bad_wr = wr;
|
||||
break;
|
||||
}
|
||||
@@ -1604,28 +1638,13 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq,
|
||||
break;
|
||||
}
|
||||
|
||||
- wqe = get_srq_wqe(srq, wqe_idx);
|
||||
- dseg = (struct hns_roce_v2_wqe_data_seg *)wqe;
|
||||
-
|
||||
- for (i = 0; i < wr->num_sge; ++i) {
|
||||
- set_data_seg_v2(dseg, wr->sg_list + i);
|
||||
- dseg++;
|
||||
- }
|
||||
-
|
||||
- /* hw stop reading when identify the last one */
|
||||
- if (srq->rsv_sge)
|
||||
- set_ending_data_seg(dseg);
|
||||
-
|
||||
- srq_idx = (__le32 *)get_idx_buf(&srq->idx_que, ind);
|
||||
- *srq_idx = htole32(wqe_idx);
|
||||
+ fill_srq_wqe(srq, wqe_idx, wr);
|
||||
+ fill_wqe_idx(srq, wqe_idx);
|
||||
|
||||
srq->wrid[wqe_idx] = wr->wr_id;
|
||||
- ind = (ind + 1) & (srq->wqe_cnt - 1);
|
||||
}
|
||||
|
||||
if (nreq) {
|
||||
- srq->idx_que.head += nreq;
|
||||
-
|
||||
/*
|
||||
* Make sure that descriptors are written before
|
||||
* we write doorbell record.
|
||||
--
|
||||
2.30.0
|
||||
|
||||
@ -1,33 +0,0 @@
|
||||
From a18b0ee409d3382aa556b8f06a6cd6bfbef3f5c8 Mon Sep 17 00:00:00 2001
|
||||
From: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
Date: Tue, 11 May 2021 19:06:38 +0800
|
||||
Subject: libhns: Set srqlimit to 0 when creating SRQ
|
||||
|
||||
According to the IB specification, the srq_limt parameter should not be
|
||||
configured when creating srq. But the libhns does not set attr.srq_limit
|
||||
to 0 currently. As a result, when attr.srq_limit provided by the user is
|
||||
not 0, the value of attr.srq_limit returned to the user will be different
|
||||
from that obtained by ibv_query_srq(). Therefore, the driver should set
|
||||
attr.srq_limit to 0 when creating SRQ.
|
||||
|
||||
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
Signed-off-by: Weihang Li <liweihang@huawei.com>
|
||||
---
|
||||
providers/hns/hns_roce_u_verbs.c | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
|
||||
index 2d1a6de3..107da753 100644
|
||||
--- a/providers/hns/hns_roce_u_verbs.c
|
||||
+++ b/providers/hns/hns_roce_u_verbs.c
|
||||
@@ -580,6 +580,7 @@ static struct ibv_srq *create_srq(struct ibv_context *context,
|
||||
srq->wqe_cnt = roundup_pow_of_two(attr->max_wr + 1);
|
||||
srq->max_gs = roundup_pow_of_two(attr->max_sge + srq->rsv_sge);
|
||||
attr->max_sge = srq->max_gs;
|
||||
+ attr->srq_limit = 0;
|
||||
|
||||
ret = hns_roce_create_idx_que(srq);
|
||||
if (ret)
|
||||
--
|
||||
2.30.0
|
||||
|
||||
@ -1,367 +0,0 @@
|
||||
From b914c76318f5b95e3157c3cbf1ccb49ec6d27635 Mon Sep 17 00:00:00 2001
|
||||
From: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
Date: Tue, 11 May 2021 19:06:39 +0800
|
||||
Subject: libhns: Refactor the process of create_srq
|
||||
|
||||
Reorganize create_srq() as several sub-functions to make the process
|
||||
clearer.
|
||||
|
||||
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
Signed-off-by: Weihang Li <liweihang@huawei.com>
|
||||
---
|
||||
providers/hns/hns_roce_u.h | 7 +-
|
||||
providers/hns/hns_roce_u_hw_v2.c | 2 +-
|
||||
providers/hns/hns_roce_u_verbs.c | 178 ++++++++++++++++++-------------
|
||||
3 files changed, 105 insertions(+), 82 deletions(-)
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
|
||||
index b3f48113..a437727c 100644
|
||||
--- a/providers/hns/hns_roce_u.h
|
||||
+++ b/providers/hns/hns_roce_u.h
|
||||
@@ -211,7 +211,8 @@ struct hns_roce_idx_que {
|
||||
|
||||
struct hns_roce_srq {
|
||||
struct verbs_srq verbs_srq;
|
||||
- struct hns_roce_buf buf;
|
||||
+ struct hns_roce_idx_que idx_que;
|
||||
+ struct hns_roce_buf wqe_buf;
|
||||
pthread_spinlock_t lock;
|
||||
unsigned long *wrid;
|
||||
unsigned int srqn;
|
||||
@@ -221,7 +222,6 @@ struct hns_roce_srq {
|
||||
unsigned int wqe_shift;
|
||||
unsigned int *db;
|
||||
unsigned short counter;
|
||||
- struct hns_roce_idx_que idx_que;
|
||||
};
|
||||
|
||||
struct hns_roce_wq {
|
||||
@@ -343,8 +343,7 @@ static inline struct hns_roce_cq *to_hr_cq(struct ibv_cq *ibv_cq)
|
||||
|
||||
static inline struct hns_roce_srq *to_hr_srq(struct ibv_srq *ibv_srq)
|
||||
{
|
||||
- return container_of(container_of(ibv_srq, struct verbs_srq, srq),
|
||||
- struct hns_roce_srq, verbs_srq);
|
||||
+ return container_of(ibv_srq, struct hns_roce_srq, verbs_srq.srq);
|
||||
}
|
||||
|
||||
static inline struct hns_roce_qp *to_hr_qp(struct ibv_qp *ibv_qp)
|
||||
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
|
||||
index b622eaef..d4e7e4f9 100644
|
||||
--- a/providers/hns/hns_roce_u_hw_v2.c
|
||||
+++ b/providers/hns/hns_roce_u_hw_v2.c
|
||||
@@ -244,7 +244,7 @@ static void *get_send_sge_ex(struct hns_roce_qp *qp, unsigned int n)
|
||||
|
||||
static void *get_srq_wqe(struct hns_roce_srq *srq, unsigned int n)
|
||||
{
|
||||
- return srq->buf.buf + (n << srq->wqe_shift);
|
||||
+ return srq->wqe_buf.buf + (n << srq->wqe_shift);
|
||||
}
|
||||
|
||||
static void *get_idx_buf(struct hns_roce_idx_que *idx_que, int n)
|
||||
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
|
||||
index 107da753..75b9e530 100644
|
||||
--- a/providers/hns/hns_roce_u_verbs.c
|
||||
+++ b/providers/hns/hns_roce_u_verbs.c
|
||||
@@ -432,17 +432,23 @@ static int hns_roce_store_srq(struct hns_roce_context *ctx,
|
||||
uint32_t tind = (srq->srqn & (ctx->num_srqs - 1)) >>
|
||||
ctx->srq_table_shift;
|
||||
|
||||
+ pthread_mutex_lock(&ctx->srq_table_mutex);
|
||||
+
|
||||
if (!ctx->srq_table[tind].refcnt) {
|
||||
ctx->srq_table[tind].table =
|
||||
calloc(ctx->srq_table_mask + 1,
|
||||
sizeof(struct hns_roce_srq *));
|
||||
- if (!ctx->srq_table[tind].table)
|
||||
+ if (!ctx->srq_table[tind].table) {
|
||||
+ pthread_mutex_unlock(&ctx->srq_table_mutex);
|
||||
return -ENOMEM;
|
||||
+ }
|
||||
}
|
||||
|
||||
++ctx->srq_table[tind].refcnt;
|
||||
ctx->srq_table[tind].table[srq->srqn & ctx->srq_table_mask] = srq;
|
||||
|
||||
+ pthread_mutex_unlock(&ctx->srq_table_mutex);
|
||||
+
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -461,13 +467,46 @@ static void hns_roce_clear_srq(struct hns_roce_context *ctx, uint32_t srqn)
|
||||
{
|
||||
uint32_t tind = (srqn & (ctx->num_srqs - 1)) >> ctx->srq_table_shift;
|
||||
|
||||
+ pthread_mutex_lock(&ctx->srq_table_mutex);
|
||||
+
|
||||
if (!--ctx->srq_table[tind].refcnt)
|
||||
free(ctx->srq_table[tind].table);
|
||||
else
|
||||
ctx->srq_table[tind].table[srqn & ctx->srq_table_mask] = NULL;
|
||||
+
|
||||
+ pthread_mutex_unlock(&ctx->srq_table_mutex);
|
||||
+}
|
||||
+
|
||||
+static int verify_srq_create_attr(struct hns_roce_context *context,
|
||||
+ struct ibv_srq_init_attr_ex *attr)
|
||||
+{
|
||||
+ if (attr->srq_type != IBV_SRQT_BASIC &&
|
||||
+ attr->srq_type != IBV_SRQT_XRC)
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ if (!attr->attr.max_sge ||
|
||||
+ attr->attr.max_wr > context->max_srq_wr ||
|
||||
+ attr->attr.max_sge > context->max_srq_sge)
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static void set_srq_param(struct ibv_context *context, struct hns_roce_srq *srq,
|
||||
+ struct ibv_srq_init_attr_ex *attr)
|
||||
+{
|
||||
+ if (to_hr_dev(context->device)->hw_version == HNS_ROCE_HW_VER2)
|
||||
+ srq->rsv_sge = 1;
|
||||
+
|
||||
+ srq->wqe_cnt = roundup_pow_of_two(attr->attr.max_wr + 1);
|
||||
+ srq->max_gs = roundup_pow_of_two(attr->attr.max_sge + srq->rsv_sge);
|
||||
+ srq->wqe_shift = hr_ilog32(roundup_pow_of_two(HNS_ROCE_SGE_SIZE *
|
||||
+ srq->max_gs));
|
||||
+ attr->attr.max_sge = srq->max_gs;
|
||||
+ attr->attr.srq_limit = 0;
|
||||
}
|
||||
|
||||
-static int hns_roce_create_idx_que(struct hns_roce_srq *srq)
|
||||
+static int alloc_srq_idx_que(struct hns_roce_srq *srq)
|
||||
{
|
||||
struct hns_roce_idx_que *idx_que = &srq->idx_que;
|
||||
unsigned int buf_size;
|
||||
@@ -478,13 +517,13 @@ static int hns_roce_create_idx_que(struct hns_roce_srq *srq)
|
||||
BIT_CNT_PER_LONG;
|
||||
idx_que->bitmap = calloc(idx_que->bitmap_cnt, sizeof(unsigned long));
|
||||
if (!idx_que->bitmap)
|
||||
- return ENOMEM;
|
||||
+ return -ENOMEM;
|
||||
|
||||
buf_size = to_hr_hem_entries_size(srq->wqe_cnt, idx_que->entry_shift);
|
||||
if (hns_roce_alloc_buf(&idx_que->buf, buf_size, HNS_HW_PAGE_SIZE)) {
|
||||
free(idx_que->bitmap);
|
||||
idx_que->bitmap = NULL;
|
||||
- return ENOMEM;
|
||||
+ return -ENOMEM;
|
||||
}
|
||||
|
||||
/* init the idx_que bitmap */
|
||||
@@ -497,40 +536,48 @@ static int hns_roce_create_idx_que(struct hns_roce_srq *srq)
|
||||
return 0;
|
||||
}
|
||||
|
||||
-static int hns_roce_alloc_srq_buf(struct hns_roce_srq *srq)
|
||||
+static int alloc_srq_wqe_buf(struct hns_roce_srq *srq)
|
||||
{
|
||||
- int srq_buf_size;
|
||||
+ int buf_size = to_hr_hem_entries_size(srq->wqe_cnt, srq->wqe_shift);
|
||||
|
||||
- srq->wrid = calloc(srq->wqe_cnt, sizeof(unsigned long));
|
||||
- if (!srq->wrid)
|
||||
- return ENOMEM;
|
||||
+ return hns_roce_alloc_buf(&srq->wqe_buf, buf_size, HNS_HW_PAGE_SIZE);
|
||||
+}
|
||||
|
||||
- srq->wqe_shift = hr_ilog32(roundup_pow_of_two(HNS_ROCE_SGE_SIZE *
|
||||
- srq->max_gs));
|
||||
- srq_buf_size = to_hr_hem_entries_size(srq->wqe_cnt, srq->wqe_shift);
|
||||
+static int alloc_srq_buf(struct hns_roce_srq *srq)
|
||||
+{
|
||||
+ int ret;
|
||||
|
||||
- /* allocate srq wqe buf */
|
||||
- if (hns_roce_alloc_buf(&srq->buf, srq_buf_size, HNS_HW_PAGE_SIZE)) {
|
||||
- free(srq->wrid);
|
||||
- return ENOMEM;
|
||||
+ ret = alloc_srq_idx_que(srq);
|
||||
+ if (ret)
|
||||
+ return ret;
|
||||
+
|
||||
+ ret = alloc_srq_wqe_buf(srq);
|
||||
+ if (ret)
|
||||
+ goto err_idx_que;
|
||||
+
|
||||
+ srq->wrid = calloc(srq->wqe_cnt, sizeof(*srq->wrid));
|
||||
+ if (!srq->wrid) {
|
||||
+ ret = -ENOMEM;
|
||||
+ goto err_wqe_buf;
|
||||
}
|
||||
|
||||
return 0;
|
||||
-}
|
||||
|
||||
-static int hns_roce_verify_srq(struct hns_roce_context *context,
|
||||
- struct ibv_srq_init_attr_ex *init_attr)
|
||||
-{
|
||||
- if (init_attr->srq_type != IBV_SRQT_BASIC &&
|
||||
- init_attr->srq_type != IBV_SRQT_XRC)
|
||||
- return -EINVAL;
|
||||
+err_wqe_buf:
|
||||
+ hns_roce_free_buf(&srq->wqe_buf);
|
||||
+err_idx_que:
|
||||
+ hns_roce_free_buf(&srq->idx_que.buf);
|
||||
+ free(srq->idx_que.bitmap);
|
||||
|
||||
- if (!init_attr->attr.max_sge ||
|
||||
- init_attr->attr.max_wr > context->max_srq_wr ||
|
||||
- init_attr->attr.max_sge > context->max_srq_sge)
|
||||
- return -EINVAL;
|
||||
+ return ret;
|
||||
+}
|
||||
|
||||
- return 0;
|
||||
+static void free_srq_buf(struct hns_roce_srq *srq)
|
||||
+{
|
||||
+ free(srq->wrid);
|
||||
+ hns_roce_free_buf(&srq->wqe_buf);
|
||||
+ hns_roce_free_buf(&srq->idx_que.buf);
|
||||
+ free(srq->idx_que.bitmap);
|
||||
}
|
||||
|
||||
static int exec_srq_create_cmd(struct ibv_context *context,
|
||||
@@ -541,7 +588,7 @@ static int exec_srq_create_cmd(struct ibv_context *context,
|
||||
struct hns_roce_create_srq_ex cmd_ex = {};
|
||||
int ret;
|
||||
|
||||
- cmd_ex.buf_addr = (uintptr_t)srq->buf.buf;
|
||||
+ cmd_ex.buf_addr = (uintptr_t)srq->wqe_buf.buf;
|
||||
cmd_ex.que_addr = (uintptr_t)srq->idx_que.buf.buf;
|
||||
cmd_ex.db_addr = (uintptr_t)srq->db;
|
||||
|
||||
@@ -559,57 +606,44 @@ static int exec_srq_create_cmd(struct ibv_context *context,
|
||||
static struct ibv_srq *create_srq(struct ibv_context *context,
|
||||
struct ibv_srq_init_attr_ex *init_attr)
|
||||
{
|
||||
- struct hns_roce_context *ctx = to_hr_ctx(context);
|
||||
- struct ibv_srq_attr *attr = &init_attr->attr;
|
||||
+ struct hns_roce_context *hr_ctx = to_hr_ctx(context);
|
||||
struct hns_roce_srq *srq;
|
||||
int ret;
|
||||
|
||||
- if (hns_roce_verify_srq(ctx, init_attr))
|
||||
- return NULL;
|
||||
+ ret = verify_srq_create_attr(hr_ctx, init_attr);
|
||||
+ if (ret)
|
||||
+ goto err;
|
||||
|
||||
srq = calloc(1, sizeof(*srq));
|
||||
- if (!srq)
|
||||
- return NULL;
|
||||
+ if (!srq) {
|
||||
+ ret = -ENOMEM;
|
||||
+ goto err;
|
||||
+ }
|
||||
|
||||
if (pthread_spin_init(&srq->lock, PTHREAD_PROCESS_PRIVATE))
|
||||
goto err_free_srq;
|
||||
|
||||
- if (to_hr_dev(context->device)->hw_version == HNS_ROCE_HW_VER2)
|
||||
- srq->rsv_sge = 1;
|
||||
-
|
||||
- srq->wqe_cnt = roundup_pow_of_two(attr->max_wr + 1);
|
||||
- srq->max_gs = roundup_pow_of_two(attr->max_sge + srq->rsv_sge);
|
||||
- attr->max_sge = srq->max_gs;
|
||||
- attr->srq_limit = 0;
|
||||
-
|
||||
- ret = hns_roce_create_idx_que(srq);
|
||||
- if (ret)
|
||||
+ set_srq_param(context, srq, init_attr);
|
||||
+ if (alloc_srq_buf(srq))
|
||||
goto err_free_srq;
|
||||
|
||||
- ret = hns_roce_alloc_srq_buf(srq);
|
||||
- if (ret)
|
||||
- goto err_idx_que;
|
||||
-
|
||||
- srq->db = hns_roce_alloc_db(ctx, HNS_ROCE_QP_TYPE_DB);
|
||||
+ srq->db = hns_roce_alloc_db(hr_ctx, HNS_ROCE_QP_TYPE_DB);
|
||||
if (!srq->db)
|
||||
goto err_srq_buf;
|
||||
|
||||
- *(srq->db) = 0;
|
||||
-
|
||||
- pthread_mutex_lock(&ctx->srq_table_mutex);
|
||||
+ *srq->db = 0;
|
||||
|
||||
ret = exec_srq_create_cmd(context, srq, init_attr);
|
||||
if (ret)
|
||||
goto err_srq_db;
|
||||
|
||||
- ret = hns_roce_store_srq(ctx, srq);
|
||||
+ ret = hns_roce_store_srq(hr_ctx, srq);
|
||||
if (ret)
|
||||
goto err_destroy_srq;
|
||||
|
||||
- pthread_mutex_unlock(&ctx->srq_table_mutex);
|
||||
-
|
||||
- srq->max_gs = attr->max_sge;
|
||||
- attr->max_sge = min(attr->max_sge - srq->rsv_sge, ctx->max_srq_sge);
|
||||
+ srq->max_gs = init_attr->attr.max_sge;
|
||||
+ init_attr->attr.max_sge =
|
||||
+ min(init_attr->attr.max_sge - srq->rsv_sge, hr_ctx->max_srq_sge);
|
||||
|
||||
return &srq->verbs_srq.srq;
|
||||
|
||||
@@ -617,20 +651,19 @@ err_destroy_srq:
|
||||
ibv_cmd_destroy_srq(&srq->verbs_srq.srq);
|
||||
|
||||
err_srq_db:
|
||||
- pthread_mutex_unlock(&ctx->srq_table_mutex);
|
||||
- hns_roce_free_db(ctx, srq->db, HNS_ROCE_QP_TYPE_DB);
|
||||
+ hns_roce_free_db(hr_ctx, srq->db, HNS_ROCE_QP_TYPE_DB);
|
||||
|
||||
err_srq_buf:
|
||||
- free(srq->wrid);
|
||||
- hns_roce_free_buf(&srq->buf);
|
||||
-
|
||||
-err_idx_que:
|
||||
- free(srq->idx_que.bitmap);
|
||||
- hns_roce_free_buf(&srq->idx_que.buf);
|
||||
+ free_srq_buf(srq);
|
||||
|
||||
err_free_srq:
|
||||
free(srq);
|
||||
|
||||
+err:
|
||||
+ if (ret < 0)
|
||||
+ ret = -ret;
|
||||
+
|
||||
+ errno = ret;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@@ -690,23 +723,14 @@ int hns_roce_u_destroy_srq(struct ibv_srq *ibv_srq)
|
||||
struct hns_roce_srq *srq = to_hr_srq(ibv_srq);
|
||||
int ret;
|
||||
|
||||
- pthread_mutex_lock(&ctx->srq_table_mutex);
|
||||
-
|
||||
ret = ibv_cmd_destroy_srq(ibv_srq);
|
||||
- if (ret) {
|
||||
- pthread_mutex_unlock(&ctx->srq_table_mutex);
|
||||
+ if (ret)
|
||||
return ret;
|
||||
- }
|
||||
|
||||
hns_roce_clear_srq(ctx, srq->srqn);
|
||||
|
||||
- pthread_mutex_unlock(&ctx->srq_table_mutex);
|
||||
-
|
||||
hns_roce_free_db(ctx, srq->db, HNS_ROCE_QP_TYPE_DB);
|
||||
- hns_roce_free_buf(&srq->buf);
|
||||
- free(srq->wrid);
|
||||
- hns_roce_free_buf(&srq->idx_que.buf);
|
||||
- free(srq->idx_que.bitmap);
|
||||
+ free_srq_buf(srq);
|
||||
free(srq);
|
||||
|
||||
return 0;
|
||||
--
|
||||
2.30.0
|
||||
|
||||
@ -1,69 +0,0 @@
|
||||
From d68ac72a8e4f2cf9754d3fcbbb8ff2a03e514c2f Mon Sep 17 00:00:00 2001
|
||||
From: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
Date: Tue, 11 May 2021 19:06:40 +0800
|
||||
Subject: libhns: Remove the reserved wqe of SRQ
|
||||
|
||||
There is an unreasonable reserved WQE in SRQ, it should be removed.
|
||||
|
||||
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
|
||||
Signed-off-by: Weihang Li <liweihang@huawei.com>
|
||||
---
|
||||
providers/hns/hns_roce_u.h | 1 +
|
||||
providers/hns/hns_roce_u_hw_v2.c | 4 +---
|
||||
providers/hns/hns_roce_u_verbs.c | 5 ++++-
|
||||
3 files changed, 6 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
|
||||
index a437727c..0d7abd81 100644
|
||||
--- a/providers/hns/hns_roce_u.h
|
||||
+++ b/providers/hns/hns_roce_u.h
|
||||
@@ -64,6 +64,7 @@
|
||||
#define HNS_ROCE_MIN_CQE_NUM 0x40
|
||||
#define HNS_ROCE_V1_MIN_WQE_NUM 0x20
|
||||
#define HNS_ROCE_V2_MIN_WQE_NUM 0x40
|
||||
+#define HNS_ROCE_MIN_SRQ_WQE_NUM 1
|
||||
|
||||
#define HNS_ROCE_CQE_SIZE 0x20
|
||||
#define HNS_ROCE_V3_CQE_SIZE 0x40
|
||||
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
|
||||
index d4e7e4f9..2fb6cdaf 100644
|
||||
--- a/providers/hns/hns_roce_u_hw_v2.c
|
||||
+++ b/providers/hns/hns_roce_u_hw_v2.c
|
||||
@@ -1530,10 +1530,8 @@ static int hns_roce_u_v2_destroy_qp(struct ibv_qp *ibqp)
|
||||
static int hns_roce_v2_srqwq_overflow(struct hns_roce_srq *srq)
|
||||
{
|
||||
struct hns_roce_idx_que *idx_que = &srq->idx_que;
|
||||
- unsigned int cur;
|
||||
|
||||
- cur = idx_que->head - idx_que->tail;
|
||||
- return cur >= srq->wqe_cnt - 1;
|
||||
+ return idx_que->head - idx_que->tail >= srq->wqe_cnt;
|
||||
}
|
||||
|
||||
static int check_post_srq_valid(struct hns_roce_srq *srq,
|
||||
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
|
||||
index 75b9e530..4847639b 100644
|
||||
--- a/providers/hns/hns_roce_u_verbs.c
|
||||
+++ b/providers/hns/hns_roce_u_verbs.c
|
||||
@@ -489,6 +489,9 @@ static int verify_srq_create_attr(struct hns_roce_context *context,
|
||||
attr->attr.max_sge > context->max_srq_sge)
|
||||
return -EINVAL;
|
||||
|
||||
+ attr->attr.max_wr = max_t(uint32_t, attr->attr.max_wr,
|
||||
+ HNS_ROCE_MIN_SRQ_WQE_NUM);
|
||||
+
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -498,7 +501,7 @@ static void set_srq_param(struct ibv_context *context, struct hns_roce_srq *srq,
|
||||
if (to_hr_dev(context->device)->hw_version == HNS_ROCE_HW_VER2)
|
||||
srq->rsv_sge = 1;
|
||||
|
||||
- srq->wqe_cnt = roundup_pow_of_two(attr->attr.max_wr + 1);
|
||||
+ srq->wqe_cnt = roundup_pow_of_two(attr->attr.max_wr);
|
||||
srq->max_gs = roundup_pow_of_two(attr->attr.max_sge + srq->rsv_sge);
|
||||
srq->wqe_shift = hr_ilog32(roundup_pow_of_two(HNS_ROCE_SGE_SIZE *
|
||||
srq->max_gs));
|
||||
--
|
||||
2.30.0
|
||||
|
||||
@ -1,89 +0,0 @@
|
||||
From 11c81d0e3a987f95b74e03b5e592a45029302f1d Mon Sep 17 00:00:00 2001
|
||||
From: Weihang Li <liweihang@huawei.com>
|
||||
Date: Fri, 14 May 2021 10:02:56 +0800
|
||||
Subject: libhns: Refactor process of setting extended sge
|
||||
|
||||
Refactor and encapsulate the parts of getting number of extended sge a WQE
|
||||
can use to make it easier to understand.
|
||||
|
||||
Signed-off-by: Weihang Li <liweihang@huawei.com>
|
||||
---
|
||||
providers/hns/hns_roce_u_verbs.c | 45 ++++++++++++++++++++------------
|
||||
1 file changed, 29 insertions(+), 16 deletions(-)
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
|
||||
index 30ab072a..a8508fc5 100644
|
||||
--- a/providers/hns/hns_roce_u_verbs.c
|
||||
+++ b/providers/hns/hns_roce_u_verbs.c
|
||||
@@ -920,31 +920,44 @@ err_alloc:
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
-static void set_extend_sge_param(struct hns_roce_device *hr_dev,
|
||||
- struct ibv_qp_init_attr_ex *attr,
|
||||
- struct hns_roce_qp *qp, unsigned int wr_cnt)
|
||||
+static unsigned int get_wqe_ext_sge_cnt(struct hns_roce_qp *qp)
|
||||
{
|
||||
- int cnt = 0;
|
||||
+ if (qp->verbs_qp.qp.qp_type == IBV_QPT_UD)
|
||||
+ return qp->sq.max_gs;
|
||||
+
|
||||
+ if (qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE)
|
||||
+ return qp->sq.max_gs - HNS_ROCE_SGE_IN_WQE;
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static void set_ext_sge_param(struct hns_roce_device *hr_dev,
|
||||
+ struct ibv_qp_init_attr_ex *attr,
|
||||
+ struct hns_roce_qp *qp, unsigned int wr_cnt)
|
||||
+{
|
||||
+ unsigned int total_sge_cnt;
|
||||
+ unsigned int wqe_sge_cnt;
|
||||
+
|
||||
+ qp->ex_sge.sge_shift = HNS_ROCE_SGE_SHIFT;
|
||||
|
||||
if (hr_dev->hw_version == HNS_ROCE_HW_VER1) {
|
||||
qp->sq.max_gs = HNS_ROCE_SGE_IN_WQE;
|
||||
- } else {
|
||||
- qp->sq.max_gs = attr->cap.max_send_sge;
|
||||
- if (attr->qp_type == IBV_QPT_UD)
|
||||
- cnt = roundup_pow_of_two(wr_cnt * qp->sq.max_gs);
|
||||
- else if (qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE)
|
||||
- cnt = roundup_pow_of_two(wr_cnt *
|
||||
- (qp->sq.max_gs -
|
||||
- HNS_ROCE_SGE_IN_WQE));
|
||||
+ return;
|
||||
}
|
||||
|
||||
- qp->ex_sge.sge_shift = HNS_ROCE_SGE_SHIFT;
|
||||
+ qp->sq.max_gs = attr->cap.max_send_sge;
|
||||
+
|
||||
+ wqe_sge_cnt = get_wqe_ext_sge_cnt(qp);
|
||||
|
||||
/* If the number of extended sge is not zero, they MUST use the
|
||||
* space of HNS_HW_PAGE_SIZE at least.
|
||||
*/
|
||||
- qp->ex_sge.sge_cnt = cnt ?
|
||||
- max(cnt, HNS_HW_PAGE_SIZE / HNS_ROCE_SGE_SIZE) : 0;
|
||||
+ if (wqe_sge_cnt) {
|
||||
+ total_sge_cnt = roundup_pow_of_two(wr_cnt * wqe_sge_cnt);
|
||||
+ qp->ex_sge.sge_cnt =
|
||||
+ max(total_sge_cnt,
|
||||
+ (unsigned int)HNS_HW_PAGE_SIZE / HNS_ROCE_SGE_SIZE);
|
||||
+ }
|
||||
}
|
||||
|
||||
static void hns_roce_set_qp_params(struct ibv_qp_init_attr_ex *attr,
|
||||
@@ -988,7 +1001,7 @@ static void hns_roce_set_qp_params(struct ibv_qp_init_attr_ex *attr,
|
||||
qp->sq.wqe_cnt = cnt;
|
||||
qp->sq.shift = hr_ilog32(cnt);
|
||||
|
||||
- set_extend_sge_param(hr_dev, attr, qp, cnt);
|
||||
+ set_ext_sge_param(hr_dev, attr, qp, cnt);
|
||||
|
||||
qp->sq.max_post = min(ctx->max_qp_wr, cnt);
|
||||
qp->sq.max_gs = min(ctx->max_sge, qp->sq.max_gs);
|
||||
--
|
||||
2.30.0
|
||||
|
||||
@ -1,139 +0,0 @@
|
||||
From 3507f87f776043acd238d7c0c41cc3511f186d08 Mon Sep 17 00:00:00 2001
|
||||
From: Lang Cheng <chenglang@huawei.com>
|
||||
Date: Fri, 14 May 2021 10:02:57 +0800
|
||||
Subject: libhns: Optimize set_sge process
|
||||
|
||||
Use local variables to avoid frequent ldr/str operations. And because UD's
|
||||
process of setting sge is more simple then RC, set_sge() can be splited
|
||||
into two functions for compiler optimization.
|
||||
|
||||
Signed-off-by: Lang Cheng <chenglang@huawei.com>
|
||||
Signed-off-by: Weihang Li <liweihang@huawei.com>
|
||||
---
|
||||
providers/hns/hns_roce_u_hw_v2.c | 83 +++++++++++++++++++++++---------
|
||||
1 file changed, 61 insertions(+), 22 deletions(-)
|
||||
|
||||
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
|
||||
index 4988943a..dc79a6f8 100644
|
||||
--- a/providers/hns/hns_roce_u_hw_v2.c
|
||||
+++ b/providers/hns/hns_roce_u_hw_v2.c
|
||||
@@ -701,39 +701,78 @@ static int check_qp_send(struct ibv_qp *qp, struct hns_roce_context *ctx)
|
||||
return 0;
|
||||
}
|
||||
|
||||
-static void set_sge(struct hns_roce_v2_wqe_data_seg *dseg,
|
||||
- struct hns_roce_qp *qp, struct ibv_send_wr *wr,
|
||||
- struct hns_roce_sge_info *sge_info)
|
||||
+static void set_rc_sge(struct hns_roce_v2_wqe_data_seg *dseg,
|
||||
+ struct hns_roce_qp *qp, struct ibv_send_wr *wr,
|
||||
+ struct hns_roce_sge_info *sge_info)
|
||||
{
|
||||
+ uint32_t mask = qp->ex_sge.sge_cnt - 1;
|
||||
+ uint32_t index = sge_info->start_idx;
|
||||
+ struct ibv_sge *sge = wr->sg_list;
|
||||
+ uint32_t len = 0;
|
||||
+ uint32_t cnt = 0;
|
||||
+ int flag;
|
||||
int i;
|
||||
|
||||
- sge_info->valid_num = 0;
|
||||
- sge_info->total_len = 0;
|
||||
+ flag = (wr->send_flags & IBV_SEND_INLINE &&
|
||||
+ wr->opcode != IBV_WR_ATOMIC_FETCH_AND_ADD &&
|
||||
+ wr->opcode != IBV_WR_ATOMIC_CMP_AND_SWP);
|
||||
|
||||
- for (i = 0; i < wr->num_sge; i++) {
|
||||
- if (unlikely(!wr->sg_list[i].length))
|
||||
+ for (i = 0; i < wr->num_sge; i++, sge++) {
|
||||
+ if (unlikely(!sge->length))
|
||||
continue;
|
||||
|
||||
- sge_info->total_len += wr->sg_list[i].length;
|
||||
- sge_info->valid_num++;
|
||||
+ len += sge->length;
|
||||
+ cnt++;
|
||||
|
||||
- if (wr->send_flags & IBV_SEND_INLINE &&
|
||||
- wr->opcode != IBV_WR_ATOMIC_FETCH_AND_ADD &&
|
||||
- wr->opcode != IBV_WR_ATOMIC_CMP_AND_SWP)
|
||||
+ if (flag)
|
||||
continue;
|
||||
|
||||
- /* No inner sge in UD wqe */
|
||||
- if (sge_info->valid_num <= HNS_ROCE_SGE_IN_WQE &&
|
||||
- qp->verbs_qp.qp.qp_type != IBV_QPT_UD) {
|
||||
- set_data_seg_v2(dseg, wr->sg_list + i);
|
||||
+ if (cnt <= HNS_ROCE_SGE_IN_WQE) {
|
||||
+ set_data_seg_v2(dseg, sge);
|
||||
dseg++;
|
||||
} else {
|
||||
- dseg = get_send_sge_ex(qp, sge_info->start_idx &
|
||||
- (qp->ex_sge.sge_cnt - 1));
|
||||
- set_data_seg_v2(dseg, wr->sg_list + i);
|
||||
- sge_info->start_idx++;
|
||||
+ dseg = get_send_sge_ex(qp, index & mask);
|
||||
+ set_data_seg_v2(dseg, sge);
|
||||
+ index++;
|
||||
}
|
||||
}
|
||||
+
|
||||
+ sge_info->start_idx = index;
|
||||
+ sge_info->valid_num = cnt;
|
||||
+ sge_info->total_len = len;
|
||||
+}
|
||||
+
|
||||
+static void set_ud_sge(struct hns_roce_v2_wqe_data_seg *dseg,
|
||||
+ struct hns_roce_qp *qp, struct ibv_send_wr *wr,
|
||||
+ struct hns_roce_sge_info *sge_info)
|
||||
+{
|
||||
+ int flag = wr->send_flags & IBV_SEND_INLINE;
|
||||
+ uint32_t mask = qp->ex_sge.sge_cnt - 1;
|
||||
+ uint32_t index = sge_info->start_idx;
|
||||
+ struct ibv_sge *sge = wr->sg_list;
|
||||
+ uint32_t len = 0;
|
||||
+ uint32_t cnt = 0;
|
||||
+ int i;
|
||||
+
|
||||
+ for (i = 0; i < wr->num_sge; i++, sge++) {
|
||||
+ if (unlikely(!sge->length))
|
||||
+ continue;
|
||||
+
|
||||
+ len += sge->length;
|
||||
+ cnt++;
|
||||
+
|
||||
+ if (flag)
|
||||
+ continue;
|
||||
+
|
||||
+ /* No inner sge in UD wqe */
|
||||
+ dseg = get_send_sge_ex(qp, index & mask);
|
||||
+ set_data_seg_v2(dseg, sge);
|
||||
+ index++;
|
||||
+ }
|
||||
+
|
||||
+ sge_info->start_idx = index;
|
||||
+ sge_info->valid_num = cnt;
|
||||
+ sge_info->total_len = len;
|
||||
}
|
||||
|
||||
static int fill_ext_sge_inl_data(struct hns_roce_qp *qp,
|
||||
@@ -910,7 +949,7 @@ static int fill_ud_data_seg(struct hns_roce_ud_sq_wqe *ud_sq_wqe,
|
||||
UD_SQ_WQE_MSG_START_SGE_IDX_S,
|
||||
sge_info->start_idx & (qp->ex_sge.sge_cnt - 1));
|
||||
|
||||
- set_sge((struct hns_roce_v2_wqe_data_seg *)ud_sq_wqe, qp, wr, sge_info);
|
||||
+ set_ud_sge((struct hns_roce_v2_wqe_data_seg *)ud_sq_wqe, qp, wr, sge_info);
|
||||
|
||||
ud_sq_wqe->msg_len = htole32(sge_info->total_len);
|
||||
|
||||
@@ -1111,7 +1150,7 @@ static int set_rc_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr,
|
||||
wqe += sizeof(struct hns_roce_rc_sq_wqe);
|
||||
dseg = wqe;
|
||||
|
||||
- set_sge(dseg, qp, wr, sge_info);
|
||||
+ set_rc_sge(dseg, qp, wr, sge_info);
|
||||
|
||||
rc_sq_wqe->msg_len = htole32(sge_info->total_len);
|
||||
|
||||
--
|
||||
2.30.0
|
||||
|
||||
@ -1,27 +0,0 @@
|
||||
From c381cfa26ba6163b9cc51212702e64bf1d83f838 Mon Sep 17 00:00:00 2001
|
||||
From: swimlessbird <52704385+swimlessbird@users.noreply.github.com>
|
||||
Date: Fri, 17 Sep 2021 14:35:05 +0800
|
||||
Subject: [PATCH] ibdiags: Increase maximum number of CPUs
|
||||
|
||||
In modern systems, the old limit (8) is small enough, so increase
|
||||
to something larger (256).
|
||||
|
||||
Signed-off-by: Suwan Sun <swimlessbird@gmail.com>
|
||||
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
|
||||
---
|
||||
infiniband-diags/ibsysstat.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/infiniband-diags/ibsysstat.c b/infiniband-diags/ibsysstat.c
|
||||
index 6ff7ca0c4..73972d039 100644
|
||||
--- a/infiniband-diags/ibsysstat.c
|
||||
+++ b/infiniband-diags/ibsysstat.c
|
||||
@@ -41,7 +41,7 @@
|
||||
|
||||
#include "ibdiag_common.h"
|
||||
|
||||
-#define MAX_CPUS 8
|
||||
+#define MAX_CPUS 256
|
||||
|
||||
static struct ibmad_port *srcport;
|
||||
|
||||
Binary file not shown.
BIN
rdma-core-41.0.tar.gz
Normal file
BIN
rdma-core-41.0.tar.gz
Normal file
Binary file not shown.
@ -1,29 +1,11 @@
|
||||
Name: rdma-core
|
||||
Version: 35.1
|
||||
Release: 3
|
||||
Version: 41.0
|
||||
Release: 1
|
||||
Summary: RDMA core userspace libraries and daemons
|
||||
License: GPLv2 or BSD
|
||||
Url: https://github.com/linux-rdma/rdma-core
|
||||
Source: https://github.com/linux-rdma/rdma-core/releases/download/v%{version}/%{name}-%{version}.tar.gz
|
||||
|
||||
Patch0: backport-fixbug-increase-maximum-number-of-cpus-rdma.patch
|
||||
Patch1: 0001-Update-kernel-headers.patch
|
||||
Patch2: 0002-libhns-Fix-the-ownership-of-the-head-tail-pointer-of.patch
|
||||
Patch3: 0003-libhns-Fix-wrong-data-type-when-writing-doorbell.patch
|
||||
Patch4: 0004-libhns-Remove-unsupported-QP-type.patch
|
||||
Patch5: 0005-libhns-Avoid-using-WQE-indexes-that-exceed-the-SRQ-s.patch
|
||||
Patch6: 0006-libhns-Don-t-create-RQ-for-a-QP-that-associated-with.patch
|
||||
Patch7: 0007-libhns-Add-support-for-direct-wqe.patch
|
||||
Patch8: 0008-libhns-Use-new-SQ-doorbell-register-for-HIP09.patch
|
||||
Patch9: 0009-libhns-Bugfix-for-checking-whether-the-SRQ-is-full-w.patch
|
||||
Patch10: 0010-libhns-Allow-users-to-create-a-0-depth-SRQs.patch
|
||||
Patch11: 0011-libhns-Refactor-the-process-of-post_srq_recv.patch
|
||||
Patch12: 0012-libhns-Set-srqlimit-to-0-when-creating-SRQ.patch
|
||||
Patch13: 0013-libhns-Refactor-the-process-of-create_srq.patch
|
||||
Patch14: 0014-libhns-Remove-the-reserved-wqe-of-SRQ.patch
|
||||
Patch15: 0015-libhns-Refactor-process-of-setting-extended-sge.patch
|
||||
Patch16: 0016-libhns-Optimize-set_sge-process.patch
|
||||
|
||||
BuildRequires: binutils cmake >= 2.8.11 gcc libudev-devel pkgconfig pkgconfig(libnl-3.0)
|
||||
BuildRequires: pkgconfig(libnl-route-3.0) valgrind-devel systemd systemd-devel
|
||||
BuildRequires: python3-devel python3-Cython python3 python3-docutils perl-generators
|
||||
@ -267,6 +249,12 @@ fi
|
||||
%{_mandir}/*
|
||||
|
||||
%changelog
|
||||
* Mon Jul 25 2022 tangchengchang <tangchengchang@huawei.com> - 41.0-1
|
||||
- Type: requirement
|
||||
- ID: NA
|
||||
- SUG: NA
|
||||
- DESC: update to 41.0
|
||||
|
||||
* Mon Jul 11 2022 luozhengfeng <luozhengfeng@h-partners.com> - 35.1-3
|
||||
- Type: bugfix
|
||||
- ID: NA
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user