From 448d82b2c62f09f1dd9c8045d34623dedef1c111 Mon Sep 17 00:00:00 2001 From: Luoyouming Date: Fri, 19 Nov 2021 20:21:21 +0800 Subject: [PATCH v4 04/10] libhns: Fix the problem of sge nums Currently, the driver only uses max_send_sge to initialize the sge num when creating a QP. So, in the sq inline scenario, the driver may not have enough sge to send data. For example, if max_send_sge is 16 and max_inline_data is 1024, the driver needs 1024/16=64 sge (one sge per 16 bytes of inline data) to send data, which is more than max_send_sge provides. Therefore, the calculation method of sge num is modified to take the maximum value of max_send_sge and max_inline_data/16 to solve this problem. Fixes: 11c81d0e3a98 ("libhns: Refactor process of setting extended sge") Fixes: b7814b7b9715 ("libhns: Support inline data in extented sge space for RC") Signed-off-by: Luoyouming Reviewed-by: Yangyang Li --- providers/hns/hns_roce_u.c | 9 +++- providers/hns/hns_roce_u.h | 3 ++ providers/hns/hns_roce_u_abi.h | 2 +- providers/hns/hns_roce_u_hw_v2.c | 13 +---- providers/hns/hns_roce_u_verbs.c | 84 ++++++++++++++++++++++++-------- 5 files changed, 77 insertions(+), 34 deletions(-) diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c index a46ceb9..1bd5bb1 100644 --- a/providers/hns/hns_roce_u.c +++ b/providers/hns/hns_roce_u.c @@ -103,9 +103,9 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, { struct hns_roce_device *hr_dev = to_hr_dev(ibdev); struct hns_roce_alloc_ucontext_resp resp = {}; + struct hns_roce_alloc_ucontext cmd = {}; struct ibv_device_attr dev_attrs; struct hns_roce_context *context; - struct ibv_get_context cmd; int i; context = verbs_init_and_alloc_context(ibdev, cmd_fd, context, ibv_ctx, @@ -113,7 +113,8 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, if (!context) return NULL; - if (ibv_cmd_get_context(&context->ibv_ctx, &cmd, sizeof(cmd), + cmd.config |= HNS_ROCE_EXSGE_FLAGS; + if (ibv_cmd_get_context(&context->ibv_ctx, &cmd.ibv_cmd, sizeof(cmd), &resp.ibv_resp, sizeof(resp))) 
goto err_free; @@ -124,6 +125,10 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, else context->cqe_size = HNS_ROCE_V3_CQE_SIZE; + context->config = resp.config; + if (resp.config & HNS_ROCE_RSP_EXSGE_FLAGS) + context->max_inline_data = resp.max_inline_data; + context->qp_table_shift = calc_table_shift(resp.qp_tab_size, HNS_ROCE_QP_TABLE_BITS); context->qp_table_mask = (1 << context->qp_table_shift) - 1; diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h index 5d90634..5388f9c 100644 --- a/providers/hns/hns_roce_u.h +++ b/providers/hns/hns_roce_u.h @@ -213,6 +213,8 @@ struct hns_roce_context { unsigned int max_srq_sge; int max_cqe; unsigned int cqe_size; + uint32_t config; + unsigned int max_inline_data; }; struct hns_roce_pd { @@ -267,6 +269,7 @@ struct hns_roce_wq { unsigned int head; unsigned int tail; unsigned int max_gs; + unsigned int ext_sge_cnt; unsigned int rsv_sge; unsigned int wqe_shift; unsigned int shift; /* wq size is 2^shift */ diff --git a/providers/hns/hns_roce_u_abi.h b/providers/hns/hns_roce_u_abi.h index 333f977..2753d30 100644 --- a/providers/hns/hns_roce_u_abi.h +++ b/providers/hns/hns_roce_u_abi.h @@ -47,7 +47,7 @@ DECLARE_DRV_CMD(hns_roce_create_cq_ex, IB_USER_VERBS_EX_CMD_CREATE_CQ, hns_roce_ib_create_cq, hns_roce_ib_create_cq_resp); DECLARE_DRV_CMD(hns_roce_alloc_ucontext, IB_USER_VERBS_CMD_GET_CONTEXT, - empty, hns_roce_ib_alloc_ucontext_resp); + hns_roce_ib_alloc_ucontext, hns_roce_ib_alloc_ucontext_resp); DECLARE_DRV_CMD(hns_roce_create_qp, IB_USER_VERBS_CMD_CREATE_QP, hns_roce_ib_create_qp, hns_roce_ib_create_qp_resp); diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c index bb4298f..ebe68bc 100644 --- a/providers/hns/hns_roce_u_hw_v2.c +++ b/providers/hns/hns_roce_u_hw_v2.c @@ -841,14 +841,6 @@ static void get_src_buf_info(void **src_addr, uint32_t *src_len, } } -static unsigned int get_std_sge_num(struct hns_roce_qp *qp) -{ - if (qp->verbs_qp.qp.qp_type 
== IBV_QPT_UD) - return 0; - - return HNS_ROCE_SGE_IN_WQE; -} - static int fill_ext_sge_inl_data(struct hns_roce_qp *qp, struct hns_roce_sge_info *sge_info, const void *buf_list, @@ -858,12 +850,9 @@ static int fill_ext_sge_inl_data(struct hns_roce_qp *qp, unsigned int sge_mask = qp->ex_sge.sge_cnt - 1; void *dst_addr, *src_addr, *tail_bound_addr; uint32_t src_len, tail_len; - unsigned int std_sge_num; int i; - std_sge_num = get_std_sge_num(qp); - if (sge_info->total_len > - (qp->sq.max_gs - std_sge_num) * HNS_ROCE_SGE_SIZE) + if (sge_info->total_len > qp->sq.ext_sge_cnt * HNS_ROCE_SGE_SIZE) return EINVAL; dst_addr = get_send_sge_ex(qp, sge_info->start_idx & sge_mask); diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c index ba7f2ae..851b145 100644 --- a/providers/hns/hns_roce_u_verbs.c +++ b/providers/hns/hns_roce_u_verbs.c @@ -978,41 +978,88 @@ err_alloc: return -ENOMEM; } -static unsigned int get_wqe_ext_sge_cnt(struct hns_roce_qp *qp) +/** + * Calculated sge num according to attr's max_send_sge + */ +static unsigned int get_sge_num_from_max_send_sge(bool is_ud, + uint32_t max_send_sge) { - if (qp->verbs_qp.qp.qp_type == IBV_QPT_UD) - return qp->sq.max_gs; + unsigned int std_sge_num; + unsigned int min_sge; - if (qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE) - return qp->sq.max_gs - HNS_ROCE_SGE_IN_WQE; + std_sge_num = is_ud ? 0 : HNS_ROCE_SGE_IN_WQE; + min_sge = is_ud ? 1 : 0; + return max_send_sge > std_sge_num ? (max_send_sge - std_sge_num) : + min_sge; +} - return 0; +/** + * Calculated sge num according to attr's max_inline_data + */ +static unsigned int get_sge_num_from_max_inl_data(bool is_ud, + uint32_t max_inline_data) +{ + unsigned int inline_sge = 0; + + inline_sge = max_inline_data / HNS_ROCE_SGE_SIZE; + /* + * if max_inline_data less than + * HNS_ROCE_SGE_IN_WQE * HNS_ROCE_SGE_SIZE, + * In addition to ud's mode, no need to extend sge. 
+ */ + if (!is_ud && (inline_sge <= HNS_ROCE_SGE_IN_WQE)) + inline_sge = 0; + + return inline_sge; } -static void set_ext_sge_param(struct hns_roce_device *hr_dev, +static void set_ext_sge_param(struct hns_roce_context *ctx, struct ibv_qp_init_attr_ex *attr, struct hns_roce_qp *qp, unsigned int wr_cnt) { + bool is_ud = (qp->verbs_qp.qp.qp_type == IBV_QPT_UD); + unsigned int ext_wqe_sge_cnt; + unsigned int inline_ext_sge; unsigned int total_sge_cnt; - unsigned int wqe_sge_cnt; + unsigned int std_sge_num; qp->ex_sge.sge_shift = HNS_ROCE_SGE_SHIFT; - - qp->sq.max_gs = attr->cap.max_send_sge; - - wqe_sge_cnt = get_wqe_ext_sge_cnt(qp); + std_sge_num = is_ud ? 0 : HNS_ROCE_SGE_IN_WQE; + ext_wqe_sge_cnt = get_sge_num_from_max_send_sge(is_ud, + attr->cap.max_send_sge); + + if (ctx->config & HNS_ROCE_RSP_EXSGE_FLAGS) { + attr->cap.max_inline_data = min_t(uint32_t, roundup_pow_of_two( + attr->cap.max_inline_data), + ctx->max_inline_data); + + inline_ext_sge = max(ext_wqe_sge_cnt, + get_sge_num_from_max_inl_data(is_ud, + attr->cap.max_inline_data)); + qp->sq.ext_sge_cnt = inline_ext_sge ? + roundup_pow_of_two(inline_ext_sge) : 0; + qp->sq.max_gs = min((qp->sq.ext_sge_cnt + std_sge_num), + ctx->max_sge); + + ext_wqe_sge_cnt = qp->sq.ext_sge_cnt; + } else { + qp->sq.max_gs = max(1U, attr->cap.max_send_sge); + qp->sq.max_gs = min(qp->sq.max_gs, ctx->max_sge); + qp->sq.ext_sge_cnt = qp->sq.max_gs; + } /* If the number of extended sge is not zero, they MUST use the * space of HNS_HW_PAGE_SIZE at least. 
*/ - if (wqe_sge_cnt) { - total_sge_cnt = roundup_pow_of_two(wr_cnt * wqe_sge_cnt); - qp->ex_sge.sge_cnt = - max(total_sge_cnt, - (unsigned int)HNS_HW_PAGE_SIZE / HNS_ROCE_SGE_SIZE); + if (ext_wqe_sge_cnt) { + total_sge_cnt = roundup_pow_of_two(wr_cnt * ext_wqe_sge_cnt); + qp->ex_sge.sge_cnt = max(total_sge_cnt, + (unsigned int)HNS_HW_PAGE_SIZE / + HNS_ROCE_SGE_SIZE); } } + static void hns_roce_set_qp_params(struct ibv_qp_init_attr_ex *attr, struct hns_roce_qp *qp, struct hns_roce_context *ctx) @@ -1044,10 +1091,9 @@ static void hns_roce_set_qp_params(struct ibv_qp_init_attr_ex *attr, qp->sq.wqe_cnt = cnt; qp->sq.shift = hr_ilog32(cnt); - set_ext_sge_param(hr_dev, attr, qp, cnt); + set_ext_sge_param(ctx, attr, qp, cnt); qp->sq.max_post = min(ctx->max_qp_wr, cnt); - qp->sq.max_gs = min(ctx->max_sge, qp->sq.max_gs); qp->sq_signal_bits = attr->sq_sig_all ? 0 : 1; -- 2.30.0