From 81a2efc28f60ab26398c45236678cc08518b1e41 Mon Sep 17 00:00:00 2001 From: Xin Tian Date: Thu, 8 May 2025 12:10:40 +0800 Subject: [PATCH] libxscale: update to version 2412GA new feature: - support diamond products - support ibv_wr apis - support extended CQ poll apis bugfix: - imm data endian error Signed-off-by: Xin Tian --- providers/xscale/cq.c | 1047 ++++++++++-------------------------- providers/xscale/qp.c | 516 ++++++++++++++---- providers/xscale/verbs.c | 175 ++++-- providers/xscale/xsc_api.h | 4 +- providers/xscale/xsc_hsi.h | 103 ++-- providers/xscale/xscale.c | 12 +- providers/xscale/xscale.h | 37 +- 7 files changed, 923 insertions(+), 971 deletions(-) diff --git a/providers/xscale/cq.c b/providers/xscale/cq.c index e2619f0..609ce2e 100644 --- a/providers/xscale/cq.c +++ b/providers/xscale/cq.c @@ -13,12 +13,12 @@ #include #include -#include #include #include "xscale.h" #include "wqe.h" #include "xsc_hsi.h" +#include "xsc_hw.h" enum { CQ_OK = 0, @@ -68,6 +68,7 @@ static const uint32_t xsc_cqe_opcode[] = { [XSC_OPCODE_RDMA_REQ_WRITE_IMMDT] = IBV_WC_RDMA_WRITE, [XSC_OPCODE_RDMA_RSP_WRITE_IMMDT] = IBV_WC_RECV_RDMA_WITH_IMM, [XSC_OPCODE_RDMA_REQ_READ] = IBV_WC_RDMA_READ, + [XSC_OPCODE_RDMA_CQE_RAW_SNF] = IBV_WC_RECV, }; int xsc_stall_num_loop = 60; @@ -76,16 +77,64 @@ int xsc_stall_cq_poll_max = 100000; int xsc_stall_cq_inc_step = 100; int xsc_stall_cq_dec_step = 10; -static inline uint8_t xsc_get_cqe_opcode(struct xsc_cqe *cqe) ALWAYS_INLINE; -static inline uint8_t xsc_get_cqe_opcode(struct xsc_cqe *cqe) +static void xsc_stall_poll_cq(void) +{ + int i; + + for (i = 0; i < xsc_stall_num_loop; i++) + __asm__ volatile ("nop"); +} + +static inline int get_qp_ctx(struct xsc_context *xctx, + struct xsc_resource **cur_rsc, + uint32_t qpn) + ALWAYS_INLINE; +static inline int get_qp_ctx(struct xsc_context *xctx, + struct xsc_resource **cur_rsc, + uint32_t qpn) +{ + if (!*cur_rsc || (qpn != (*cur_rsc)->rsn)) { + /* + * We do not have to take the QP table lock here, + * because CQs will be locked while QPs are removed + * from the table. + */ + *cur_rsc = (struct xsc_resource *)xsc_find_qp(xctx, qpn); + if (unlikely(!*cur_rsc)) + return CQ_POLL_ERR; + } + + return CQ_OK; +} + +static inline uint8_t xsc_get_cqe_opcode(struct xsc_context *ctx, + struct xsc_resource **cur_rsc, + struct xsc_cqe *cqe) ALWAYS_INLINE; +static inline uint8_t xsc_get_cqe_opcode(struct xsc_context *ctx, + struct xsc_resource **cur_rsc, + struct xsc_cqe *cqe) { - if (cqe->is_error) + uint8_t msg_opcode = xsc_hw_get_cqe_msg_opcode(ctx->device_id, cqe); + struct xsc_qp *qp; + int err; + + if (xsc_hw_is_err_cqe(ctx->device_id, cqe)) return cqe->type ? 
XSC_OPCODE_RDMA_RSP_ERROR : XSC_OPCODE_RDMA_REQ_ERROR; - if (cqe->msg_opcode > XSC_MSG_OPCODE_RDMA_READ) { + + err = get_qp_ctx(ctx, cur_rsc, RD_LE_16(cqe->qp_id)); + if (unlikely(err)) + goto msg_opcode_err_check; + qp = rsc_to_xqp(*cur_rsc); + if (qp->flags & XSC_QP_FLAG_RAWPACKET_SNIFFER) + return XSC_OPCODE_RDMA_CQE_RAW_SNF; + +msg_opcode_err_check: + if (msg_opcode > XSC_MSG_OPCODE_RDMA_READ) { printf("rdma cqe msg code should be send/write/read\n"); return XSC_OPCODE_RDMA_CQE_ERROR; } - return xsc_msg_opcode[cqe->msg_opcode][cqe->type][cqe->with_immdt]; + + return xsc_msg_opcode[msg_opcode][cqe->type][cqe->with_immdt]; } static inline uint8_t get_cqe_l3_hdr_type(struct xsc_cqe64 *cqe) @@ -108,18 +157,11 @@ static void *get_sw_cqe(struct xsc_cq *cq, int n) return NULL; } -static void *next_cqe_sw(struct xsc_cq *cq) -{ - return get_sw_cqe(cq, cq->cons_index); -} - static void update_cons_index(struct xsc_cq *cq) { - union xsc_db_data db; + struct xsc_context *ctx = to_xctx(ibv_cq_ex_to_cq(&cq->verbs_cq.cq_ex)->context); - db.raw_data = cq->cons_index; - db.cqn = cq->cqn; - WR_REG(cq->db, db.raw_data); + xsc_hw_set_cq_ci(ctx->device_id, cq->db, cq->cqn, cq->cons_index); } static inline void handle_good_req( @@ -140,6 +182,7 @@ static inline void handle_good_req( wc->byte_len = ctrl->msg_len; } wq->flush_wqe_cnt--; + wq->need_flush[idx] = 0; xsc_dbg(to_xctx(qp->ibv_qp->context)->dbg_fp, XSC_DBG_CQ_CQE, "wqeid:%u, wq tail:%u\n", idx, wq->tail); @@ -182,40 +225,6 @@ static void dump_cqe(void *buf) printf("0x%08x 0x%08x 0x%08x 0x%08x\n", p[i], p[i+1], p[i+2], p[i+3]); } -static enum ibv_wc_status xsc_cqe_error_code(struct xsc_cqe *cqe) -{ - switch (cqe->error_code) { - case XSC_ERR_CODE_NAK_RETRY: - return IBV_WC_RETRY_EXC_ERR; - case XSC_ERR_CODE_NAK_OPCODE: - return IBV_WC_BAD_RESP_ERR; - case XSC_ERR_CODE_NAK_MR: - return IBV_WC_REM_ACCESS_ERR; - case XSC_ERR_CODE_NAK_OPERATION: - return IBV_WC_REM_OP_ERR; - case XSC_ERR_CODE_NAK_RNR: - return IBV_WC_RNR_RETRY_EXC_ERR; - case XSC_ERR_CODE_LOCAL_MR: - return IBV_WC_LOC_PROT_ERR; - case XSC_ERR_CODE_LOCAL_LEN: - return IBV_WC_LOC_LEN_ERR; - case XSC_ERR_CODE_LEN_GEN_CQE: - return IBV_WC_LOC_LEN_ERR; - case XSC_ERR_CODE_OPERATION: - return IBV_WC_LOC_ACCESS_ERR; - case XSC_ERR_CODE_FLUSH: - return IBV_WC_WR_FLUSH_ERR; - case XSC_ERR_CODE_MALF_WQE_HOST: - case XSC_ERR_CODE_STRG_ACC_GEN_CQE: - return IBV_WC_FATAL_ERR; - case XSC_ERR_CODE_OPCODE_GEN_CQE: - case XSC_ERR_CODE_LOCAL_OPCODE: - default: - return IBV_WC_GENERAL_ERR; - } -} - - static inline bool xsc_qp_need_cqe(struct xsc_qp *qp, int *type, int *wqe_id) { struct xsc_wq *wq; @@ -248,128 +257,49 @@ static inline void handle_bad_req( struct ibv_wc *wc, struct xsc_cqe *cqe, struct xsc_qp *qp, struct xsc_wq *wq) { int idx; - wc->status = xsc_cqe_error_code(cqe); - wc->vendor_err = cqe->error_code; + + wc->status = xsc_hw_cqe_err_status(xctx->device_id, cqe); + wc->vendor_err = xsc_hw_get_cqe_err_code(xctx->device_id, cqe); idx = RD_LE_16(cqe->wqe_id); idx >>= (qp->sq.wqe_shift - XSC_BASE_WQE_SHIFT); idx &= (wq->wqe_cnt -1); wq->tail = wq->wqe_head[idx] + 1; wc->wr_id = wq->wrid[idx]; - wq->flush_wqe_cnt--; - - if (cqe->error_code != XSC_ERR_CODE_FLUSH) { + if (wq->need_flush[idx]) + wq->flush_wqe_cnt--; + wq->need_flush[idx] = 0; + if (wc->status != IBV_WC_WR_FLUSH_ERR) { printf("%s: got completion with error:\n", xctx->hostname); dump_cqe(cqe); } + qp->err_occurred = 1; } static inline void handle_bad_responder( struct xsc_context *xctx, - struct ibv_wc *wc, struct xsc_cqe *cqe, struct 
xsc_wq *wq) + struct ibv_wc *wc, struct xsc_cqe *cqe, struct xsc_qp *qp, struct xsc_wq *wq) { - wc->status = xsc_cqe_error_code(cqe); - wc->vendor_err = cqe->error_code; + wc->status = xsc_hw_cqe_err_status(xctx->device_id, cqe); + wc->vendor_err = xsc_hw_get_cqe_err_code(xctx->device_id, cqe); ++wq->tail; wq->flush_wqe_cnt--; - - if (cqe->error_code != XSC_ERR_CODE_FLUSH) { + if (wc->status != IBV_WC_WR_FLUSH_ERR) { printf("%s: got completion with error:\n", xctx->hostname); dump_cqe(cqe); } -} - -#if defined(__x86_64__) || defined (__i386__) -static inline unsigned long get_cycles(void) -{ - uint32_t low, high; - uint64_t val; - asm volatile ("rdtsc" : "=a" (low), "=d" (high)); - val = high; - val = (val << 32) | low; - return val; -} - -static void xsc_stall_poll_cq(void) -{ - int i; - - for (i = 0; i < xsc_stall_num_loop; i++) - (void)get_cycles(); -} -static void xsc_stall_cycles_poll_cq(uint64_t cycles) -{ - while (get_cycles() < cycles) - ; /* Nothing */ -} -static void xsc_get_cycles(uint64_t *cycles) -{ - *cycles = get_cycles(); -} -#else -static void xsc_stall_poll_cq(void) -{ -} -static void xsc_stall_cycles_poll_cq(uint64_t cycles) -{ -} -static void xsc_get_cycles(uint64_t *cycles) -{ -} -#endif - -static inline int get_qp_ctx(struct xsc_context *xctx, - struct xsc_resource **cur_rsc, - uint32_t qpn) - ALWAYS_INLINE; -static inline int get_qp_ctx(struct xsc_context *xctx, - struct xsc_resource **cur_rsc, - uint32_t qpn) -{ - if (!*cur_rsc || (qpn != (*cur_rsc)->rsn)) { - /* - * We do not have to take the QP table lock here, - * because CQs will be locked while QPs are removed - * from the table. - */ - *cur_rsc = (struct xsc_resource *)xsc_find_qp(xctx, qpn); - if (unlikely(!*cur_rsc)) - return CQ_POLL_ERR; - } - - return CQ_OK; -} - -static inline int xsc_get_next_cqe(struct xsc_cq *cq, - struct xsc_cqe64 **pcqe64, - void **pcqe) - ALWAYS_INLINE; -static inline int xsc_get_next_cqe(struct xsc_cq *cq, - struct xsc_cqe64 **pcqe64, - void **pcqe) -{ - void *cqe = next_cqe_sw(cq); - if (!cqe) - return CQ_EMPTY; - - ++cq->cons_index; - - /* - * Make sure we read CQ entry contents after we've checked the - * ownership bit. 
- */ - udma_from_device_barrier(); - - *pcqe = cqe; - - return CQ_OK; + qp->err_occurred = 1; } static inline int xsc_parse_cqe(struct xsc_cq *cq, - struct xsc_cqe *cqe, - struct xsc_resource **cur_rsc, - struct ibv_wc *wc, - int lazy) + struct xsc_cqe *cqe, + struct xsc_resource **cur_rsc, + struct ibv_wc *wc) + ALWAYS_INLINE; +static inline int xsc_parse_cqe(struct xsc_cq *cq, + struct xsc_cqe *cqe, + struct xsc_resource **cur_rsc, + struct ibv_wc *wc) { struct xsc_wq *wq; uint32_t qp_id; @@ -378,12 +308,14 @@ static inline int xsc_parse_cqe(struct xsc_cq *cq, struct xsc_qp *xqp = NULL; struct xsc_context *xctx; + memset(wc, 0, sizeof(*wc)); + wc->wc_flags = 0; + xctx = to_xctx(ibv_cq_ex_to_cq(&cq->verbs_cq.cq_ex)->context); qp_id = cqe->qp_id; qp_id = RD_LE_16(qp_id); - wc->wc_flags = 0; wc->qp_num = qp_id; - opcode = xsc_get_cqe_opcode(cqe); + opcode = xsc_get_cqe_opcode(xctx, cur_rsc, cqe); xsc_dbg(xctx->dbg_fp, XSC_DBG_CQ_CQE, "opcode:0x%x qp_num:%u\n", opcode, qp_id); switch (opcode) { @@ -404,8 +336,9 @@ static inline int xsc_parse_cqe(struct xsc_cq *cq, case XSC_OPCODE_RDMA_RSP_RECV_IMMDT: case XSC_OPCODE_RDMA_RSP_WRITE_IMMDT: wc->wc_flags |= IBV_WC_WITH_IMM; - wc->imm_data = cqe->imm_data; + WR_BE_32(wc->imm_data, RD_LE_32(cqe->imm_data)); SWITCH_FALLTHROUGH; + case XSC_OPCODE_RDMA_CQE_RAW_SNF: case XSC_OPCODE_RDMA_RSP_RECV: err = get_qp_ctx(xctx, cur_rsc, qp_id); if (unlikely(err)) @@ -428,7 +361,7 @@ static inline int xsc_parse_cqe(struct xsc_cq *cq, return CQ_POLL_ERR; xqp = rsc_to_xqp(*cur_rsc); wq = &xqp->rq; - handle_bad_responder(xctx, wc, cqe, wq); + handle_bad_responder(xctx, wc, cqe, xqp, wq); break; case XSC_OPCODE_RDMA_CQE_ERROR: printf("%s: got completion with cqe format error:\n", xctx->hostname); @@ -440,30 +373,121 @@ static inline int xsc_parse_cqe(struct xsc_cq *cq, return CQ_OK; } -static inline int xsc_parse_lazy_cqe(struct xsc_cq *cq, - struct xsc_cqe64 *cqe64, - void *cqe, int cqe_ver) - ALWAYS_INLINE; -static inline int xsc_parse_lazy_cqe(struct xsc_cq *cq, - struct xsc_cqe64 *cqe64, - void *cqe, int cqe_ver) +static inline int xsc_parse_cqe_lazy(struct xsc_cq *cq, struct xsc_cqe *cqe) ALWAYS_INLINE; +static inline int xsc_parse_cqe_lazy(struct xsc_cq *cq, struct xsc_cqe *cqe) { - return xsc_parse_cqe(cq, cqe, &cq->cur_rsc, NULL, 1); + struct xsc_resource *cur_rsc = NULL; + struct xsc_qp *xqp = NULL; + struct xsc_context *xctx; + struct xsc_wq *wq; + uint32_t qp_id; + uint8_t opcode; + int err = 0; + int idx; + + cq->cqe = cqe; + xctx = to_xctx(ibv_cq_ex_to_cq(&cq->verbs_cq.cq_ex)->context); + qp_id = cqe->qp_id; + qp_id = RD_LE_16(qp_id); + opcode = xsc_get_cqe_opcode(xctx, &cur_rsc, cqe); + + xsc_dbg(xctx->dbg_fp, XSC_DBG_CQ_CQE, "opcode:0x%x qp_num:%u\n", opcode, qp_id); + switch (opcode) { + case XSC_OPCODE_RDMA_REQ_SEND_IMMDT: + case XSC_OPCODE_RDMA_REQ_WRITE_IMMDT: + case XSC_OPCODE_RDMA_REQ_SEND: + case XSC_OPCODE_RDMA_REQ_WRITE: + case XSC_OPCODE_RDMA_REQ_READ: + cq->verbs_cq.cq_ex.status = IBV_WC_SUCCESS; + err = get_qp_ctx(xctx, &cur_rsc, qp_id); + if (unlikely(err)) + return CQ_EMPTY; + xqp = rsc_to_xqp(cur_rsc); + wq = &xqp->sq; + idx = RD_LE_16(cqe->wqe_id); + idx >>= (wq->wqe_shift - XSC_BASE_WQE_SHIFT); + idx &= (wq->wqe_cnt - 1); + cq->verbs_cq.cq_ex.wr_id = wq->wrid[idx]; + wq->tail = wq->wqe_head[idx] + 1; + wq->flush_wqe_cnt--; + wq->need_flush[idx] = 0; + break; + case XSC_OPCODE_RDMA_RSP_RECV_IMMDT: + case XSC_OPCODE_RDMA_RSP_WRITE_IMMDT: + case XSC_OPCODE_RDMA_RSP_RECV: + cq->verbs_cq.cq_ex.status = IBV_WC_SUCCESS; + err = 
get_qp_ctx(xctx, &cur_rsc, qp_id); + if (unlikely(err)) + return CQ_EMPTY; + xqp = rsc_to_xqp(cur_rsc); + wq = &xqp->rq; + idx = wq->tail & (wq->wqe_cnt - 1); + cq->verbs_cq.cq_ex.wr_id = wq->wrid[idx]; + ++wq->tail; + wq->flush_wqe_cnt--; + break; + case XSC_OPCODE_RDMA_REQ_ERROR: + cq->verbs_cq.cq_ex.status = xsc_hw_cqe_err_status(xctx->device_id, cqe); + err = get_qp_ctx(xctx, &cur_rsc, qp_id); + if (unlikely(err)) + return CQ_POLL_ERR; + xqp = rsc_to_xqp(cur_rsc); + wq = &xqp->sq; + idx = RD_LE_16(cqe->wqe_id); + idx >>= (wq->wqe_shift - XSC_BASE_WQE_SHIFT); + idx &= (wq->wqe_cnt - 1); + wq->tail = wq->wqe_head[idx] + 1; + cq->verbs_cq.cq_ex.wr_id = wq->wrid[idx]; + if (wq->need_flush[idx]) + wq->flush_wqe_cnt--; + wq->need_flush[idx] = 0; + if (cq->verbs_cq.cq_ex.status != IBV_WC_WR_FLUSH_ERR) { + printf("%s: got completion with error:\n", xctx->hostname); + dump_cqe(cqe); + } + xqp->ibv_qp->state = IBV_QPS_ERR; + break; + case XSC_OPCODE_RDMA_RSP_ERROR: + cq->verbs_cq.cq_ex.status = xsc_hw_cqe_err_status(xctx->device_id, cqe); + err = get_qp_ctx(xctx, &cur_rsc, qp_id); + if (unlikely(err)) + return CQ_POLL_ERR; + xqp = rsc_to_xqp(cur_rsc); + wq = &xqp->rq; + + ++wq->tail; + wq->flush_wqe_cnt--; + if (cq->verbs_cq.cq_ex.status != IBV_WC_WR_FLUSH_ERR) { + printf("%s: got completion with error:\n", xctx->hostname); + dump_cqe(cqe); + } + xqp->ibv_qp->state = IBV_QPS_ERR; + break; + case XSC_OPCODE_RDMA_CQE_ERROR: + printf("%s: got completion with cqe format error:\n", xctx->hostname); + dump_cqe(cqe); + SWITCH_FALLTHROUGH; + default: + return CQ_POLL_ERR; + } + return CQ_OK; } static inline int xsc_poll_one(struct xsc_cq *cq, struct xsc_resource **cur_rsc, - struct ibv_wc *wc) + struct ibv_wc *wc, + int lazy) ALWAYS_INLINE; static inline int xsc_poll_one(struct xsc_cq *cq, struct xsc_resource **cur_rsc, - struct ibv_wc *wc) + struct ibv_wc *wc, + int lazy) { struct xsc_cqe *cqe = get_sw_cqe(cq, cq->cons_index); - if (cqe == NULL) { + int err = 0; + + if (!cqe) return CQ_EMPTY; - } - memset(wc, 0, sizeof(*wc)); ++cq->cons_index; @@ -472,7 +496,12 @@ static inline int xsc_poll_one(struct xsc_cq *cq, * ownership bit. */ udma_from_device_barrier(); - return xsc_parse_cqe(cq, cqe, cur_rsc, wc, 0); + if (!lazy) + err = xsc_parse_cqe(cq, cqe, cur_rsc, wc); + else + err = xsc_parse_cqe_lazy(cq, cqe); + + return err; } static inline void gen_flush_err_cqe(struct xsc_err_state_qp_node *err_node, @@ -500,10 +529,12 @@ static inline void gen_flush_err_cqe(struct xsc_err_state_qp_node *err_node, wc->qp_num = qp_id; wc->status = IBV_WC_WR_FLUSH_ERR; - wc->vendor_err = XSC_ERR_CODE_FLUSH; + wc->vendor_err = XSC_ANDES_ERR_CODE_FLUSH; wc->wr_id = wq->wrid[idx]; wq->tail++; wq->flush_wqe_cnt--; + if (err_node->is_sq) + wq->need_flush[idx] = 0; } static inline int xsc_generate_flush_err_cqe(struct ibv_cq *ibcq, @@ -578,9 +609,14 @@ static inline int poll_cq(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc) int err = CQ_OK; uint32_t next_cid = cq->cons_index; + if (cq->stall_enable && cq->stall_next_poll) { + cq->stall_next_poll = 0; + xsc_stall_poll_cq(); + } + xsc_spin_lock(&cq->lock); for (npolled = 0; npolled < ne; ++npolled) { - err = xsc_poll_one(cq, &rsc, wc + npolled); + err = xsc_poll_one(cq, &rsc, wc + npolled, 0); if (err != CQ_OK) break; } @@ -596,677 +632,148 @@ static inline int poll_cq(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc) update_cons_index(cq); xsc_spin_unlock(&cq->lock); - return err == CQ_POLL_ERR ? 
err : npolled; -} - -enum polling_mode { - POLLING_MODE_NO_STALL, - POLLING_MODE_STALL, - POLLING_MODE_STALL_ADAPTIVE -}; + if (cq->stall_enable && err == CQ_EMPTY) + cq->stall_next_poll = 1; -static inline void _xsc_end_poll(struct ibv_cq_ex *ibcq, - int lock, enum polling_mode stall) - ALWAYS_INLINE; -static inline void _xsc_end_poll(struct ibv_cq_ex *ibcq, - int lock, enum polling_mode stall) -{ - struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); - - update_cons_index(cq); - - if (lock) - xsc_spin_unlock(&cq->lock); - - if (stall) { - if (stall == POLLING_MODE_STALL_ADAPTIVE) { - if (!(cq->flags & XSC_CQ_FLAGS_FOUND_CQES)) { - cq->stall_cycles = max(cq->stall_cycles - xsc_stall_cq_dec_step, - xsc_stall_cq_poll_min); - xsc_get_cycles(&cq->stall_last_count); - } else if (cq->flags & XSC_CQ_FLAGS_EMPTY_DURING_POLL) { - cq->stall_cycles = min(cq->stall_cycles + xsc_stall_cq_inc_step, - xsc_stall_cq_poll_max); - xsc_get_cycles(&cq->stall_last_count); - } else { - cq->stall_cycles = max(cq->stall_cycles - xsc_stall_cq_dec_step, - xsc_stall_cq_poll_min); - cq->stall_last_count = 0; - } - } else if (!(cq->flags & XSC_CQ_FLAGS_FOUND_CQES)) { - cq->stall_next_poll = 1; - } - - cq->flags &= ~(XSC_CQ_FLAGS_FOUND_CQES | XSC_CQ_FLAGS_EMPTY_DURING_POLL); - } + return err == CQ_POLL_ERR ? err : npolled; } -static inline int xsc_start_poll(struct ibv_cq_ex *ibcq, struct ibv_poll_cq_attr *attr, - int lock, enum polling_mode stall, - int cqe_version, int clock_update) - ALWAYS_INLINE; -static inline int xsc_start_poll(struct ibv_cq_ex *ibcq, struct ibv_poll_cq_attr *attr, - int lock, enum polling_mode stall, - int cqe_version, int clock_update) +int xsc_poll_cq(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc) { - struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); - struct xsc_cqe64 *cqe64; - void *cqe; - int err; - - if (unlikely(attr->comp_mask)) - return EINVAL; - - if (stall) { - if (stall == POLLING_MODE_STALL_ADAPTIVE) { - if (cq->stall_last_count) - xsc_stall_cycles_poll_cq(cq->stall_last_count + cq->stall_cycles); - } else if (cq->stall_next_poll) { - cq->stall_next_poll = 0; - xsc_stall_poll_cq(); - } - } - - if (lock) - xsc_spin_lock(&cq->lock); - - cq->cur_rsc = NULL; - - err = xsc_get_next_cqe(cq, &cqe64, &cqe); - if (err == CQ_EMPTY) { - if (lock) - xsc_spin_unlock(&cq->lock); - - if (stall) { - if (stall == POLLING_MODE_STALL_ADAPTIVE) { - cq->stall_cycles = max(cq->stall_cycles - xsc_stall_cq_dec_step, - xsc_stall_cq_poll_min); - xsc_get_cycles(&cq->stall_last_count); - } else { - cq->stall_next_poll = 1; - } - } - - return ENOENT; - } - - if (stall) - cq->flags |= XSC_CQ_FLAGS_FOUND_CQES; - - err = xsc_parse_lazy_cqe(cq, cqe64, cqe, cqe_version); - if (lock && err) - xsc_spin_unlock(&cq->lock); - - if (stall && err) { - if (stall == POLLING_MODE_STALL_ADAPTIVE) { - cq->stall_cycles = max(cq->stall_cycles - xsc_stall_cq_dec_step, - xsc_stall_cq_poll_min); - cq->stall_last_count = 0; - } - - cq->flags &= ~(XSC_CQ_FLAGS_FOUND_CQES); - - goto out; - } - - if (clock_update && !err) - err = xscdv_get_clock_info(ibcq->context, &cq->last_clock_info); - -out: - return err; + return poll_cq(ibcq, ne, wc); } -static inline int xsc_next_poll(struct ibv_cq_ex *ibcq, - enum polling_mode stall, int cqe_version) +static inline int xsc_start_poll(struct ibv_cq_ex *ibcq, + struct ibv_poll_cq_attr *attr) ALWAYS_INLINE; -static inline int xsc_next_poll(struct ibv_cq_ex *ibcq, - enum polling_mode stall, - int cqe_version) +static inline int xsc_start_poll(struct ibv_cq_ex *ibcq, + struct ibv_poll_cq_attr *attr) 
{ struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); - struct xsc_cqe64 *cqe64; - void *cqe; int err; - err = xsc_get_next_cqe(cq, &cqe64, &cqe); - if (err == CQ_EMPTY) { - if (stall == POLLING_MODE_STALL_ADAPTIVE) - cq->flags |= XSC_CQ_FLAGS_EMPTY_DURING_POLL; - - return ENOENT; - } - - return xsc_parse_lazy_cqe(cq, cqe64, cqe, cqe_version); -} - -static inline int xsc_next_poll_adaptive_v0(struct ibv_cq_ex *ibcq) -{ - return xsc_next_poll(ibcq, POLLING_MODE_STALL_ADAPTIVE, 0); -} - -static inline int xsc_next_poll_adaptive_v1(struct ibv_cq_ex *ibcq) -{ - return xsc_next_poll(ibcq, POLLING_MODE_STALL_ADAPTIVE, 1); -} - -static inline int xsc_next_poll_v0(struct ibv_cq_ex *ibcq) -{ - return xsc_next_poll(ibcq, 0, 0); -} - -static inline int xsc_next_poll_v1(struct ibv_cq_ex *ibcq) -{ - return xsc_next_poll(ibcq, 0, 1); -} - -static inline int xsc_start_poll_v0(struct ibv_cq_ex *ibcq, - struct ibv_poll_cq_attr *attr) -{ - return xsc_start_poll(ibcq, attr, 0, 0, 0, 0); -} - -static inline int xsc_start_poll_v1(struct ibv_cq_ex *ibcq, - struct ibv_poll_cq_attr *attr) -{ - return xsc_start_poll(ibcq, attr, 0, 0, 1, 0); -} - -static inline int xsc_start_poll_v0_lock(struct ibv_cq_ex *ibcq, - struct ibv_poll_cq_attr *attr) -{ - return xsc_start_poll(ibcq, attr, 1, 0, 0, 0); -} - -static inline int xsc_start_poll_v1_lock(struct ibv_cq_ex *ibcq, - struct ibv_poll_cq_attr *attr) -{ - return xsc_start_poll(ibcq, attr, 1, 0, 1, 0); -} - -static inline int xsc_start_poll_adaptive_stall_v0_lock(struct ibv_cq_ex *ibcq, - struct ibv_poll_cq_attr *attr) -{ - return xsc_start_poll(ibcq, attr, 1, POLLING_MODE_STALL_ADAPTIVE, 0, 0); -} - -static inline int xsc_start_poll_stall_v0_lock(struct ibv_cq_ex *ibcq, - struct ibv_poll_cq_attr *attr) -{ - return xsc_start_poll(ibcq, attr, 1, POLLING_MODE_STALL, 0, 0); -} - -static inline int xsc_start_poll_adaptive_stall_v1_lock(struct ibv_cq_ex *ibcq, - struct ibv_poll_cq_attr *attr) -{ - return xsc_start_poll(ibcq, attr, 1, POLLING_MODE_STALL_ADAPTIVE, 1, 0); -} - -static inline int xsc_start_poll_stall_v1_lock(struct ibv_cq_ex *ibcq, - struct ibv_poll_cq_attr *attr) -{ - return xsc_start_poll(ibcq, attr, 1, POLLING_MODE_STALL, 1, 0); -} - -static inline int xsc_start_poll_stall_v0(struct ibv_cq_ex *ibcq, - struct ibv_poll_cq_attr *attr) -{ - return xsc_start_poll(ibcq, attr, 0, POLLING_MODE_STALL, 0, 0); -} - -static inline int xsc_start_poll_adaptive_stall_v0(struct ibv_cq_ex *ibcq, - struct ibv_poll_cq_attr *attr) -{ - return xsc_start_poll(ibcq, attr, 0, POLLING_MODE_STALL_ADAPTIVE, 0, 0); -} - -static inline int xsc_start_poll_adaptive_stall_v1(struct ibv_cq_ex *ibcq, - struct ibv_poll_cq_attr *attr) -{ - return xsc_start_poll(ibcq, attr, 0, POLLING_MODE_STALL_ADAPTIVE, 1, 0); -} - -static inline int xsc_start_poll_stall_v1(struct ibv_cq_ex *ibcq, - struct ibv_poll_cq_attr *attr) -{ - return xsc_start_poll(ibcq, attr, 0, POLLING_MODE_STALL, 1, 0); -} - -static inline int xsc_start_poll_v0_lock_clock_update(struct ibv_cq_ex *ibcq, - struct ibv_poll_cq_attr *attr) -{ - return xsc_start_poll(ibcq, attr, 1, 0, 0, 1); -} - -static inline int xsc_start_poll_v1_lock_clock_update(struct ibv_cq_ex *ibcq, - struct ibv_poll_cq_attr *attr) -{ - return xsc_start_poll(ibcq, attr, 1, 0, 1, 1); -} - -static inline int xsc_start_poll_v1_clock_update(struct ibv_cq_ex *ibcq, - struct ibv_poll_cq_attr *attr) -{ - return xsc_start_poll(ibcq, attr, 0, 0, 1, 1); -} - -static inline int xsc_start_poll_v0_clock_update(struct ibv_cq_ex *ibcq, - struct ibv_poll_cq_attr *attr) -{ - return 
xsc_start_poll(ibcq, attr, 0, 0, 0, 1); -} - -static inline int xsc_start_poll_stall_v1_lock_clock_update(struct ibv_cq_ex *ibcq, - struct ibv_poll_cq_attr *attr) -{ - return xsc_start_poll(ibcq, attr, 1, POLLING_MODE_STALL, 1, 1); -} - -static inline int xsc_start_poll_stall_v0_lock_clock_update(struct ibv_cq_ex *ibcq, - struct ibv_poll_cq_attr *attr) -{ - return xsc_start_poll(ibcq, attr, 1, POLLING_MODE_STALL, 0, 1); -} - -static inline int xsc_start_poll_stall_v1_clock_update(struct ibv_cq_ex *ibcq, - struct ibv_poll_cq_attr *attr) -{ - return xsc_start_poll(ibcq, attr, 0, POLLING_MODE_STALL, 1, 1); -} - -static inline int xsc_start_poll_stall_v0_clock_update(struct ibv_cq_ex *ibcq, - struct ibv_poll_cq_attr *attr) -{ - return xsc_start_poll(ibcq, attr, 0, POLLING_MODE_STALL, 0, 1); -} - -static inline int xsc_start_poll_adaptive_stall_v0_lock_clock_update(struct ibv_cq_ex *ibcq, - struct ibv_poll_cq_attr *attr) -{ - return xsc_start_poll(ibcq, attr, 1, POLLING_MODE_STALL_ADAPTIVE, 0, 1); -} - -static inline int xsc_start_poll_adaptive_stall_v1_lock_clock_update(struct ibv_cq_ex *ibcq, - struct ibv_poll_cq_attr *attr) -{ - return xsc_start_poll(ibcq, attr, 1, POLLING_MODE_STALL_ADAPTIVE, 1, 1); -} - -static inline int xsc_start_poll_adaptive_stall_v0_clock_update(struct ibv_cq_ex *ibcq, - struct ibv_poll_cq_attr *attr) -{ - return xsc_start_poll(ibcq, attr, 0, POLLING_MODE_STALL_ADAPTIVE, 0, 1); -} - -static inline int xsc_start_poll_adaptive_stall_v1_clock_update(struct ibv_cq_ex *ibcq, - struct ibv_poll_cq_attr *attr) -{ - return xsc_start_poll(ibcq, attr, 0, POLLING_MODE_STALL_ADAPTIVE, 1, 1); -} - -static inline void xsc_end_poll_adaptive_stall_lock(struct ibv_cq_ex *ibcq) -{ - _xsc_end_poll(ibcq, 1, POLLING_MODE_STALL_ADAPTIVE); -} - -static inline void xsc_end_poll_stall_lock(struct ibv_cq_ex *ibcq) -{ - _xsc_end_poll(ibcq, 1, POLLING_MODE_STALL); -} - -static inline void xsc_end_poll_adaptive_stall(struct ibv_cq_ex *ibcq) -{ - _xsc_end_poll(ibcq, 0, POLLING_MODE_STALL_ADAPTIVE); -} + xsc_spin_lock(&cq->lock); + err = xsc_poll_one(cq, NULL, NULL, 1); + if (err == CQ_EMPTY) + xsc_spin_unlock(&cq->lock); -static inline void xsc_end_poll_stall(struct ibv_cq_ex *ibcq) -{ - _xsc_end_poll(ibcq, 0, POLLING_MODE_STALL); + return (err == CQ_EMPTY) ? 
ENOENT : err; } static inline void xsc_end_poll(struct ibv_cq_ex *ibcq) -{ - _xsc_end_poll(ibcq, 0, 0); -} - -static inline void xsc_end_poll_lock(struct ibv_cq_ex *ibcq) -{ - _xsc_end_poll(ibcq, 1, 0); -} - -int xsc_poll_cq(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc) -{ - return poll_cq(ibcq, ne, wc); -} - -static inline enum ibv_wc_opcode xsc_cq_read_wc_opcode(struct ibv_cq_ex *ibcq) -{ - struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); - - switch (xscdv_get_cqe_opcode(cq->cqe64)) { - case XSC_CQE_RESP_WR_IMM: - return IBV_WC_RECV_RDMA_WITH_IMM; - case XSC_CQE_RESP_SEND: - case XSC_CQE_RESP_SEND_IMM: - case XSC_CQE_RESP_SEND_INV: - if (unlikely(cq->cqe64->app == XSC_CQE_APP_TAG_MATCHING)) { - switch (cq->cqe64->app_op) { - case XSC_CQE_APP_OP_TM_CONSUMED_MSG_SW_RDNV: - case XSC_CQE_APP_OP_TM_CONSUMED_MSG: - case XSC_CQE_APP_OP_TM_CONSUMED_SW_RDNV: - case XSC_CQE_APP_OP_TM_EXPECTED: - case XSC_CQE_APP_OP_TM_UNEXPECTED: - return IBV_WC_TM_RECV; - case XSC_CQE_APP_OP_TM_NO_TAG: - return IBV_WC_TM_NO_TAG; - } - } - return IBV_WC_RECV; - case XSC_CQE_NO_PACKET: - switch (cq->cqe64->app_op) { - case XSC_CQE_APP_OP_TM_REMOVE: - return IBV_WC_TM_DEL; - case XSC_CQE_APP_OP_TM_APPEND: - return IBV_WC_TM_ADD; - case XSC_CQE_APP_OP_TM_NOOP: - return IBV_WC_TM_SYNC; - case XSC_CQE_APP_OP_TM_CONSUMED: - return IBV_WC_TM_RECV; - } - break; - case XSC_CQE_REQ: - switch (be32toh(cq->cqe64->sop_drop_qpn) >> 24) { - case XSC_OPCODE_RDMA_WRITE_IMM: - case XSC_OPCODE_RDMA_WRITE: - return IBV_WC_RDMA_WRITE; - case XSC_OPCODE_SEND_IMM: - case XSC_OPCODE_SEND: - case XSC_OPCODE_SEND_INVAL: - return IBV_WC_SEND; - case XSC_OPCODE_RDMA_READ: - return IBV_WC_RDMA_READ; - case XSC_OPCODE_ATOMIC_CS: - return IBV_WC_COMP_SWAP; - case XSC_OPCODE_ATOMIC_FA: - return IBV_WC_FETCH_ADD; - case XSC_OPCODE_UMR: - return cq->umr_opcode; - case XSC_OPCODE_TSO: - return IBV_WC_TSO; - } - } - - return 0; -} - -static inline uint32_t xsc_cq_read_wc_qp_num(struct ibv_cq_ex *ibcq) + ALWAYS_INLINE; +static inline void xsc_end_poll(struct ibv_cq_ex *ibcq) { struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); - return be32toh(cq->cqe64->sop_drop_qpn) & 0xffffff; + udma_to_device_barrier(); + update_cons_index(cq); + xsc_spin_unlock(&cq->lock); } -static inline unsigned int xsc_cq_read_wc_flags(struct ibv_cq_ex *ibcq) +static inline int xsc_next_poll(struct ibv_cq_ex *ibcq) + ALWAYS_INLINE; +static inline int xsc_next_poll(struct ibv_cq_ex *ibcq) { struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); - int wc_flags = 0; - - if (cq->flags & XSC_CQ_FLAGS_RX_CSUM_VALID) - wc_flags = get_csum_ok(cq->cqe64); - - switch (xscdv_get_cqe_opcode(cq->cqe64)) { - case XSC_CQE_RESP_WR_IMM: - case XSC_CQE_RESP_SEND_IMM: - wc_flags |= IBV_WC_WITH_IMM; - break; - case XSC_CQE_RESP_SEND_INV: - wc_flags |= IBV_WC_WITH_INV; - break; - } - - if (cq->flags & XSC_CQ_FLAGS_TM_SYNC_REQ) - wc_flags |= IBV_WC_TM_SYNC_REQ; + int err; - if (unlikely(cq->cqe64->app == XSC_CQE_APP_TAG_MATCHING)) { - switch (cq->cqe64->app_op) { - case XSC_CQE_APP_OP_TM_CONSUMED_MSG_SW_RDNV: - case XSC_CQE_APP_OP_TM_CONSUMED_MSG: - case XSC_CQE_APP_OP_TM_MSG_COMPLETION_CANCELED: - /* Full completion */ - wc_flags |= (IBV_WC_TM_MATCH | IBV_WC_TM_DATA_VALID); - break; - case XSC_CQE_APP_OP_TM_CONSUMED_SW_RDNV: - case XSC_CQE_APP_OP_TM_CONSUMED: /* First completion */ - wc_flags |= IBV_WC_TM_MATCH; - break; - case XSC_CQE_APP_OP_TM_EXPECTED: /* Second completion */ - wc_flags |= IBV_WC_TM_DATA_VALID; - break; - } - } + err = xsc_poll_one(cq, NULL, NULL, 1); - wc_flags |= 
((be32toh(cq->cqe64->flags_rqpn) >> 28) & 3) ? IBV_WC_GRH : 0; - return wc_flags; + return (err == CQ_EMPTY) ? ENOENT : err; } -static inline uint32_t xsc_cq_read_wc_byte_len(struct ibv_cq_ex *ibcq) +static inline enum ibv_wc_opcode xsc_wc_read_opcode(struct ibv_cq_ex *ibcq) { - struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); + struct xsc_cqe *cqe = to_xcq(ibv_cq_ex_to_cq(ibcq))->cqe; + struct xsc_context *xctx = to_xctx(ibv_cq_ex_to_cq(ibcq)->context); + uint8_t opcode = xsc_hw_get_cqe_msg_opcode(xctx->device_id, cqe); - return be32toh(cq->cqe64->byte_cnt); + return xsc_cqe_opcode[opcode]; } -static inline uint32_t xsc_cq_read_wc_vendor_err(struct ibv_cq_ex *ibcq) +static inline uint32_t xsc_wc_read_qp_num(struct ibv_cq_ex *ibcq) { - struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); - struct xsc_err_cqe *ecqe = (struct xsc_err_cqe *)cq->cqe64; + struct xsc_cqe *cqe = to_xcq(ibv_cq_ex_to_cq(ibcq))->cqe; - return ecqe->vendor_err_synd; + return le32toh(cqe->qp_id); } -static inline __be32 xsc_cq_read_wc_imm_data(struct ibv_cq_ex *ibcq) +static inline unsigned int xsc_wc_read_flags(struct ibv_cq_ex *ibcq) { - struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); + struct xsc_cqe *cqe = to_xcq(ibv_cq_ex_to_cq(ibcq))->cqe; + struct xsc_context *xctx = to_xctx(ibv_cq_ex_to_cq(ibcq)->context); + uint8_t opcode = xsc_hw_get_cqe_msg_opcode(xctx->device_id, cqe); - switch (xscdv_get_cqe_opcode(cq->cqe64)) { - case XSC_CQE_RESP_SEND_INV: - /* This is returning invalidate_rkey which is in host order, see - * ibv_wc_read_invalidated_rkey - */ - return (__force __be32)be32toh(cq->cqe64->imm_inval_pkey); + switch (opcode) { + case XSC_OPCODE_RDMA_REQ_SEND_IMMDT: + case XSC_OPCODE_RDMA_REQ_WRITE_IMMDT: + case XSC_OPCODE_RDMA_RSP_RECV_IMMDT: + case XSC_OPCODE_RDMA_RSP_WRITE_IMMDT: + return IBV_WC_WITH_IMM; default: - return cq->cqe64->imm_inval_pkey; + return 0; } } -static inline uint32_t xsc_cq_read_wc_slid(struct ibv_cq_ex *ibcq) +static inline uint32_t xsc_wc_read_byte_len(struct ibv_cq_ex *ibcq) { - struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); + struct xsc_cqe *cqe = to_xcq(ibv_cq_ex_to_cq(ibcq))->cqe; - return (uint32_t)be16toh(cq->cqe64->slid); + return le32toh(cqe->msg_len); } -static inline uint8_t xsc_cq_read_wc_sl(struct ibv_cq_ex *ibcq) +static inline uint32_t xsc_wc_read_vendor_err(struct ibv_cq_ex *ibcq) { - struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); + struct xsc_cqe *cqe = to_xcq(ibv_cq_ex_to_cq(ibcq))->cqe; + struct xsc_context *xctx = to_xctx(ibv_cq_ex_to_cq(ibcq)->context); - return (be32toh(cq->cqe64->flags_rqpn) >> 24) & 0xf; + return xsc_hw_get_cqe_err_code(xctx->device_id, cqe); } -static inline uint32_t xsc_cq_read_wc_src_qp(struct ibv_cq_ex *ibcq) +static inline __be32 xsc_wc_read_imm_data(struct ibv_cq_ex *ibcq) { - struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); + struct xsc_cqe *cqe = to_xcq(ibv_cq_ex_to_cq(ibcq))->cqe; + __be32 imm_data; - return be32toh(cq->cqe64->flags_rqpn) & 0xffffff; -} - -static inline uint8_t xsc_cq_read_wc_dlid_path_bits(struct ibv_cq_ex *ibcq) -{ - struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); + WR_BE_32(imm_data, RD_LE_32(cqe->imm_data)); - return cq->cqe64->ml_path & 0x7f; + return imm_data; } -static inline uint64_t xsc_cq_read_wc_completion_ts(struct ibv_cq_ex *ibcq) +static inline uint64_t xsc_wc_read_completion_ts(struct ibv_cq_ex *ibcq) { - struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); + struct xsc_cqe *cqe = to_xcq(ibv_cq_ex_to_cq(ibcq))->cqe; - return be64toh(cq->cqe64->timestamp); + return le64toh(cqe->ts); } 
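
For context, this is roughly how the extended CQ poll path above is meant to be driven from an application. It is an illustrative sketch against the generic verbs extended-CQ API, not part of this patch; the helper names (create_ex_cq(), drain_cq()), the "ctx" argument and the 256-entry depth are made up for the example. xsc_cq_fill_pfns() installs read_byte_len/read_imm_data/read_qp_num/read_completion_ts only when the matching IBV_WC_EX_* bit is requested, and create_cq() rejects wc_flags outside IBV_WC_STANDARD_FLAGS | IBV_WC_EX_WITH_COMPLETION_TIMESTAMP, so the sketch keeps within that set:

#include <errno.h>
#include <stdio.h>
#include <infiniband/verbs.h>

/* Illustrative consumer of the extended CQ poll API wired up above.
 * "ctx" is assumed to be an already-opened struct ibv_context on an
 * xscale device; names and sizes here are examples only.
 */
static struct ibv_cq_ex *create_ex_cq(struct ibv_context *ctx)
{
        struct ibv_cq_init_attr_ex attr = {
                .cqe = 256,
                .wc_flags = IBV_WC_EX_WITH_BYTE_LEN |
                            IBV_WC_EX_WITH_QP_NUM |
                            IBV_WC_EX_WITH_COMPLETION_TIMESTAMP,
        };

        return ibv_create_cq_ex(ctx, &attr);
}

static int drain_cq(struct ibv_cq_ex *cq)
{
        struct ibv_poll_cq_attr poll_attr = {};
        int ret;

        ret = ibv_start_poll(cq, &poll_attr);  /* -> xsc_start_poll() */
        if (ret == ENOENT)
                return 0;                      /* CQ empty, lock already released */
        if (ret)
                return ret;

        do {
                if (cq->status != IBV_WC_SUCCESS)
                        fprintf(stderr, "wr_id %llu completed with status %d\n",
                                (unsigned long long)cq->wr_id, cq->status);
                else
                        printf("opcode %d qpn %u len %u ts %llu\n",
                               ibv_wc_read_opcode(cq),
                               ibv_wc_read_qp_num(cq),
                               ibv_wc_read_byte_len(cq),
                               (unsigned long long)ibv_wc_read_completion_ts(cq));

                ret = ibv_next_poll(cq);       /* -> xsc_next_poll() */
        } while (!ret);

        ibv_end_poll(cq);                      /* -> xsc_end_poll(), pushes the CQ consumer index doorbell */
        return ret == ENOENT ? 0 : ret;
}

Note that ibv_end_poll() is deliberately skipped when ibv_start_poll() returns ENOENT, since xsc_start_poll() already drops the CQ lock on the empty path; after a successful start, the loop runs until ibv_next_poll() reports ENOENT and only then ends the poll session, which is what advances the consumer index to hardware via update_cons_index().
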
-static inline uint64_t -xsc_cq_read_wc_completion_wallclock_ns(struct ibv_cq_ex *ibcq) +void xsc_cq_fill_pfns(struct xsc_cq *cq, const struct ibv_cq_init_attr_ex *cq_attr) { - struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); - - return xscdv_ts_to_ns(&cq->last_clock_info, - xsc_cq_read_wc_completion_ts(ibcq)); -} -static inline uint16_t xsc_cq_read_wc_cvlan(struct ibv_cq_ex *ibcq) -{ - struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); + cq->verbs_cq.cq_ex.start_poll = xsc_start_poll; + cq->verbs_cq.cq_ex.next_poll = xsc_next_poll; + cq->verbs_cq.cq_ex.end_poll = xsc_end_poll; - return be16toh(cq->cqe64->vlan_info); -} - -static inline uint32_t xsc_cq_read_flow_tag(struct ibv_cq_ex *ibcq) -{ - struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); - - return be32toh(cq->cqe64->sop_drop_qpn) & XSC_FLOW_TAG_MASK; -} - -static inline void xsc_cq_read_wc_tm_info(struct ibv_cq_ex *ibcq, - struct ibv_wc_tm_info *tm_info) -{ - struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); - - tm_info->tag = be64toh(cq->cqe64->tmh.tag); - tm_info->priv = be32toh(cq->cqe64->tmh.app_ctx); -} - -#define BIT(i) (1UL << (i)) - -#define SINGLE_THREADED BIT(0) -#define STALL BIT(1) -#define V1 BIT(2) -#define ADAPTIVE BIT(3) -#define CLOCK_UPDATE BIT(4) - -#define xsc_start_poll_name(cqe_ver, lock, stall, adaptive, clock_update) \ - xsc_start_poll##adaptive##stall##cqe_ver##lock##clock_update -#define xsc_next_poll_name(cqe_ver, adaptive) \ - xsc_next_poll##adaptive##cqe_ver -#define xsc_end_poll_name(lock, stall, adaptive) \ - xsc_end_poll##adaptive##stall##lock - -#define POLL_FN_ENTRY(cqe_ver, lock, stall, adaptive, clock_update) { \ - .start_poll = &xsc_start_poll_name(cqe_ver, lock, stall, adaptive, clock_update), \ - .next_poll = &xsc_next_poll_name(cqe_ver, adaptive), \ - .end_poll = &xsc_end_poll_name(lock, stall, adaptive), \ - } - -static const struct op -{ - int (*start_poll)(struct ibv_cq_ex *ibcq, struct ibv_poll_cq_attr *attr); - int (*next_poll)(struct ibv_cq_ex *ibcq); - void (*end_poll)(struct ibv_cq_ex *ibcq); -} ops[ADAPTIVE + V1 + STALL + SINGLE_THREADED + CLOCK_UPDATE + 1] = { - [V1] = POLL_FN_ENTRY(_v1, _lock, , ,), - [0] = POLL_FN_ENTRY(_v0, _lock, , ,), - [V1 | SINGLE_THREADED] = POLL_FN_ENTRY(_v1, , , , ), - [SINGLE_THREADED] = POLL_FN_ENTRY(_v0, , , , ), - [V1 | STALL] = POLL_FN_ENTRY(_v1, _lock, _stall, , ), - [STALL] = POLL_FN_ENTRY(_v0, _lock, _stall, , ), - [V1 | SINGLE_THREADED | STALL] = POLL_FN_ENTRY(_v1, , _stall, , ), - [SINGLE_THREADED | STALL] = POLL_FN_ENTRY(_v0, , _stall, , ), - [V1 | STALL | ADAPTIVE] = POLL_FN_ENTRY(_v1, _lock, _stall, _adaptive, ), - [STALL | ADAPTIVE] = POLL_FN_ENTRY(_v0, _lock, _stall, _adaptive, ), - [V1 | SINGLE_THREADED | STALL | ADAPTIVE] = POLL_FN_ENTRY(_v1, , _stall, _adaptive, ), - [SINGLE_THREADED | STALL | ADAPTIVE] = POLL_FN_ENTRY(_v0, , _stall, _adaptive, ), - [V1 | CLOCK_UPDATE] = POLL_FN_ENTRY(_v1, _lock, , , _clock_update), - [0 | CLOCK_UPDATE] = POLL_FN_ENTRY(_v0, _lock, , , _clock_update), - [V1 | SINGLE_THREADED | CLOCK_UPDATE] = POLL_FN_ENTRY(_v1, , , , _clock_update), - [SINGLE_THREADED | CLOCK_UPDATE] = POLL_FN_ENTRY(_v0, , , , _clock_update), - [V1 | STALL | CLOCK_UPDATE] = POLL_FN_ENTRY(_v1, _lock, _stall, , _clock_update), - [STALL | CLOCK_UPDATE] = POLL_FN_ENTRY(_v0, _lock, _stall, , _clock_update), - [V1 | SINGLE_THREADED | STALL | CLOCK_UPDATE] = POLL_FN_ENTRY(_v1, , _stall, , _clock_update), - [SINGLE_THREADED | STALL | CLOCK_UPDATE] = POLL_FN_ENTRY(_v0, , _stall, , _clock_update), - [V1 | STALL | ADAPTIVE | CLOCK_UPDATE] = 
POLL_FN_ENTRY(_v1, _lock, _stall, _adaptive, _clock_update), - [STALL | ADAPTIVE | CLOCK_UPDATE] = POLL_FN_ENTRY(_v0, _lock, _stall, _adaptive, _clock_update), - [V1 | SINGLE_THREADED | STALL | ADAPTIVE | CLOCK_UPDATE] = POLL_FN_ENTRY(_v1, , _stall, _adaptive, _clock_update), - [SINGLE_THREADED | STALL | ADAPTIVE | CLOCK_UPDATE] = POLL_FN_ENTRY(_v0, , _stall, _adaptive, _clock_update), -}; - -int xsc_cq_fill_pfns(struct xsc_cq *cq, - const struct ibv_cq_init_attr_ex *cq_attr, - struct xsc_context *xctx) -{ - const struct op *poll_ops = &ops[((cq->stall_enable && cq->stall_adaptive_enable) ? ADAPTIVE : 0) | - (xctx->cqe_version ? V1 : 0) | - (cq->flags & XSC_CQ_FLAGS_SINGLE_THREADED ? - SINGLE_THREADED : 0) | - (cq->stall_enable ? STALL : 0) | - ((cq_attr->wc_flags & IBV_WC_EX_WITH_COMPLETION_TIMESTAMP_WALLCLOCK) ? - CLOCK_UPDATE : 0)]; - - cq->verbs_cq.cq_ex.start_poll = poll_ops->start_poll; - cq->verbs_cq.cq_ex.next_poll = poll_ops->next_poll; - cq->verbs_cq.cq_ex.end_poll = poll_ops->end_poll; - - cq->verbs_cq.cq_ex.read_opcode = xsc_cq_read_wc_opcode; - cq->verbs_cq.cq_ex.read_vendor_err = xsc_cq_read_wc_vendor_err; - cq->verbs_cq.cq_ex.read_wc_flags = xsc_cq_read_wc_flags; + cq->verbs_cq.cq_ex.read_opcode = xsc_wc_read_opcode; + cq->verbs_cq.cq_ex.read_vendor_err = xsc_wc_read_vendor_err; + cq->verbs_cq.cq_ex.read_wc_flags = xsc_wc_read_flags; if (cq_attr->wc_flags & IBV_WC_EX_WITH_BYTE_LEN) - cq->verbs_cq.cq_ex.read_byte_len = xsc_cq_read_wc_byte_len; + cq->verbs_cq.cq_ex.read_byte_len = xsc_wc_read_byte_len; if (cq_attr->wc_flags & IBV_WC_EX_WITH_IMM) - cq->verbs_cq.cq_ex.read_imm_data = xsc_cq_read_wc_imm_data; + cq->verbs_cq.cq_ex.read_imm_data = xsc_wc_read_imm_data; if (cq_attr->wc_flags & IBV_WC_EX_WITH_QP_NUM) - cq->verbs_cq.cq_ex.read_qp_num = xsc_cq_read_wc_qp_num; - if (cq_attr->wc_flags & IBV_WC_EX_WITH_SRC_QP) - cq->verbs_cq.cq_ex.read_src_qp = xsc_cq_read_wc_src_qp; - if (cq_attr->wc_flags & IBV_WC_EX_WITH_SLID) - cq->verbs_cq.cq_ex.read_slid = xsc_cq_read_wc_slid; - if (cq_attr->wc_flags & IBV_WC_EX_WITH_SL) - cq->verbs_cq.cq_ex.read_sl = xsc_cq_read_wc_sl; - if (cq_attr->wc_flags & IBV_WC_EX_WITH_DLID_PATH_BITS) - cq->verbs_cq.cq_ex.read_dlid_path_bits = xsc_cq_read_wc_dlid_path_bits; + cq->verbs_cq.cq_ex.read_qp_num = xsc_wc_read_qp_num; if (cq_attr->wc_flags & IBV_WC_EX_WITH_COMPLETION_TIMESTAMP) - cq->verbs_cq.cq_ex.read_completion_ts = xsc_cq_read_wc_completion_ts; - if (cq_attr->wc_flags & IBV_WC_EX_WITH_CVLAN) - cq->verbs_cq.cq_ex.read_cvlan = xsc_cq_read_wc_cvlan; - if (cq_attr->wc_flags & IBV_WC_EX_WITH_FLOW_TAG) - cq->verbs_cq.cq_ex.read_flow_tag = xsc_cq_read_flow_tag; - if (cq_attr->wc_flags & IBV_WC_EX_WITH_TM_INFO) - cq->verbs_cq.cq_ex.read_tm_info = xsc_cq_read_wc_tm_info; - if (cq_attr->wc_flags & IBV_WC_EX_WITH_COMPLETION_TIMESTAMP_WALLCLOCK) { - if (!xctx->clock_info_page) - return EOPNOTSUPP; - cq->verbs_cq.cq_ex.read_completion_wallclock_ns = - xsc_cq_read_wc_completion_wallclock_ns; - } - - return 0; + cq->verbs_cq.cq_ex.read_completion_ts = xsc_wc_read_completion_ts; } int xsc_arm_cq(struct ibv_cq *ibvcq, int solicited) { struct xsc_cq *cq = to_xcq(ibvcq); - union xsc_db_data doorbell; - - doorbell.cqn = cq->cqn; - doorbell.cq_next_cid = cq->cons_index; - doorbell.solicited = !!solicited; - - /* - * Make sure that the doorbell record in host memory is - * written before ringing the doorbell via PCI WC MMIO. 
- */ - mmio_wc_start(); - - WR_REG(cq->armdb, doorbell.raw_data); + struct xsc_context *ctx = to_xctx(ibvcq->context); - mmio_flush_writes(); + xsc_hw_update_cq_db(ctx->device_id, cq->armdb, cq->cqn, cq->cons_index, solicited); return 0; } diff --git a/providers/xscale/qp.c b/providers/xscale/qp.c index 04e87e2..ea9ecb5 100644 --- a/providers/xscale/qp.c +++ b/providers/xscale/qp.c @@ -10,12 +10,12 @@ #include #include #include -#include #include #include "xscale.h" #include "wqe.h" #include "xsc_hsi.h" +#include "xsc_hw.h" static const uint32_t xsc_ib_opcode[] = { [IBV_WR_SEND] = XSC_MSG_OPCODE_SEND, @@ -26,26 +26,21 @@ static const uint32_t xsc_ib_opcode[] = { [IBV_WR_SEND_WITH_INV] = XSC_MSG_OPCODE_SEND, }; -static void *get_recv_wqe(struct xsc_qp *qp, int n) +static inline void *get_recv_wqe(struct xsc_qp *qp, int n) { return qp->rq_start + (n << qp->rq.wqe_shift); } -static void *get_wq_recv_wqe(struct xsc_rwq *rwq, int n) +static inline void *get_wq_recv_wqe(struct xsc_rwq *rwq, int n) { return rwq->pbuff + (n << rwq->rq.wqe_shift); } -static void *get_seg_wqe(void *first, int n) +static inline void *get_seg_wqe(void *first, int n) { return first + (n << XSC_BASE_WQE_SHIFT); } -void *xsc_get_send_wqe(struct xsc_qp *qp, int n) -{ - return qp->sq_start + (n << qp->sq.wqe_shift); -} - void xsc_init_rwq_indices(struct xsc_rwq *rwq) { rwq->rq.head = 0; @@ -61,7 +56,7 @@ void xsc_init_qp_indices(struct xsc_qp *qp) qp->sq.cur_post = 0; } -static int xsc_wq_overflow(struct xsc_wq *wq, int nreq, struct xsc_cq *cq) +static inline int xsc_wq_overflow(struct xsc_wq *wq, int nreq, struct xsc_cq *cq) { unsigned cur; @@ -76,65 +71,72 @@ static int xsc_wq_overflow(struct xsc_wq *wq, int nreq, struct xsc_cq *cq) return cur + nreq >= wq->max_post; } -static inline void set_remote_addr_seg(struct xsc_wqe_data_seg *remote_seg, - uint32_t msg_len, uint64_t remote_addr, uint32_t rkey) +static inline void set_data_seg_with_value(struct xsc_qp *qp, struct xsc_wqe_data_seg *data_seg, + uint64_t addr, uint32_t key, uint32_t length) { - WR_LE_32(remote_seg->seg_len, msg_len); - WR_LE_32(remote_seg->mkey, rkey); - WR_LE_64(remote_seg->va, remote_addr); + struct xsc_context *ctx = to_xctx(qp->ibv_qp->context); + + xsc_hw_set_data_seg(ctx->device_id, data_seg, addr, key, length); } -static void set_local_data_seg(struct xsc_wqe_data_seg *data_seg, struct ibv_sge *sg) +static inline void set_local_data_seg_from_sge(struct xsc_qp *qp, struct xsc_wqe_data_seg *data_seg, + const struct ibv_sge *sg) { - WR_LE_32(data_seg->seg_len, sg->length); - WR_LE_32(data_seg->mkey, sg->lkey); - WR_LE_64(data_seg->va, sg->addr); + struct xsc_context *ctx = to_xctx(qp->ibv_qp->context); + + xsc_hw_set_data_seg(ctx->device_id, data_seg, sg->addr, sg->lkey, sg->length); } -static __be32 send_ieth(struct ibv_send_wr *wr) +static void *get_addr_from_wr(const void *list, int idx) { - switch (wr->opcode) { - case IBV_WR_SEND_WITH_IMM: - case IBV_WR_RDMA_WRITE_WITH_IMM: - return wr->imm_data; - default: - return 0; - } + const struct ibv_send_wr *wr = list; + + return (void *)wr->sg_list[idx].addr; } -static int set_data_inl_seg(struct xsc_qp *qp, struct ibv_send_wr *wr, - struct xsc_send_wqe_ctrl_seg *ctrl) +static int get_len_from_wr(const void *list, int idx) { - void *data_seg; - unsigned seg_index; - void *addr; - int len = 0; - int i; - const int ds_len = sizeof(struct xsc_wqe_data_seg); - int left_len = 0; - int msg_len = ctrl->msg_len; + const struct ibv_send_wr *wr = list; + return wr->sg_list[idx].length; +} - if (wr->opcode == 
IBV_WR_SEND || wr->opcode == IBV_WR_SEND_WITH_IMM) - seg_index = 1; - else - seg_index = 2; +static void *get_addr_from_buf_list(const void *list, int idx) +{ + const struct ibv_data_buf *buf_list = list; + return buf_list[idx].addr; +} - if (unlikely(msg_len > qp->max_inline_data)) - return ENOMEM; +static int get_len_from_wr_list(const void *list, int idx) +{ + const struct ibv_data_buf *buf_list = list; + return buf_list[idx].length; +} + +static int _set_wqe_inline(void *data_seg, size_t num_buf, const void *list, + void *(*get_addr)(const void *, int), + int (*get_len)(const void *, int)) +{ + int i; + int ds_left_len = 0; + int len = 0; + void *addr; + void *data_seg_base = data_seg; + int seg_index = 0; + const int ds_len = sizeof(struct xsc_wqe_data_seg); - for (i = 0; i < wr->num_sge; ++i) { - if (likely(wr->sg_list[i].length)) { - addr = (void*)wr->sg_list[i].addr; - len = wr->sg_list[i].length; - if (left_len > 0) { - int copy_len = min_t(int, len, left_len); + for (i = 0; i < num_buf; i++) { + addr = get_addr(list, i); + len = get_len(list, i); + if (likely(len)) { + if (ds_left_len > 0) { + int copy_len = min_t(int, len, ds_left_len); memcpy(data_seg, addr, copy_len); addr += copy_len; len -= copy_len; } while (len >= ds_len) { - data_seg = get_seg_wqe(ctrl, seg_index); + data_seg = get_seg_wqe(data_seg_base, seg_index); seg_index++; memcpy(data_seg, addr, ds_len); addr += ds_len; @@ -142,43 +144,84 @@ static int set_data_inl_seg(struct xsc_qp *qp, struct ibv_send_wr *wr, } if (len > 0) { - data_seg = get_seg_wqe(ctrl, seg_index); + data_seg = get_seg_wqe(data_seg_base, seg_index); seg_index++; memcpy(data_seg, addr, len); data_seg += len; - left_len = ds_len - len; + ds_left_len = ds_len - len; } else { - left_len = 0; + ds_left_len = 0; } } } + return seg_index; +} + +static int set_wqe_inline_from_wr(struct xsc_qp *qp, struct ibv_send_wr *wr, + struct xsc_send_wqe_ctrl_seg *ctrl) +{ + void *data_seg; + unsigned seg_index; + int msg_len = ctrl->msg_len; + int filled_ds_num; + + if (wr->opcode == IBV_WR_SEND || wr->opcode == IBV_WR_SEND_WITH_IMM) + seg_index = 1; + else + seg_index = 2; + data_seg = get_seg_wqe(ctrl, seg_index); - ctrl->ds_data_num = seg_index - 1; + if (unlikely(msg_len > qp->max_inline_data)) + return ENOMEM; + + filled_ds_num = _set_wqe_inline(data_seg, wr->num_sge, wr, + get_addr_from_wr, + get_len_from_wr); + ctrl->ds_data_num = seg_index - 1 + filled_ds_num; return 0; } -static void zero_send_ds(int idx, struct xsc_qp *qp) +static int set_wqe_inline_from_buf_list(void *data_seg, + size_t num_buf, + const struct ibv_data_buf *buf_list) +{ + return _set_wqe_inline(data_seg, num_buf, buf_list, + get_addr_from_buf_list, + get_len_from_wr_list); +} + +static inline void _zero_send_ds(int idx, struct xsc_qp *qp, int keep_ctrl) { void *seg; uint64_t *uninitialized_var(p); int i; seg = (void*)xsc_get_send_wqe(qp, idx); - for (i = 1; i < qp->sq.seg_cnt; i++) { + for (i = keep_ctrl; i < qp->sq.seg_cnt; i++) { p = get_seg_wqe(seg, i); p[0] = p[1] = 0; } } -static void zero_recv_ds(int idx, struct xsc_qp *qp) +static inline void clear_send_wqe(int idx, struct xsc_qp *qp) +{ + _zero_send_ds(idx, qp, 0); +} + +static inline void clear_send_wqe_except_ctrl(int idx, struct xsc_qp *qp) +{ + _zero_send_ds(idx, qp, 1); +} + +static void clear_recv_wqe(int idx, struct xsc_qp *qp) { void *seg; uint64_t *uninitialized_var(p); int i; seg = (void*)get_recv_wqe(qp, idx); - for (i = 1; i < qp->rq.seg_cnt; i++) { + for (i = 0; i < qp->rq.seg_cnt; i++) { p = get_seg_wqe(seg, i); 
p[0] = p[1] = 0; } @@ -221,23 +264,16 @@ static inline void dump_wqe(int type, int idx, struct xsc_qp *qp) {}; static inline void xsc_post_send_db(struct xsc_qp *qp, int nreq) { - uint16_t next_pid; - union xsc_db_data db; + struct xsc_context *ctx = to_xctx(qp->ibv_qp->context); + uint32_t next_pid; if (unlikely(!nreq)) return; qp->sq.head += nreq; next_pid = qp->sq.head << (qp->sq.wqe_shift - XSC_BASE_WQE_SHIFT); - db.sq_next_pid = next_pid; - db.sqn = qp->sqn; - /* - * Make sure that descriptors are written before - * updating doorbell record and ringing the doorbell - */ xsc_dbg(to_xctx(qp->ibv_qp->context)->dbg_fp, XSC_DBG_QP_SEND, "nreq:%d\n", nreq); - udma_to_device_barrier(); - WR_REG(qp->sq.db, db.raw_data); + xsc_hw_ring_tx_doorbell(ctx->device_id, qp->sq.db, qp->sqn, next_pid); } static inline int _xsc_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr, @@ -305,7 +341,7 @@ static inline int _xsc_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr, } idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1); - zero_send_ds(idx, qp); + clear_send_wqe(idx, qp); ctrl = seg = xsc_get_send_wqe(qp, idx); ctrl->ds_data_num = 0; WR_LE_16(ctrl->wqe_id, @@ -337,11 +373,11 @@ static inline int _xsc_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr, break; case IBV_WR_SEND_WITH_IMM: ctrl->with_immdt = 1; - ctrl->opcode_data = send_ieth(wr); + WR_LE_32(ctrl->opcode_data, RD_BE_32(wr->imm_data)); break; case IBV_WR_RDMA_WRITE_WITH_IMM: ctrl->with_immdt = 1; - ctrl->opcode_data = send_ieth(wr); + WR_LE_32(ctrl->opcode_data, RD_BE_32(wr->imm_data)); SWITCH_FALLTHROUGH; case IBV_WR_RDMA_READ: case IBV_WR_RDMA_WRITE: @@ -349,11 +385,11 @@ static inline int _xsc_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr, break; ctrl->ds_data_num++; data_seg = get_seg_wqe(ctrl, seg_index); - set_remote_addr_seg( - data_seg, - msg_len, - wr->wr.rdma.remote_addr, - wr->wr.rdma.rkey); + set_data_seg_with_value(qp, + data_seg, + wr->wr.rdma.remote_addr, + wr->wr.rdma.rkey, + msg_len); seg_index++; break; default: @@ -372,7 +408,7 @@ static inline int _xsc_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr, } if (wr->send_flags & IBV_SEND_INLINE && wr->num_sge) { - err = set_data_inl_seg(qp, wr, ctrl); + err = set_wqe_inline_from_wr(qp, wr, ctrl); if (unlikely(err)) { *bad_wr = wr; xsc_dbg(to_xctx(ibqp->context)->dbg_fp, XSC_DBG_QP_SEND, @@ -383,7 +419,7 @@ static inline int _xsc_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr, for (i = 0; i < wr->num_sge; ++i, ++seg_index) { if (likely(wr->sg_list[i].length)) { data_seg = get_seg_wqe(ctrl, seg_index); - set_local_data_seg(data_seg, &wr->sg_list[i]); + set_local_data_seg_from_sge(qp, data_seg, &wr->sg_list[i]); ctrl->ds_data_num++; } } @@ -392,7 +428,7 @@ static inline int _xsc_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr, ctrl->msg_opcode = xsc_ib_opcode[wr->opcode]; if (ctrl->msg_len == 0) { ctrl->ds_data_num = 0; - zero_send_ds(idx, qp); + clear_send_wqe_except_ctrl(idx, qp); } qp->sq.wrid[idx] = wr->wr_id; qp->sq.wqe_head[idx] = qp->sq.head + nreq; @@ -403,7 +439,7 @@ static inline int _xsc_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr, } qp->sq.wr_opcode[idx] = wr->opcode; - if (xsc_debug_mask & XSC_DBG_QP_SEND) + if (unlikely(xsc_debug_mask & XSC_DBG_QP_SEND)) dump_wqe(0, idx, qp); } @@ -420,6 +456,301 @@ int xsc_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr, return _xsc_post_send(ibqp, wr, bad_wr); } +static inline void xsc_wr_start(struct ibv_qp_ex *ibqp) +{ + struct xsc_qp *qp = to_xqp((struct ibv_qp *)ibqp); + + 
xsc_spin_lock(&qp->sq.lock); + + qp->cur_post_rb = qp->sq.cur_post; + qp->err = 0; + qp->nreq = 0; +} + +static inline int xsc_wr_complete(struct ibv_qp_ex *ibqp) +{ + struct xsc_qp *qp = to_xqp((struct ibv_qp *)ibqp); + int err = qp->err; + + if (unlikely(err)) { + qp->sq.cur_post = qp->cur_post_rb; + goto out; + } + + xsc_post_send_db(qp, qp->nreq); +out: + xsc_spin_unlock(&qp->sq.lock); + return err; +} + +static inline void xsc_wr_abort(struct ibv_qp_ex *ibqp) +{ + struct xsc_qp *qp = to_xqp((struct ibv_qp *)ibqp); + + qp->sq.cur_post = qp->cur_post_rb; + + xsc_spin_unlock(&qp->sq.lock); +} + +#define RDMA_REMOTE_DATA_SEG_IDX 1 +static const int local_ds_base_idx[] = { + [IBV_WR_RDMA_WRITE] = 2, + [IBV_WR_RDMA_WRITE_WITH_IMM] = 2, + [IBV_WR_SEND] = 1, + [IBV_WR_SEND_WITH_IMM] = 1, + [IBV_WR_RDMA_READ] = 2 +}; + +static inline void _common_wqe_init(struct ibv_qp_ex *ibqp, + enum ibv_wr_opcode ib_op) +{ + struct xsc_qp *qp = to_xqp((struct ibv_qp *)ibqp); + struct xsc_send_wqe_ctrl_seg *ctrl; + uint32_t idx; + + if (unlikely(xsc_wq_overflow(&qp->sq, qp->nreq, + to_xcq(qp->ibv_qp->send_cq)))) { + xsc_dbg(to_xctx(ibqp->qp_base.context)->dbg_fp, XSC_DBG_QP_SEND, + "send work queue overflow\n"); + if (!qp->err) + qp->err = ENOMEM; + + return; + } + + idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1); + clear_send_wqe(idx, qp); + ctrl = xsc_get_send_wqe(qp, idx); + qp->cur_ctrl = ctrl; + qp->cur_ds_num = 0; + qp->cur_data_len = 0; + qp->cur_data = get_seg_wqe(ctrl, local_ds_base_idx[ib_op]); + qp->cur_remote_addr = 0; + qp->cur_remote_key = 0; + ctrl->msg_opcode = xsc_ib_opcode[ib_op]; + ctrl->ce = qp->sq_signal_bits ? 1 : (ibqp->wr_flags & IBV_SEND_SIGNALED ? 1 : 0); + ctrl->se = ibqp->wr_flags & IBV_SEND_SOLICITED ? 1 : 0; + ctrl->in_line = ibqp->wr_flags & IBV_SEND_INLINE ? 
1 : 0; + qp->sq.wrid[idx] = ibqp->wr_id; + qp->sq.wqe_head[idx] = qp->sq.head + qp->nreq; + qp->sq.wr_opcode[idx] = ib_op; + WR_LE_16(ctrl->wqe_id, + qp->sq.cur_post << (qp->sq.wqe_shift - XSC_BASE_WQE_SHIFT)); +} + +static inline void _common_wqe_finilize(struct ibv_qp_ex *ibqp) +{ + struct xsc_qp *qp = to_xqp((struct ibv_qp *)ibqp); + struct xsc_send_wqe_ctrl_seg *ctrl = qp->cur_ctrl; + struct xsc_wqe_data_seg *remote_seg; + uint32_t idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1); + + ctrl->ds_data_num = qp->cur_ds_num; + ctrl->msg_len = qp->cur_data_len; + if (ctrl->msg_opcode == XSC_MSG_OPCODE_RDMA_WRITE || + ctrl->msg_opcode == XSC_MSG_OPCODE_RDMA_READ) { + remote_seg = get_seg_wqe(qp->cur_ctrl, RDMA_REMOTE_DATA_SEG_IDX); + set_data_seg_with_value(qp, remote_seg, + qp->cur_remote_addr, + qp->cur_remote_key, + ctrl->msg_len); + } + + dump_wqe(0, idx, qp); + qp->sq.cur_post++; + qp->nreq++; + if (ctrl->ce) { + qp->sq.flush_wqe_cnt++; + qp->sq.need_flush[idx] = 1; + } +} + +static inline void xsc_wr_send(struct ibv_qp_ex *ibqp) +{ + _common_wqe_init(ibqp, IBV_WR_SEND); +} + +static inline void xsc_wr_send_imm(struct ibv_qp_ex *ibqp, __be32 imm_data) +{ + struct xsc_qp *qp = to_xqp((struct ibv_qp *)ibqp); + struct xsc_send_wqe_ctrl_seg *ctrl; + + _common_wqe_init(ibqp, IBV_WR_SEND_WITH_IMM); + ctrl = qp->cur_ctrl; + ctrl->with_immdt = 1; + WR_LE_32(ctrl->opcode_data, RD_BE_32(imm_data)); +} + +static inline void _xsc_wr_rdma(struct ibv_qp_ex *ibqp, + uint32_t rkey, + uint64_t remote_addr, + enum ibv_wr_opcode ib_op) +{ + struct xsc_qp *qp = to_xqp((struct ibv_qp *)ibqp); + + _common_wqe_init(ibqp, ib_op); + qp->cur_remote_addr = remote_addr; + qp->cur_remote_key = rkey; + qp->cur_ds_num++; +} + +static inline void xsc_wr_rdma_write(struct ibv_qp_ex *ibqp, uint32_t rkey, + uint64_t remote_addr) +{ + _xsc_wr_rdma(ibqp, rkey, remote_addr, IBV_WR_RDMA_WRITE); +} + +static inline void xsc_wr_rdma_write_imm(struct ibv_qp_ex *ibqp, uint32_t rkey, + uint64_t remote_addr, __be32 imm_data) +{ + struct xsc_qp *qp = to_xqp((struct ibv_qp *)ibqp); + struct xsc_send_wqe_ctrl_seg *ctrl; + + _xsc_wr_rdma(ibqp, rkey, remote_addr, IBV_WR_RDMA_WRITE_WITH_IMM); + ctrl = qp->cur_ctrl; + ctrl->with_immdt = 1; + WR_LE_32(ctrl->opcode_data, RD_BE_32(imm_data)); +} + +static inline void xsc_wr_rdma_read(struct ibv_qp_ex *ibqp, uint32_t rkey, + uint64_t remote_addr) +{ + _xsc_wr_rdma(ibqp, rkey, remote_addr, IBV_WR_RDMA_READ); +} + +static inline void xsc_wr_set_sge(struct ibv_qp_ex *ibqp, uint32_t lkey, uint64_t addr, + uint32_t length) +{ + struct xsc_qp *qp = to_xqp((struct ibv_qp *)ibqp); + struct xsc_wqe_data_seg *data_seg = qp->cur_data; + + if (unlikely(!length)) + return; + + set_data_seg_with_value(qp, data_seg, addr, lkey, length); + qp->cur_ds_num++; + qp->cur_data_len = length; + _common_wqe_finilize(ibqp); +} + +static inline void xsc_wr_set_sge_list(struct ibv_qp_ex *ibqp, size_t num_sge, + const struct ibv_sge *sg_list) +{ + struct xsc_qp *qp = to_xqp((struct ibv_qp *)ibqp); + struct xsc_wqe_data_seg *data_seg = qp->cur_data; + int i; + + if (unlikely(num_sge > qp->sq.max_gs)) { + xsc_dbg(to_xctx(ibqp->qp_base.context)->dbg_fp, XSC_DBG_QP_SEND, + "rdma read, max gs exceeded %lu (max = 1)\n", + num_sge); + if (!qp->err) + qp->err = ENOMEM; + return ; + } + + for (i = 0; i < num_sge; i++) { + if (unlikely(!sg_list[i].length)) + continue; + set_local_data_seg_from_sge(qp, data_seg, &sg_list[i]); + data_seg++; + qp->cur_ds_num++; + qp->cur_data_len += sg_list[i].length; + } + _common_wqe_finilize(ibqp); 
+} + +static inline void xsc_wr_set_inline_data(struct ibv_qp_ex *ibqp, void *addr, + size_t length) +{ + struct xsc_qp *qp = to_xqp((struct ibv_qp *)ibqp); + struct xsc_wqe_data_seg *data_seg = qp->cur_data; + size_t num_buf = 1; + struct ibv_data_buf data_buf = {.addr = addr, .length = length}; + int num_filled_ds = 0; + + if (unlikely(length > qp->max_inline_data)) { + if (!qp->err) + qp->err = ENOMEM; + return; + } + + num_filled_ds = set_wqe_inline_from_buf_list(data_seg, num_buf, &data_buf); + + qp->cur_ds_num += num_filled_ds; + qp->cur_data_len = length; + _common_wqe_finilize(ibqp); +} + +static inline void xsc_wr_set_inline_data_list(struct ibv_qp_ex *ibqp, + size_t num_buf, + const struct ibv_data_buf *buf_list) +{ + struct xsc_qp *qp = to_xqp((struct ibv_qp *)ibqp); + struct xsc_wqe_data_seg *data_seg = qp->cur_data; + int num_filled_ds = 0; + int i; + size_t total_len = 0; + + for (i = 0; i < num_buf; i++) + total_len += buf_list[i].length; + if (unlikely(total_len > qp->max_inline_data)) { + if (!qp->err) + qp->err = ENOMEM; + return; + } + + num_filled_ds = set_wqe_inline_from_buf_list(data_seg, num_buf, buf_list); + + qp->cur_ds_num += num_filled_ds; + qp->cur_data_len = total_len; + _common_wqe_finilize(ibqp); +} + +enum { + XSC_SUPPORTED_SEND_OPS_FLAGS_RC = + IBV_QP_EX_WITH_SEND | + IBV_QP_EX_WITH_SEND_WITH_IMM | + IBV_QP_EX_WITH_RDMA_WRITE | + IBV_QP_EX_WITH_RDMA_WRITE_WITH_IMM | + IBV_QP_EX_WITH_RDMA_READ, +}; + +static void fill_wr_pfns_rc(struct ibv_qp_ex *ibqp) +{ + ibqp->wr_send = xsc_wr_send; + ibqp->wr_send_imm = xsc_wr_send_imm; + ibqp->wr_rdma_write = xsc_wr_rdma_write; + ibqp->wr_rdma_write_imm = xsc_wr_rdma_write_imm; + ibqp->wr_rdma_read = xsc_wr_rdma_read; + + ibqp->wr_set_sge = xsc_wr_set_sge; + ibqp->wr_set_sge_list = xsc_wr_set_sge_list; + ibqp->wr_set_inline_data = xsc_wr_set_inline_data; + ibqp->wr_set_inline_data_list = xsc_wr_set_inline_data_list; +} + +int xsc_qp_fill_wr_pfns(struct xsc_qp *xqp, const struct ibv_qp_init_attr_ex *attr) +{ + struct ibv_qp_ex *ibqp = &xqp->verbs_qp.qp_ex; + uint64_t ops = attr->send_ops_flags; + + ibqp->wr_start = xsc_wr_start; + ibqp->wr_complete = xsc_wr_complete; + ibqp->wr_abort = xsc_wr_abort; + + switch (attr->qp_type) { + case IBV_QPT_RC: + if (ops & ~XSC_SUPPORTED_SEND_OPS_FLAGS_RC) + return EOPNOTSUPP; + fill_wr_pfns_rc(ibqp); + break; + default: + return EOPNOTSUPP; + } + return 0; +} + static void set_wq_sig_seg(struct xsc_rwq *rwq, struct xsc_rwqe_sig *sig, int size, uint16_t idx) { @@ -506,6 +837,7 @@ out: return err; } +int xsc_post_recv_dump_wqe = 1; int xsc_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr, struct ibv_recv_wr **bad_wr) { @@ -513,8 +845,7 @@ int xsc_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr, struct xsc_wqe_data_seg *recv_head; struct xsc_wqe_data_seg *data_seg; int err = 0; - uint16_t next_pid = 0; - union xsc_db_data db; + uint32_t next_pid = 0; int nreq; uint16_t idx; int i; @@ -523,7 +854,7 @@ int xsc_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr, idx = qp->rq.head & (qp->rq.wqe_cnt - 1); - zero_recv_ds(idx, qp); + clear_recv_wqe(idx, qp); for (nreq = 0; wr; ++nreq, wr = wr->next) { if (unlikely(xsc_wq_overflow(&qp->rq, nreq, to_xcq(qp->ibv_qp->recv_cq)))) { @@ -547,31 +878,23 @@ int xsc_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr, if (unlikely(!wr->sg_list[i].length)) continue; data_seg = get_seg_wqe(recv_head, i); - WR_LE_32(data_seg->seg_len, wr->sg_list[i].length); - WR_LE_32(data_seg->mkey, wr->sg_list[i].lkey); - WR_LE_64(data_seg->va, 
wr->sg_list[i].addr); + set_local_data_seg_from_sge(qp, data_seg, &wr->sg_list[i]); } qp->rq.wrid[idx] = wr->wr_id; - dump_wqe(1, idx, qp); + if (xsc_post_recv_dump_wqe || (xsc_debug_mask & XSC_DBG_QP_RECV)) + dump_wqe(1, idx, qp); idx = (idx + 1) & (qp->rq.wqe_cnt - 1); qp->rq.flush_wqe_cnt++; } out: if (likely(nreq)) { + struct xsc_context *ctx = to_xctx(ibqp->context); qp->rq.head += nreq; next_pid = qp->rq.head << (qp->rq.wqe_shift - XSC_BASE_WQE_SHIFT); - db.rq_next_pid = next_pid; - db.rqn = qp->rqn; - - /* - * Make sure that descriptors are written before - * doorbell record. - */ - udma_to_device_barrier(); - WR_REG(qp->rq.db, db.raw_data); + xsc_hw_ring_rx_doorbell(ctx->device_id, qp->rq.db, qp->rqn, next_pid); } xsc_spin_unlock(&qp->rq.lock); @@ -676,3 +999,4 @@ int xsc_err_state_qp(struct ibv_qp *qp, enum ibv_qp_state cur_state, } return ret; } + diff --git a/providers/xscale/verbs.c b/providers/xscale/verbs.c index 937bed1..602ca9d 100644 --- a/providers/xscale/verbs.c +++ b/providers/xscale/verbs.c @@ -213,7 +213,6 @@ struct ibv_mr *xsc_reg_mr(struct ibv_pd *pd, void *addr, size_t length, &mr->vmr, &cmd, sizeof(cmd), &resp, sizeof resp); if (ret) { - xsc_free_buf(&(mr->buf)); free(mr); return NULL; } @@ -225,6 +224,27 @@ struct ibv_mr *xsc_reg_mr(struct ibv_pd *pd, void *addr, size_t length, return &mr->vmr.ibv_mr; } +struct ibv_mr *xsc_reg_dmabuf_mr(struct ibv_pd *pd, uint64_t offset, size_t length, + uint64_t iova, int fd, int acc) +{ + struct xsc_mr *mr; + int ret; + + mr = calloc(1, sizeof(*mr)); + if (!mr) + return NULL; + + ret = ibv_cmd_reg_dmabuf_mr(pd, offset, length, iova, fd, acc, + &mr->vmr); + if (ret) { + free(mr); + return NULL; + } + mr->alloc_flags = acc; + + return &mr->vmr.ibv_mr; +} + struct ibv_mr *xsc_alloc_null_mr(struct ibv_pd *pd) { struct xsc_mr *mr; @@ -291,17 +311,6 @@ struct ibv_mr *xsc_reg_dm_mr(struct ibv_pd *pd, struct ibv_dm *ibdm, return &mr->vmr.ibv_mr; } -int xsc_rereg_mr(struct verbs_mr *vmr, int flags, struct ibv_pd *pd, - void *addr, size_t length, int access) -{ - struct ibv_rereg_mr cmd; - struct ib_uverbs_rereg_mr_resp resp; - - return ibv_cmd_rereg_mr(vmr, flags, addr, length, (uintptr_t)addr, - access, pd, &cmd, sizeof(cmd), &resp, - sizeof(resp)); -} - int xsc_dereg_mr(struct verbs_mr *vmr) { int ret; @@ -339,12 +348,8 @@ static int align_queue_size(long long req) } enum { - CREATE_CQ_SUPPORTED_WC_FLAGS = IBV_WC_STANDARD_FLAGS | - IBV_WC_EX_WITH_COMPLETION_TIMESTAMP | - IBV_WC_EX_WITH_CVLAN | - IBV_WC_EX_WITH_FLOW_TAG | - IBV_WC_EX_WITH_TM_INFO | - IBV_WC_EX_WITH_COMPLETION_TIMESTAMP_WALLCLOCK + CREATE_CQ_SUPPORTED_WC_FLAGS = IBV_WC_STANDARD_FLAGS | + IBV_WC_EX_WITH_COMPLETION_TIMESTAMP }; enum { @@ -417,7 +422,7 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *context, } if (cq_attr->wc_flags & ~CREATE_CQ_SUPPORTED_WC_FLAGS) { - xsc_err("unsupported flgas:0x%lx\n", cq_attr->wc_flags); + xsc_err("unsupported wc flags:0x%lx\n", cq_attr->wc_flags); errno = ENOTSUP; return NULL; } @@ -453,16 +458,16 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *context, ncqe = XSC_CQE_RING_DEPTH_MIN; } - if (ncqe > XSC_CQE_RING_DEPTH_MAX) { + if (ncqe > xctx->max_cqe) { if (xsc_cqe_depth_check()) { xsc_err("CQE ring size %u exceeds CQE ring depth %u, abort!\n", - ncqe, XSC_CQE_RING_DEPTH_MAX); + ncqe, xctx->max_cqe); errno = EINVAL; goto err_spl; } else { xsc_dbg(xctx->dbg_fp, XSC_DBG_CQ, "CQE ring size %u exceeds the MAX ring szie, set it as %u\n", - ncqe, XSC_CQE_RING_DEPTH_MAX); - ncqe = XSC_CQE_RING_DEPTH_MAX; + ncqe, 
xctx->max_cqe); + ncqe = xctx->max_cqe; } } @@ -485,6 +490,9 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *context, xsc_dbg(xctx->dbg_fp, XSC_DBG_CQ, "buf_addr:%p\n", cq->buf_a.buf); + if (cq_alloc_flags & XSC_CQ_FLAGS_EXTENDED) + xsc_cq_fill_pfns(cq, cq_attr); + if (use_ex) { struct ibv_cq_init_attr_ex cq_attr_ex = *cq_attr; @@ -630,6 +638,7 @@ static int xsc_calc_sq_size(struct xsc_context *ctx, int wqe_size; int wq_size; int wq_size_min = 0; + int max_inline_cap; if (!attr->cap.max_send_wr) return 0; @@ -646,23 +655,34 @@ static int xsc_calc_sq_size(struct xsc_context *ctx, wq_size = wq_size_min; } - if (wq_size > XSC_SEND_WQE_RING_DEPTH_MAX) { - xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, - "WQE size %u exceeds WQE ring depth, set it as %u\n", - wq_size, XSC_SEND_WQE_RING_DEPTH_MAX); - wq_size = XSC_SEND_WQE_RING_DEPTH_MAX; + if (wq_size > ctx->max_send_wqebb) { + if (ctx->device_id == XSC_MC_PF_DEV_ID_DIAMOND || + ctx->device_id == XSC_MC_PF_DEV_ID_DIAMOND_NEXT) { + xsc_err("WQE size %u exceeds WQE ring depth\n", wq_size); + return -EINVAL; + } else { + xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, + "WQE size %u exceeds WQE ring depth, set it as %u\n", + wq_size, ctx->max_send_wqebb); + wq_size = ctx->max_send_wqebb; + } } - qp->max_inline_data = attr->cap.max_inline_data; qp->sq.wqe_cnt = wq_size; qp->sq.ds_cnt = wq_size << ctx->send_ds_shift; qp->sq.seg_cnt = 1 << ctx->send_ds_shift; qp->sq.wqe_shift = XSC_BASE_WQE_SHIFT + ctx->send_ds_shift; qp->sq.max_gs = attr->cap.max_send_sge; qp->sq.max_post = qp->sq.wqe_cnt; - if (attr->cap.max_inline_data > - (qp->sq.seg_cnt - 2) * sizeof(struct xsc_wqe_data_seg)) + + if (ctx->device_id == XSC_MC_PF_DEV_ID_DIAMOND || + ctx->device_id == XSC_MC_PF_DEV_ID_DIAMOND_NEXT) + max_inline_cap = 64; + else + max_inline_cap = (qp->sq.seg_cnt - 2) * sizeof(struct xsc_wqe_data_seg); + if (attr->cap.max_inline_data > max_inline_cap) return -EINVAL; + qp->max_inline_data = attr->cap.max_inline_data; xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, "Send WQE count:%u, max post:%u wqe shift:%u\n", qp->sq.wqe_cnt, qp->sq.max_post, qp->sq.wqe_shift); @@ -743,11 +763,17 @@ static int xsc_calc_rq_size(struct xsc_context *ctx, wq_size = wq_size_min; } - if (wq_size > XSC_RECV_WQE_RING_DEPTH_MAX) { - xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, - "WQE size %u exceeds WQE ring depth, set it as %u\n", - wq_size, XSC_RECV_WQE_RING_DEPTH_MAX); - wq_size = XSC_RECV_WQE_RING_DEPTH_MAX; + if (wq_size > ctx->max_recv_wr) { + if (ctx->device_id == XSC_MC_PF_DEV_ID_DIAMOND || + ctx->device_id == XSC_MC_PF_DEV_ID_DIAMOND_NEXT) { + xsc_err("WQE size %u exceeds WQE ring depth\n", wq_size); + return -EINVAL; + } else { + xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, + "WQE size %u exceeds WQE ring depth, set it as %u\n", + wq_size, ctx->max_recv_wr); + wq_size = ctx->max_recv_wr; + } } qp->rq.wqe_cnt = wq_size; @@ -946,8 +972,10 @@ static void xsc_free_qp_buf(struct xsc_context *ctx, struct xsc_qp *qp) } enum { - XSC_CREATE_QP_SUP_COMP_MASK = IBV_QP_INIT_ATTR_PD | - IBV_QP_INIT_ATTR_CREATE_FLAGS + XSC_CREATE_QP_SUP_COMP_MASK = (IBV_QP_INIT_ATTR_PD | + IBV_QP_INIT_ATTR_CREATE_FLAGS | + IBV_QP_INIT_ATTR_SEND_OPS_FLAGS | + IBV_QP_INIT_ATTR_MAX_TSO_HEADER), }; enum { @@ -971,6 +999,34 @@ enum { XSCDV_QP_CREATE_ALLOW_SCATTER_TO_CQE), }; +static int xsc_cmd_create_qp_ex(struct ibv_context *context, + struct ibv_qp_init_attr_ex *attr, + struct xsc_create_qp *cmd, + struct xsc_qp *qp, + struct xsc_create_qp_resp *resp, + struct xsc_create_qp_ex_resp *resp_ex) +{ + struct xsc_create_qp_ex cmd_ex; + int ret; + + if (attr->comp_mask & 
XSC_CREATE_QP_EX2_COMP_MASK) { + memset(&cmd_ex, 0, sizeof(cmd_ex)); + *ibv_create_qp_ex_to_reg(&cmd_ex.ibv_cmd) = cmd->ibv_cmd.core_payload; + cmd_ex.drv_payload = cmd->drv_payload; + + ret = ibv_cmd_create_qp_ex2(context, &qp->verbs_qp, + attr, &cmd_ex.ibv_cmd, + sizeof(cmd_ex), &resp_ex->ibv_resp, + sizeof(*resp_ex)); + } else { + ret = ibv_cmd_create_qp_ex(context, &qp->verbs_qp, attr, + &cmd->ibv_cmd, sizeof(*cmd), + &resp->ibv_resp, sizeof(*resp)); + } + + return ret; +} + static struct ibv_qp *create_qp(struct ibv_context *context, struct ibv_qp_init_attr_ex *attr, struct xscdv_qp_init_attr *xqp_attr) @@ -992,19 +1048,35 @@ static struct ibv_qp *create_qp(struct ibv_context *context, return NULL; } + /*check qp_type*/ + if ((attr->qp_type != IBV_QPT_RC) && + (attr->qp_type != IBV_QPT_RAW_PACKET)){ + xsc_err("Not supported qp_type:0x%x\n", attr->qp_type); + return NULL; + } + qp = calloc(1, sizeof(*qp)); if (!qp) { xsc_err("QP calloc failed\n"); return NULL; } - ibqp = (struct ibv_qp *)&qp->verbs_qp; + ibqp = &qp->verbs_qp.qp; qp->ibv_qp = ibqp; memset(&cmd, 0, sizeof(cmd)); memset(&resp, 0, sizeof(resp)); memset(&resp_ex, 0, sizeof(resp_ex)); + if (attr->comp_mask & IBV_QP_INIT_ATTR_SEND_OPS_FLAGS) { + ret = xsc_qp_fill_wr_pfns(qp, attr); + if (ret) { + errno = ret; + xsc_err("Fill wr pfns failed\n"); + goto err; + } + } + ret = xsc_calc_wq_size(ctx, attr, qp); if (ret < 0) { xsc_err("Calculate WQ size failed\n"); @@ -1056,17 +1128,28 @@ static struct ibv_qp *create_qp(struct ibv_context *context, "revert create_flags(0x%x) to cmd_flags(0x%x)\n", attr->create_flags, cmd.flags); } + + if (attr->create_flags & XSC_QP_CREATE_RAWPACKET_SNIFFER) { + cmd.flags |= XSC_QP_FLAG_RAWPACKET_SNIFFER; + qp->flags |= XSC_QP_FLAG_RAWPACKET_SNIFFER; + xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, + "revert create_flags(0x%x) to cmd_flags(0x%x)\n", + attr->create_flags, cmd.flags); + } + attr->comp_mask &= ~IBV_QP_INIT_ATTR_CREATE_FLAGS; } + + if (attr->comp_mask & IBV_QP_INIT_ATTR_MAX_TSO_HEADER) + cmd.flags |= XSC_QP_FLAG_RAWPACKET_TSO; + } pthread_mutex_lock(&ctx->qp_table_mutex); xparent_domain = to_xparent_domain(attr->pd); - ret = ibv_cmd_create_qp_ex(context, &qp->verbs_qp, attr, - &cmd.ibv_cmd, sizeof(cmd), - &resp.ibv_resp, sizeof(resp)); + ret = xsc_cmd_create_qp_ex(context, attr, &cmd, qp, &resp, &resp_ex); if (ret) { xsc_err("ibv_cmd_create_qp_ex failed,ret %d\n", ret); errno = ret; @@ -1108,6 +1191,9 @@ static struct ibv_qp *create_qp(struct ibv_context *context, qp->sq.db = ctx->sqm_reg_va + (ctx->qpm_tx_db & (xdev->page_size - 1)); qp->rq.db = ctx->rqm_reg_va + (ctx->qpm_rx_db & (xdev->page_size - 1)); + if (attr->comp_mask & IBV_QP_INIT_ATTR_SEND_OPS_FLAGS) + qp->verbs_qp.comp_mask |= VERBS_QP_EX; + return ibqp; err_destroy: @@ -1261,6 +1347,11 @@ int xsc_query_qp(struct ibv_qp *ibqp, struct ibv_qp_attr *attr, init_attr->cap.max_inline_data = qp->max_inline_data; attr->cap = init_attr->cap; + if (qp->err_occurred) { + qp->err_occurred = 0; + qp->ibv_qp->state = IBV_QPS_ERR; + attr->qp_state = IBV_QPS_ERR; + } return 0; } diff --git a/providers/xscale/xsc_api.h b/providers/xscale/xsc_api.h index c533019..3b3eafc 100644 --- a/providers/xscale/xsc_api.h +++ b/providers/xscale/xsc_api.h @@ -20,9 +20,9 @@ #define XSCDV_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL XSC_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL enum xsc_qp_create_flags { - XSC_QP_CREATE_RAWPACKE_TSO = 1 << 0, XSC_QP_CREATE_RAWPACKET_TSO = 1 << 0, - XSC_QP_CREATE_RAWPACKET_TX = 1 << 1, + XSC_QP_CREATE_RAWPACKET_SNIFFER = 
1 << 2, + XSC_QP_CREATE_RAWPACKET_TX = 1 << 3, }; diff --git a/providers/xscale/xsc_hsi.h b/providers/xscale/xsc_hsi.h index 53fe552..30887af 100644 --- a/providers/xscale/xsc_hsi.h +++ b/providers/xscale/xsc_hsi.h @@ -65,28 +65,50 @@ enum { }; enum { - XSC_ERR_CODE_NAK_RETRY = 0x40, - XSC_ERR_CODE_NAK_OPCODE = 0x41, - XSC_ERR_CODE_NAK_MR = 0x42, - XSC_ERR_CODE_NAK_OPERATION = 0x43, - XSC_ERR_CODE_NAK_RNR = 0x44, - XSC_ERR_CODE_LOCAL_MR = 0x45, - XSC_ERR_CODE_LOCAL_LEN = 0x46, - XSC_ERR_CODE_LOCAL_OPCODE = 0x47, - XSC_ERR_CODE_CQ_OVER_FLOW = 0x48, - XSC_ERR_CODE_STRG_ACC_GEN_CQE = 0x4c, - XSC_ERR_CODE_CQE_ACC = 0x4d, - XSC_ERR_CODE_FLUSH = 0x4e, - XSC_ERR_CODE_MALF_WQE_HOST = 0x50, - XSC_ERR_CODE_MALF_WQE_INFO = 0x51, - XSC_ERR_CODE_MR_NON_NAK = 0x52, - XSC_ERR_CODE_OPCODE_GEN_CQE = 0x61, - XSC_ERR_CODE_MANY_READ = 0x62, - XSC_ERR_CODE_LEN_GEN_CQE = 0x63, - XSC_ERR_CODE_MR = 0x65, - XSC_ERR_CODE_MR_GEN_CQE = 0x66, - XSC_ERR_CODE_OPERATION = 0x67, - XSC_ERR_CODE_MALF_WQE_INFO_GEN_NAK = 0x68, + XSC_ANDES_ERR_CODE_NAK_RETRY = 0x40, + XSC_ANDES_ERR_CODE_NAK_OPCODE = 0x41, + XSC_ANDES_ERR_CODE_NAK_MR = 0x42, + XSC_ANDES_ERR_CODE_NAK_OPERATION = 0x43, + XSC_ANDES_ERR_CODE_NAK_RNR = 0x44, + XSC_ANDES_ERR_CODE_LOCAL_MR = 0x45, + XSC_ANDES_ERR_CODE_LOCAL_LEN = 0x46, + XSC_ANDES_ERR_CODE_LOCAL_OPCODE = 0x47, + XSC_ANDES_ERR_CODE_CQ_OVER_FLOW = 0x48, + XSC_ANDES_ERR_CODE_LOCAL_OPERATION_WQE = 0x49, + XSC_ANDES_ERR_CODE_STRG_ACC_GEN_CQE = 0x4b, + XSC_ANDES_ERR_CODE_STRG_ACC = 0x4c, + XSC_ANDES_ERR_CODE_CQE_ACC = 0x4d, + XSC_ANDES_ERR_CODE_FLUSH = 0x4e, + XSC_ANDES_ERR_CODE_MALF_WQE_HOST = 0x50, + XSC_ANDES_ERR_CODE_MALF_WQE_INFO = 0x51, + XSC_ANDES_ERR_CODE_MR_NON_NAK = 0x52, + XSC_ANDES_ERR_CODE_OPCODE_GEN_CQE = 0x61, + XSC_ANDES_ERR_CODE_MANY_READ = 0x62, + XSC_ANDES_ERR_CODE_LEN_GEN_CQE = 0x63, + XSC_ANDES_ERR_CODE_MR = 0x65, + XSC_ANDES_ERR_CODE_MR_GEN_CQE = 0x66, + XSC_ANDES_ERR_CODE_OPERATION = 0x67, + XSC_ANDES_ERR_CODE_MALF_WQE_INFO_GEN_NAK = 0x68, +}; + +enum { + XSC_DIAMOND_ERR_CODE_NAK_SEQ_ERR = 0xa0, + XSC_DIAMOND_ERR_CODE_RTO_REQ = 0xa2, + XSC_DIAMOND_ERR_CODE_NAK_INV_REQ = 0xa4, + XSC_DIAMOND_ERR_CODE_NAK_MR = 0xa5, + XSC_DIAMOND_ERR_CODE_NAK_REMOTE_OPER_ERR = 0xa6, + XSC_DIAMOND_ERR_CODE_LOCAL_MR_REQ = 0xa7, + XSC_DIAMOND_ERR_CODE_SND_WQE_FORMAT = 0xab, + XSC_DIAMOND_ERR_CODE_RCV_WQE_DMA = 0xaf, + XSC_DIAMOND_ERR_CODE_DATA_DMA_RD_REQ = 0xb2, + XSC_DIAMOND_ERR_CODE_DATA_DMA_WR_RSP_GEN_CQE = 0xb4, + XSC_DIAMOND_ERR_CODE_DATA_DMA_WR_RSP = 0xb5, + XSC_DIAMOND_ERR_CODE_LEN_GEN_CQE = 0xc4, + XSC_DIAMOND_ERR_CODE_LEN = 0xc5, + XSC_DIAMOND_ERR_CODE_REMOTE_MR = 0xd4, + XSC_DIAMOND_ERR_CODE_REMOTE_MR_GEN_CQE = 0xd5, + XSC_DIAMOND_ERR_CODE_LOCAL_MR_RSP = 0xd6, + XSC_DIAMOND_ERR_CODE_FLUSH = 0xff, }; /* TODO: sw cqe opcode*/ @@ -102,6 +124,9 @@ enum { XSC_OPCODE_RDMA_REQ_ERROR = 8, XSC_OPCODE_RDMA_RSP_ERROR = 9, XSC_OPCODE_RDMA_CQE_ERROR = 10, + XSC_OPCODE_RDMA_MAD_REQ_SEND = 11, + XSC_OPCODE_RDMA_MAD_RSP_RECV = 12, + XSC_OPCODE_RDMA_CQE_RAW_SNF = 13, }; enum { @@ -147,13 +172,7 @@ struct xsc_wqe_data_seg { }; struct xsc_cqe { - union { - uint8_t msg_opcode; - struct { - uint8_t error_code:7; - uint8_t is_error:1; - }; - }; + uint8_t placeholder1; __le32 qp_id:15; uint8_t :1; uint8_t se:1; @@ -166,7 +185,9 @@ struct xsc_cqe { __le32 vni; __le64 ts:48; __le16 wqe_id; - __le16 rsv[3]; + uint8_t placeholder2; + uint8_t rsv2; + __le16 rsv[2]; __le16 rsv1:15; uint8_t owner:1; }; @@ -174,32 +195,10 @@ struct xsc_cqe { /* Size of CQE */ #define XSC_CQE_SIZE sizeof(struct xsc_cqe) -union xsc_db_data { - 
struct { - __le32 sq_next_pid:16; - __le32 sqn:15; - __le32 :1; - }; - struct { - __le32 rq_next_pid:13; - __le32 rqn:15; - __le32 :4; - }; - struct { - __le32 cq_next_cid:16; - __le32 cqn:15; - __le32 solicited:1; - }; - __le32 raw_data; -}; - #define CQM_DB_NEXT_CID_OFFSET(n) (4 * (n)) #define XSC_SEND_WQE_RING_DEPTH_MIN 16 #define XSC_CQE_RING_DEPTH_MIN 2 -#define XSC_SEND_WQE_RING_DEPTH_MAX 1024 -#define XSC_RECV_WQE_RING_DEPTH_MAX 1024 -#define XSC_CQE_RING_DEPTH_MAX (1024 * 32) /* * Registers that are allocated by HW and accessed by SW in 4-byte granularity diff --git a/providers/xscale/xscale.c b/providers/xscale/xscale.c index e24cfd2..8b04558 100644 --- a/providers/xscale/xscale.c +++ b/providers/xscale/xscale.c @@ -16,12 +16,14 @@ #include #include +#include #include #include "xscale.h" #include "xsc-abi.h" #include "wqe.h" #include "xsc_hsi.h" +#include "xsc_hw.h" #ifndef CPU_OR #define CPU_OR(x, y, z) do {} while (0) @@ -60,7 +62,8 @@ static const struct verbs_context_ops xsc_ctx_common_ops = { .alloc_pd = xsc_alloc_pd, .dealloc_pd = xsc_free_pd, .reg_mr = xsc_reg_mr, - .rereg_mr = xsc_rereg_mr, + .reg_dmabuf_mr = xsc_reg_dmabuf_mr, + .rereg_mr = NULL, .dereg_mr = xsc_dereg_mr, .alloc_mw = NULL, .dealloc_mw = NULL, @@ -417,6 +420,10 @@ static void xsc_read_env(struct ibv_device *ibdev, struct xsc_context *ctx) ctx->stall_cycles = xsc_stall_cq_poll_min; } + env_value = getenv("XSC_POST_RECV_DUMP_WQE"); + if (env_value) + xsc_post_recv_dump_wqe = (strcmp(env_value, "0")) ? 1 : 0; + } static void open_debug_file(struct xsc_context *ctx) @@ -787,6 +794,7 @@ static void xsc_munmap(struct xsc_context *context) munmap(context->cqm_armdb_va, context->db_mmap_size); } + static struct verbs_context *xsc_alloc_context(struct ibv_device *ibdev, int cmd_fd, void *private_data) @@ -845,6 +853,7 @@ static struct verbs_context *xsc_alloc_context(struct ibv_device *ibdev, context->send_ds_shift = xsc_ilog2(resp.send_ds_num); context->recv_ds_num = resp.recv_ds_num; context->recv_ds_shift = xsc_ilog2(resp.recv_ds_num); + context->device_id = resp.device_id; xsc_dbg(context->dbg_fp, XSC_DBG_CTX, "max_num_qps:%u, max_sq_desc_sz:%u max_rq_desc_sz:%u " \ @@ -894,6 +903,7 @@ static struct verbs_context *xsc_alloc_context(struct ibv_device *ibdev, context->atomic_cap = device_attr.orig_attr.atomic_cap; context->cached_tso_caps = device_attr.tso_caps; context->max_dm_size = device_attr.max_dm_size; + context->max_cqe = device_attr.orig_attr.max_cqe; } for (j = 0; j < min(XSC_MAX_PORTS_NUM, context->num_ports); ++j) { diff --git a/providers/xscale/xscale.h b/providers/xscale/xscale.h index c6cc9f7..e837e9b 100644 --- a/providers/xscale/xscale.h +++ b/providers/xscale/xscale.h @@ -45,6 +45,7 @@ enum { enum { XSC_QP_FLAG_RAWPACKET_TSO = 1 << 9, XSC_QP_FLAG_RAWPACKET_TX = 1 << 10, + XSC_QP_FLAG_RAWPACKET_SNIFFER = 1 << 11, }; @@ -66,6 +67,7 @@ enum { XSC_DBG_CTX = 1 << 7, XSC_DBG_PD = 1 << 8, XSC_DBG_MR = 1 << 9, + XSC_DBG_QP_RECV = 1 << 10, }; extern uint32_t xsc_debug_mask; @@ -75,7 +77,7 @@ extern int xsc_freeze_on_error_cqe; #ifdef XSC_DEBUG #define xsc_dbg(fp, mask, fmt, args...) 
\ do { \ - if (xsc_debug_mask & mask) { \ + if (unlikely(xsc_debug_mask & mask)) { \ char host[256]; \ char timestr[32]; \ struct tm now_tm; \ @@ -246,6 +248,7 @@ struct xsc_context { struct xsc_packet_pacing_caps packet_pacing_caps; uint16_t flow_action_flags; uint64_t max_dm_size; + uint32_t max_cqe; uint32_t eth_min_inline_size; uint32_t dump_fill_mkey; __be32 dump_fill_mkey_be; @@ -264,6 +267,7 @@ struct xsc_context { uint32_t send_ds_shift; uint32_t recv_ds_shift; FILE *dbg_fp; + uint16_t device_id; }; struct xsc_bitmap { @@ -343,7 +347,7 @@ struct xsc_cq { int stall_adaptive_enable; int stall_cycles; struct xsc_resource *cur_rsc; - struct xsc_cqe64 *cqe64; + struct xsc_cqe *cqe; uint32_t flags; int umr_opcode; struct xscdv_clock_info last_clock_info; @@ -387,7 +391,6 @@ struct xsc_dm { struct xsc_mr { struct verbs_mr vmr; - struct xsc_buf buf; uint32_t alloc_flags; }; @@ -408,6 +411,17 @@ struct xsc_qp { struct xsc_buf sq_buf; int sq_buf_size; + int err; + /* Number of WR entries posted in the current wr session */ + int nreq; + uint32_t cur_post_rb; + void *cur_ctrl; + void *cur_data; + int cur_ds_num; + uint32_t cur_data_len; + uint64_t cur_remote_addr; + uint32_t cur_remote_key; + uint8_t fm_cache; uint8_t sq_signal_bits; struct xsc_wq sq; @@ -426,6 +440,7 @@ struct xsc_qp { uint32_t tisn; uint32_t rqn; uint32_t sqn; + unsigned int err_occurred; }; struct xsc_ah { @@ -514,6 +529,7 @@ extern int xsc_stall_cq_poll_max; extern int xsc_stall_cq_inc_step; extern int xsc_stall_cq_dec_step; extern int xsc_single_threaded; +extern int xsc_post_recv_dump_wqe; static inline unsigned DIV_ROUND_UP(unsigned n, unsigned d) { @@ -658,6 +674,8 @@ int xsc_free_pd(struct ibv_pd *pd); struct ibv_mr *xsc_alloc_null_mr(struct ibv_pd *pd); struct ibv_mr *xsc_reg_mr(struct ibv_pd *pd, void *addr, size_t length, uint64_t hca_va, int access); +struct ibv_mr *xsc_reg_dmabuf_mr(struct ibv_pd *pd, uint64_t offset, size_t length, + uint64_t iova, int fd, int acc); int xsc_rereg_mr(struct verbs_mr *mr, int flags, struct ibv_pd *pd, void *addr, size_t length, int access); int xsc_dereg_mr(struct verbs_mr *mr); @@ -666,9 +684,8 @@ struct ibv_cq *xsc_create_cq(struct ibv_context *context, int cqe, int comp_vector); struct ibv_cq_ex *xsc_create_cq_ex(struct ibv_context *context, struct ibv_cq_init_attr_ex *cq_attr); -int xsc_cq_fill_pfns(struct xsc_cq *cq, - const struct ibv_cq_init_attr_ex *cq_attr, - struct xsc_context *xctx); +void xsc_cq_fill_pfns(struct xsc_cq *cq, + const struct ibv_cq_init_attr_ex *cq_attr); int xsc_alloc_cq_buf(struct xsc_context *xctx, struct xsc_cq *cq, struct xsc_buf *buf, int nent, int cqe_sz); int xsc_free_cq_buf(struct xsc_context *ctx, struct xsc_buf *buf); @@ -710,7 +727,6 @@ int xsc_destroy_ah(struct ibv_ah *ah); int xsc_attach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid); int xsc_detach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid); int xsc_round_up_power_of_two(long long sz); -void *xsc_get_send_wqe(struct xsc_qp *qp, int n); struct ibv_xrcd *xsc_open_xrcd(struct ibv_context *context, struct ibv_xrcd_init_attr *xrcd_init_attr); int xsc_close_xrcd(struct ibv_xrcd *ib_xrcd); @@ -750,7 +766,7 @@ int xsc_read_counters(struct ibv_counters *counters, uint64_t *counters_value, uint32_t ncounters, uint32_t flags); - +int xsc_qp_fill_wr_pfns(struct xsc_qp *xqp, const struct ibv_qp_init_attr_ex *attr); static inline void *xsc_find_uidx(struct xsc_context *ctx, uint32_t uidx) { int tind = uidx >> XSC_UIDX_TABLE_SHIFT; @@ -849,4 +865,9 @@ static inline 
uint8_t calc_sig(void *wqe, int size) return ~res; } +static inline void *xsc_get_send_wqe(struct xsc_qp *qp, int n) +{ + return qp->sq_start + (n << qp->sq.wqe_shift); +} + #endif /* XSC_H */ -- 2.43.0
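
The wr_* callbacks installed by xsc_qp_fill_wr_pfns()/fill_wr_pfns_rc() above are reached through the generic ibv_wr_* wrappers in libibverbs. The following consumer-side sketch is not part of the patch; it only illustrates, under the assumption that ctx, pd, cq and mr are already set up and the QP has been moved to RTS, how an application would create an RC QP with IBV_QP_INIT_ATTR_SEND_OPS_FLAGS and post an RDMA WRITE so that the new xsc_wr_* path is exercised.

/* Illustrative sketch only: names such as create_wr_qp()/post_rdma_write()
 * are hypothetical helpers, not part of libxscale.
 */
#include <stdint.h>
#include <infiniband/verbs.h>

static struct ibv_qp *create_wr_qp(struct ibv_context *ctx, struct ibv_pd *pd,
				   struct ibv_cq *cq)
{
	struct ibv_qp_init_attr_ex attr = {
		.qp_type = IBV_QPT_RC,
		.send_cq = cq,
		.recv_cq = cq,
		.cap = { .max_send_wr = 64, .max_recv_wr = 64,
			 .max_send_sge = 1, .max_recv_sge = 1 },
		.pd = pd,
		.comp_mask = IBV_QP_INIT_ATTR_PD |
			     IBV_QP_INIT_ATTR_SEND_OPS_FLAGS,
		/* must stay within XSC_SUPPORTED_SEND_OPS_FLAGS_RC,
		 * otherwise create_qp() fails with EOPNOTSUPP
		 */
		.send_ops_flags = IBV_QP_EX_WITH_SEND |
				  IBV_QP_EX_WITH_RDMA_WRITE,
	};

	return ibv_create_qp_ex(ctx, &attr);
}

static int post_rdma_write(struct ibv_qp *qp, struct ibv_mr *mr,
			   uint64_t remote_addr, uint32_t rkey, uint32_t len)
{
	struct ibv_qp_ex *qpx = ibv_qp_to_qp_ex(qp);

	ibv_wr_start(qpx);			  /* -> xsc_wr_start()	    */
	qpx->wr_id = 1;
	qpx->wr_flags = IBV_SEND_SIGNALED;
	ibv_wr_rdma_write(qpx, rkey, remote_addr); /* -> xsc_wr_rdma_write() */
	ibv_wr_set_sge(qpx, mr->lkey,
		       (uint64_t)(uintptr_t)mr->addr, len); /* -> xsc_wr_set_sge() */
	return ibv_wr_complete(qpx);		  /* -> xsc_wr_complete()   */
}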
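
Likewise, the extended CQ path selected by XSC_CQ_FLAGS_EXTENDED and filled in by xsc_cq_fill_pfns() is consumed through the standard ibv_start_poll()/ibv_next_poll()/ibv_end_poll() iterators. A rough sketch, assuming the CQ was created with ibv_create_cq_ex() and its wc_flags kept within what CREATE_CQ_SUPPORTED_WC_FLAGS now advertises (IBV_WC_STANDARD_FLAGS plus IBV_WC_EX_WITH_COMPLETION_TIMESTAMP); drain_cq_ex() is a hypothetical helper:

#include <errno.h>
#include <stdio.h>
#include <infiniband/verbs.h>

/* Drain an extended CQ; illustrative only, error handling trimmed. */
static int drain_cq_ex(struct ibv_cq_ex *cq_ex)
{
	struct ibv_poll_cq_attr attr = {};
	int ret, n = 0;

	ret = ibv_start_poll(cq_ex, &attr);
	if (ret)	/* ENOENT means the CQ is empty, anything else is an error */
		return ret == ENOENT ? 0 : -ret;

	do {
		if (cq_ex->status != IBV_WC_SUCCESS)
			fprintf(stderr, "wr_id %llu failed: status %d\n",
				(unsigned long long)cq_ex->wr_id, cq_ex->status);
		else if (ibv_wc_read_opcode(cq_ex) == IBV_WC_RECV)
			printf("recv len %u ts %llu\n",
			       ibv_wc_read_byte_len(cq_ex),
			       (unsigned long long)ibv_wc_read_completion_ts(cq_ex));
		n++;
	} while (!ibv_next_poll(cq_ex));	/* ENOENT: no more CQEs */

	ibv_end_poll(cq_ex);
	return n;
}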
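
Finally, the new xsc_reg_dmabuf_mr() hook is reached through the standard ibv_reg_dmabuf_mr() entry point. A short, hypothetical call site, assuming dmabuf_fd and buf_len come from a dma-buf exporter (e.g. a GPU driver):

#include <infiniband/verbs.h>

/* Register a dma-buf backed MR through the new provider hook;
 * reg_dmabuf() is an illustrative helper, not part of libxscale.
 */
static struct ibv_mr *reg_dmabuf(struct ibv_pd *pd, int dmabuf_fd, size_t buf_len)
{
	/* offset 0, iova 0: map the whole buffer from its beginning */
	return ibv_reg_dmabuf_mr(pd, 0, buf_len, 0, dmabuf_fd,
				 IBV_ACCESS_LOCAL_WRITE |
				 IBV_ACCESS_REMOTE_READ |
				 IBV_ACCESS_REMOTE_WRITE);
}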