From cd10cbeac856514302fbad8f1d17ec770f44faa9 Mon Sep 17 00:00:00 2001 From: Xin Tian Date: Wed, 26 Feb 2025 10:29:59 +0800 Subject: [PATCH] libxscale: Add Yunsilicon User Space RDMA Driver Introduce xscale provider for Yunsilicon devices. Signed-off-by: Xin Tian --- CMakeLists.txt | 1 + MAINTAINERS | 6 + README.md | 1 + debian/control | 1 + debian/copyright | 4 + debian/ibverbs-providers.install | 1 + debian/libibverbs-dev.install | 7 + kernel-headers/CMakeLists.txt | 4 + kernel-headers/rdma/ib_user_ioctl_verbs.h | 1 + kernel-headers/rdma/xsc-abi.h | 333 +++ kernel-headers/rdma/xsc_user_ioctl_cmds.h | 163 ++ kernel-headers/rdma/xsc_user_ioctl_verbs.h | 27 + libibverbs/verbs.h | 1 + providers/xscale/CMakeLists.txt | 18 + providers/xscale/bitmap.h | 84 + providers/xscale/buf.c | 594 +++++ providers/xscale/cq.c | 1410 ++++++++++ providers/xscale/cqm_csr_defines.h | 180 ++ providers/xscale/dbrec.c | 131 + providers/xscale/libxsc.map | 59 + providers/xscale/qp.c | 678 +++++ providers/xscale/rqm_csr_defines.h | 200 ++ providers/xscale/sqm_csr_defines.h | 204 ++ providers/xscale/verbs.c | 2816 ++++++++++++++++++++ providers/xscale/wqe.h | 72 + providers/xscale/xsc-abi.h | 56 + providers/xscale/xsc_api.h | 29 + providers/xscale/xsc_hsi.h | 252 ++ providers/xscale/xsc_hw.h | 584 ++++ providers/xscale/xscale.c | 948 +++++++ providers/xscale/xscale.h | 834 ++++++ providers/xscale/xscdv.h | 876 ++++++ redhat/rdma-core.spec | 4 + 33 files changed, 10579 insertions(+) create mode 100644 kernel-headers/rdma/xsc-abi.h create mode 100644 kernel-headers/rdma/xsc_user_ioctl_cmds.h create mode 100644 kernel-headers/rdma/xsc_user_ioctl_verbs.h create mode 100644 providers/xscale/CMakeLists.txt create mode 100644 providers/xscale/bitmap.h create mode 100644 providers/xscale/buf.c create mode 100644 providers/xscale/cq.c create mode 100644 providers/xscale/cqm_csr_defines.h create mode 100644 providers/xscale/dbrec.c create mode 100644 providers/xscale/libxsc.map create mode 100644 providers/xscale/qp.c create mode 100644 providers/xscale/rqm_csr_defines.h create mode 100644 providers/xscale/sqm_csr_defines.h create mode 100644 providers/xscale/verbs.c create mode 100644 providers/xscale/wqe.h create mode 100644 providers/xscale/xsc-abi.h create mode 100644 providers/xscale/xsc_api.h create mode 100644 providers/xscale/xsc_hsi.h create mode 100755 providers/xscale/xsc_hw.h create mode 100644 providers/xscale/xscale.c create mode 100644 providers/xscale/xscale.h create mode 100644 providers/xscale/xscdv.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 98985e7..c803f73 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -748,6 +748,7 @@ add_subdirectory(providers/mthca) add_subdirectory(providers/ocrdma) add_subdirectory(providers/qedr) add_subdirectory(providers/vmw_pvrdma) +add_subdirectory(providers/xscale) endif() add_subdirectory(providers/hfi1verbs) diff --git a/MAINTAINERS b/MAINTAINERS index 4b24117..d3b66ad 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -185,6 +185,12 @@ L: pv-drivers@vmware.com S: Supported F: providers/vmw_pvrdma/ +XSCALE USERSPACE PROVIDER (for xsc_ib.ko) +M: Honggang Wei +M: Xin Tianx +S: Supported +F: providers/xscale/ + PYVERBS M: Edward Srouji S: Supported diff --git a/README.md b/README.md index 928bdc4..8f7a9a5 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,7 @@ is included: - rdma_rxe.ko - siw.ko - vmw_pvrdma.ko + - xsc_ib.ko Additional service daemons are provided for: - srp_daemon (ib_srp.ko) diff --git a/debian/control b/debian/control index 2a55372..5296ea7 100644 
--- a/debian/control +++ b/debian/control @@ -99,6 +99,7 @@ Description: User space provider drivers for libibverbs - rxe: A software implementation of the RoCE protocol - siw: A software implementation of the iWarp protocol - vmw_pvrdma: VMware paravirtual RDMA device + - xscale: Yunsilicon RDMA device Package: ibverbs-utils Architecture: linux-any diff --git a/debian/copyright b/debian/copyright index 36ac71e..0a623e3 100644 --- a/debian/copyright +++ b/debian/copyright @@ -228,6 +228,10 @@ Files: providers/vmw_pvrdma/* Copyright: 2012-2016 VMware, Inc. License: BSD-2-clause or GPL-2 +Files: providers/xscale/* +Copyright: 2021-2025, Yunsilicon Technology Co., Ltd. +License: GPL-2 + + Files: rdma-ndd/* Copyright: 2004-2016, Intel Corporation. License: BSD-MIT or GPL-2 diff --git a/debian/ibverbs-providers.install b/debian/ibverbs-providers.install index fea15e0..d20bd42 100644 --- a/debian/ibverbs-providers.install +++ b/debian/ibverbs-providers.install @@ -5,3 +5,4 @@ usr/lib/*/libhns.so.* usr/lib/*/libmana.so.* usr/lib/*/libmlx4.so.* usr/lib/*/libmlx5.so.* +usr/lib/*/libxscale.so.* diff --git a/debian/libibverbs-dev.install b/debian/libibverbs-dev.install index ef5b9a4..8cd09fa 100644 --- a/debian/libibverbs-dev.install +++ b/debian/libibverbs-dev.install @@ -26,6 +26,8 @@ usr/lib/*/libmlx4.a usr/lib/*/libmlx4.so usr/lib/*/libmlx5.a usr/lib/*/libmlx5.so +usr/lib/*/libxscale.a +usr/lib/*/libxscale.so usr/lib/*/pkgconfig/libefa.pc usr/lib/*/pkgconfig/libhns.pc usr/lib/*/pkgconfig/libibverbs.pc diff --git a/kernel-headers/CMakeLists.txt b/kernel-headers/CMakeLists.txt index 82c191c..9acb1fc 100644 --- a/kernel-headers/CMakeLists.txt +++ b/kernel-headers/CMakeLists.txt @@ -26,6 +26,9 @@ publish_internal_headers(rdma rdma/rvt-abi.h rdma/siw-abi.h rdma/vmw_pvrdma-abi.h + rdma/xsc-abi.h + rdma/xsc_user_ioctl_cmds.h + rdma/xsc_user_ioctl_verbs.h ) publish_internal_headers(rdma/hfi @@ -80,6 +83,7 @@ rdma_kernel_provider_abi( rdma/rdma_user_rxe.h rdma/siw-abi.h rdma/vmw_pvrdma-abi.h + rdma/xsc-abi.h ) publish_headers(infiniband diff --git a/kernel-headers/rdma/ib_user_ioctl_verbs.h b/kernel-headers/rdma/ib_user_ioctl_verbs.h index fe15bc7..cfac178 100644 --- a/kernel-headers/rdma/ib_user_ioctl_verbs.h +++ b/kernel-headers/rdma/ib_user_ioctl_verbs.h @@ -255,6 +255,7 @@ enum rdma_driver_id { RDMA_DRIVER_SIW, RDMA_DRIVER_ERDMA, RDMA_DRIVER_MANA, + RDMA_DRIVER_XSC, }; enum ib_uverbs_gid_type { diff --git a/kernel-headers/rdma/xsc-abi.h b/kernel-headers/rdma/xsc-abi.h new file mode 100644 index 0000000..4af6408 --- /dev/null +++ b/kernel-headers/rdma/xsc-abi.h @@ -0,0 +1,333 @@ +/* + * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd. + * All rights reserved. + */ + +#ifndef XSC_ABI_USER_H +#define XSC_ABI_USER_H + +#include <linux/types.h> +#include <linux/if_ether.h> /* For ETH_ALEN. */ +#include <rdma/ib_user_ioctl_verbs.h> + +enum { + XSC_WQ_FLAG_SIGNATURE = 1 << 0, +}; + +/* Make sure that all structs defined in this file remain laid out so + * that they pack the same way on 32-bit and 64-bit architectures (to + * avoid incompatibility between 32-bit userspace and 64-bit kernels). + * In particular do not use pointer types -- pass pointers in __u64 + * instead.
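A quick illustration of that layout rule (illustrative sketch, not part of the patch; struct example_abi_req and example_fill_req are hypothetical names, and <stdint.h> is assumed for uintptr_t): user pointers travel as 64-bit integers, and padding is made explicit so the struct has the same size and offsets on 32-bit and 64-bit builds.

	struct example_abi_req {                /* hypothetical, for illustration only */
		__aligned_u64	buf_addr;       /* user pointer carried as a 64-bit integer */
		__u32		buf_len;
		__u32		reserved;       /* explicit padding, no implicit holes */
	};

	static inline void example_fill_req(struct example_abi_req *req,
					    void *buf, __u32 len)
	{
		req->buf_addr = (uintptr_t)buf; /* widen via uintptr_t, never store a raw pointer */
		req->buf_len = len;
		req->reserved = 0;
	}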
+ */ + +struct xsc_ib_alloc_ucontext_req { + __u32 rsvd0; + __u32 rsvd1; +}; + +enum xsc_user_cmds_supp_uhw { + XSC_USER_CMDS_SUPP_UHW_QUERY_DEVICE = 1 << 0, + XSC_USER_CMDS_SUPP_UHW_CREATE_AH = 1 << 1, +}; + +struct xsc_ib_alloc_ucontext_resp { + __u32 qp_tab_size; + __u32 cache_line_size; + __u16 max_sq_desc_sz; + __u16 max_rq_desc_sz; + __u32 max_send_wqebb; + __u32 max_recv_wr; + __u16 num_ports; + __u16 device_id; + __u64 qpm_tx_db; + __u64 qpm_rx_db; + __u64 cqm_next_cid_reg; + __u64 cqm_armdb; + __u32 send_ds_num; + __u32 recv_ds_num; + __u32 cmds_supp_uhw; +}; + +struct xsc_ib_alloc_pd_resp { + __u32 pdn; +}; + +struct xsc_ib_tso_caps { + __u32 max_tso; /* Maximum tso payload size in bytes */ + + /* Corresponding bit will be set if qp type from + * 'enum ib_qp_type' is supported, e.g. + * supported_qpts |= 1 << IB_QPT_UD + */ + __u32 supported_qpts; +}; + +struct xsc_ib_rss_caps { + __aligned_u64 rx_hash_fields_mask; /* enum xsc_rx_hash_fields */ + __u8 rx_hash_function; /* enum xsc_rx_hash_function_flags */ + __u8 reserved[7]; +}; + +enum xsc_ib_cqe_comp_res_format { + XSC_IB_CQE_RES_FORMAT_HASH = 1 << 0, + XSC_IB_CQE_RES_FORMAT_CSUM = 1 << 1, + XSC_IB_CQE_RES_FORMAT_CSUM_STRIDX = 1 << 2, +}; + +struct xsc_ib_cqe_comp_caps { + __u32 max_num; + __u32 supported_format; /* enum xsc_ib_cqe_comp_res_format */ +}; + +enum xsc_ib_packet_pacing_cap_flags { + XSC_IB_PP_SUPPORT_BURST = 1 << 0, +}; + +struct xsc_packet_pacing_caps { + __u32 qp_rate_limit_min; + __u32 qp_rate_limit_max; /* In kpbs */ + + /* Corresponding bit will be set if qp type from + * 'enum ib_qp_type' is supported, e.g. + * supported_qpts |= 1 << IB_QPT_RAW_PACKET + */ + __u32 supported_qpts; + __u8 cap_flags; /* enum xsc_ib_packet_pacing_cap_flags */ + __u8 reserved[3]; +}; + +enum xsc_ib_mpw_caps { + MPW_RESERVED = 1 << 0, + XSC_IB_ALLOW_MPW = 1 << 1, + XSC_IB_SUPPORT_EMPW = 1 << 2, +}; + +enum xsc_ib_sw_parsing_offloads { + XSC_IB_SW_PARSING = 1 << 0, + XSC_IB_SW_PARSING_CSUM = 1 << 1, + XSC_IB_SW_PARSING_LSO = 1 << 2, +}; + +struct xsc_ib_sw_parsing_caps { + __u32 sw_parsing_offloads; /* enum xsc_ib_sw_parsing_offloads */ + + /* Corresponding bit will be set if qp type from + * 'enum ib_qp_type' is supported, e.g. + * supported_qpts |= 1 << IB_QPT_RAW_PACKET + */ + __u32 supported_qpts; +}; + +struct xsc_ib_striding_rq_caps { + __u32 min_single_stride_log_num_of_bytes; + __u32 max_single_stride_log_num_of_bytes; + __u32 min_single_wqe_log_num_of_strides; + __u32 max_single_wqe_log_num_of_strides; + + /* Corresponding bit will be set if qp type from + * 'enum ib_qp_type' is supported, e.g. 
+ * supported_qpts |= 1 << IB_QPT_RAW_PACKET + */ + __u32 supported_qpts; + __u32 reserved; +}; + +enum xsc_ib_query_dev_resp_flags { + /* Support 128B CQE compression */ + XSC_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_COMP = 1 << 0, + XSC_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_PAD = 1 << 1, +}; + +enum xsc_ib_tunnel_offloads { + XSC_IB_TUNNELED_OFFLOADS_VXLAN = 1 << 0, + XSC_IB_TUNNELED_OFFLOADS_GRE = 1 << 1, + XSC_IB_TUNNELED_OFFLOADS_GENEVE = 1 << 2, + XSC_IB_TUNNELED_OFFLOADS_MPLS_GRE = 1 << 3, + XSC_IB_TUNNELED_OFFLOADS_MPLS_UDP = 1 << 4, +}; + +struct xsc_ib_query_device_resp { + __u32 comp_mask; + __u32 response_length; + struct xsc_ib_tso_caps tso_caps; + struct xsc_ib_rss_caps rss_caps; + struct xsc_ib_cqe_comp_caps cqe_comp_caps; + struct xsc_packet_pacing_caps packet_pacing_caps; + __u32 xsc_ib_support_multi_pkt_send_wqes; + __u32 flags; /* Use enum xsc_ib_query_dev_resp_flags */ + struct xsc_ib_sw_parsing_caps sw_parsing_caps; + struct xsc_ib_striding_rq_caps striding_rq_caps; + __u32 tunnel_offloads_caps; /* enum xsc_ib_tunnel_offloads */ + __u32 reserved; +}; + +struct xsc_ib_create_cq { + __aligned_u64 buf_addr; + __aligned_u64 db_addr; + __u32 cqe_size; +}; + +struct xsc_ib_create_cq_resp { + __u32 cqn; + __u32 reserved; +}; + +struct xsc_ib_resize_cq { + __aligned_u64 buf_addr; + __u16 cqe_size; + __u16 reserved0; + __u32 reserved1; +}; + +struct xsc_ib_create_qp { + __aligned_u64 buf_addr; + __aligned_u64 db_addr; + __u32 sq_wqe_count; + __u32 rq_wqe_count; + __u32 rq_wqe_shift; + __u32 flags; +}; + +/* RX Hash function flags */ +enum xsc_rx_hash_function_flags { + XSC_RX_HASH_FUNC_TOEPLITZ = 1 << 0, +}; + +/* + * RX Hash flags, these flags allows to set which incoming packet's field should + * participates in RX Hash. Each flag represent certain packet's field, + * when the flag is set the field that is represented by the flag will + * participate in RX Hash calculation. + * Note: *IPV4 and *IPV6 flags can't be enabled together on the same QP + * and *TCP and *UDP flags can't be enabled together on the same QP. 
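The two exclusion rules above can be checked mechanically; here is an illustrative helper (not part of the patch, and it assumes enum xsc_rx_hash_fields, declared just below this comment, is already in scope):

	static inline int xsc_rx_hash_fields_valid(__u64 fields)
	{
		const __u64 v4  = XSC_RX_HASH_SRC_IPV4 | XSC_RX_HASH_DST_IPV4;
		const __u64 v6  = XSC_RX_HASH_SRC_IPV6 | XSC_RX_HASH_DST_IPV6;
		const __u64 tcp = XSC_RX_HASH_SRC_PORT_TCP | XSC_RX_HASH_DST_PORT_TCP;
		const __u64 udp = XSC_RX_HASH_SRC_PORT_UDP | XSC_RX_HASH_DST_PORT_UDP;

		if ((fields & v4) && (fields & v6))
			return 0;	/* IPv4 and IPv6 selectors cannot be combined */
		if ((fields & tcp) && (fields & udp))
			return 0;	/* TCP and UDP selectors cannot be combined */
		return 1;
	}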
+*/ +enum xsc_rx_hash_fields { + XSC_RX_HASH_SRC_IPV4 = 1 << 0, + XSC_RX_HASH_DST_IPV4 = 1 << 1, + XSC_RX_HASH_SRC_IPV6 = 1 << 2, + XSC_RX_HASH_DST_IPV6 = 1 << 3, + XSC_RX_HASH_SRC_PORT_TCP = 1 << 4, + XSC_RX_HASH_DST_PORT_TCP = 1 << 5, + XSC_RX_HASH_SRC_PORT_UDP = 1 << 6, + XSC_RX_HASH_DST_PORT_UDP = 1 << 7, + XSC_RX_HASH_IPSEC_SPI = 1 << 8, + /* Save bits for future fields */ + XSC_RX_HASH_INNER = (1UL << 31), +}; + +struct xsc_ib_create_qp_rss { + __aligned_u64 rx_hash_fields_mask; /* enum xscd_rx_hash_fields */ + __u8 rx_hash_function; /* enum xsc_rx_hash_function_flags */ + __u8 rx_key_len; /* valid only for Toeplitz */ + __u8 reserved[6]; + __u8 rx_hash_key[128]; /* valid only for Toeplitz */ + __u32 comp_mask; + __u32 flags; +}; + +struct xsc_ib_create_qp_resp { + __u32 bfreg_index; + __u32 resv; +}; + +enum xsc_ib_create_wq_mask { + XSC_IB_CREATE_WQ_STRIDING_RQ = (1 << 0), +}; + +struct xsc_ib_create_wq { + __aligned_u64 buf_addr; + __aligned_u64 db_addr; + __u32 rq_wqe_count; + __u32 rq_wqe_shift; + __u32 user_index; + __u32 flags; + __u32 comp_mask; + __u32 single_stride_log_num_of_bytes; + __u32 single_wqe_log_num_of_strides; + __u32 two_byte_shift_en; +}; + +struct xsc_ib_create_ah_resp { + __u32 response_length; + __u8 dmac[ETH_ALEN]; + __u8 reserved[6]; +}; + +struct xsc_ib_burst_info { + __u32 max_burst_sz; + __u16 typical_pkt_sz; + __u16 reserved; +}; + +struct xsc_ib_modify_qp { + __u32 comp_mask; + struct xsc_ib_burst_info burst_info; + __u32 reserved; +}; + +struct xsc_ib_modify_qp_resp { + __u32 response_length; + __u32 dctn; +}; + +struct xsc_ib_create_wq_resp { + __u32 response_length; + __u32 reserved; +}; + +struct xsc_ib_modify_wq { + __u32 comp_mask; + __u32 reserved; +}; + +struct xsc_ib_clock_info { + __u32 sign; + __u32 resv; + __aligned_u64 nsec; + __aligned_u64 cycles; + __aligned_u64 frac; + __u32 mult; + __u32 shift; + __aligned_u64 mask; + __aligned_u64 overflow_period; +}; + +enum xsc_ib_mmap_cmd { + XSC_IB_MMAP_REGULAR_PAGE = 0, + XSC_IB_MMAP_GET_CONTIGUOUS_PAGES = 1, + XSC_IB_MMAP_WC_PAGE = 2, + XSC_IB_MMAP_NC_PAGE = 3, + XSC_IB_MMAP_CORE_CLOCK = 5, + XSC_IB_MMAP_ALLOC_WC = 6, + XSC_IB_MMAP_CLOCK_INFO = 7, + XSC_IB_MMAP_DEVICE_MEM = 8, +}; + +enum { + XSC_IB_CLOCK_INFO_KERNEL_UPDATING = 1, +}; + +struct xsc_ib_flow_counters_desc { + __u32 description; + __u32 index; +}; + +struct xsc_ib_flow_counters_data { + RDMA_UAPI_PTR(struct xsc_ib_flow_counters_desc *, counters_data); + __u32 ncounters; + __u32 reserved; +}; + +struct xsc_ib_create_flow { + __u32 ncounters_data; + __u32 reserved; + /* + * Following are counters data based on ncounters_data, each + * entry in the data[] should match a corresponding counter object + * that was pointed by a counters spec upon the flow creation + */ + struct xsc_ib_flow_counters_data data[]; +}; + +#endif /* XSC_ABI_USER_H */ diff --git a/kernel-headers/rdma/xsc_user_ioctl_cmds.h b/kernel-headers/rdma/xsc_user_ioctl_cmds.h new file mode 100644 index 0000000..590a061 --- /dev/null +++ b/kernel-headers/rdma/xsc_user_ioctl_cmds.h @@ -0,0 +1,163 @@ +/* + * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd. + * All rights reserved. 
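One note on the end of xsc-abi.h above: struct xsc_ib_create_flow closes with a flexible data[] array carrying one struct xsc_ib_flow_counters_data entry per ncounters_data. An illustrative way to size such a request (not part of the patch; example_alloc_create_flow is a hypothetical helper and <stdlib.h> is assumed):

	static struct xsc_ib_create_flow *example_alloc_create_flow(__u32 ncounters)
	{
		size_t len = sizeof(struct xsc_ib_create_flow) +
			     ncounters * sizeof(struct xsc_ib_flow_counters_data);
		struct xsc_ib_create_flow *cmd = calloc(1, len);

		if (cmd)
			cmd->ncounters_data = ncounters;	/* one data[] entry per counter set */
		return cmd;
	}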
+ */ + +#ifndef XSC_USER_IOCTL_CMDS_H +#define XSC_USER_IOCTL_CMDS_H + +#include +#include + +enum xsc_ib_create_flow_action_attrs { + /* This attribute belong to the driver namespace */ + XSC_IB_ATTR_CREATE_FLOW_ACTION_FLAGS = (1U << UVERBS_ID_NS_SHIFT), +}; + +enum xsc_ib_alloc_dm_attrs { + XSC_IB_ATTR_ALLOC_DM_RESP_START_OFFSET = (1U << UVERBS_ID_NS_SHIFT), + XSC_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX, +}; + +enum xsc_ib_devx_methods { + XSC_IB_METHOD_DEVX_OTHER = (1U << UVERBS_ID_NS_SHIFT), + XSC_IB_METHOD_DEVX_QUERY_UAR, + XSC_IB_METHOD_DEVX_QUERY_EQN, +}; + +enum xsc_ib_devx_other_attrs { + XSC_IB_ATTR_DEVX_OTHER_CMD_IN = (1U << UVERBS_ID_NS_SHIFT), + XSC_IB_ATTR_DEVX_OTHER_CMD_OUT, +}; + +enum xsc_ib_devx_obj_create_attrs { + XSC_IB_ATTR_DEVX_OBJ_CREATE_HANDLE = (1U << UVERBS_ID_NS_SHIFT), + XSC_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN, + XSC_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, +}; + +enum xsc_ib_devx_query_uar_attrs { + XSC_IB_ATTR_DEVX_QUERY_UAR_USER_IDX = (1U << UVERBS_ID_NS_SHIFT), + XSC_IB_ATTR_DEVX_QUERY_UAR_DEV_IDX, +}; + +enum xsc_ib_devx_obj_destroy_attrs { + XSC_IB_ATTR_DEVX_OBJ_DESTROY_HANDLE = (1U << UVERBS_ID_NS_SHIFT), +}; + +enum xsc_ib_devx_obj_modify_attrs { + XSC_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE = (1U << UVERBS_ID_NS_SHIFT), + XSC_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN, + XSC_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT, +}; + +enum xsc_ib_devx_obj_query_attrs { + XSC_IB_ATTR_DEVX_OBJ_QUERY_HANDLE = (1U << UVERBS_ID_NS_SHIFT), + XSC_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN, + XSC_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT, +}; + +enum xsc_ib_devx_query_eqn_attrs { + XSC_IB_ATTR_DEVX_QUERY_EQN_USER_VEC = (1U << UVERBS_ID_NS_SHIFT), + XSC_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN, +}; + +enum xsc_ib_devx_obj_methods { + XSC_IB_METHOD_DEVX_OBJ_CREATE = (1U << UVERBS_ID_NS_SHIFT), + XSC_IB_METHOD_DEVX_OBJ_DESTROY, + XSC_IB_METHOD_DEVX_OBJ_MODIFY, + XSC_IB_METHOD_DEVX_OBJ_QUERY, +}; + +enum xsc_ib_devx_umem_reg_attrs { + XSC_IB_ATTR_DEVX_UMEM_REG_HANDLE = (1U << UVERBS_ID_NS_SHIFT), + XSC_IB_ATTR_DEVX_UMEM_REG_ADDR, + XSC_IB_ATTR_DEVX_UMEM_REG_LEN, + XSC_IB_ATTR_DEVX_UMEM_REG_ACCESS, + XSC_IB_ATTR_DEVX_UMEM_REG_OUT_ID, +}; + +enum xsc_ib_devx_umem_dereg_attrs { + XSC_IB_ATTR_DEVX_UMEM_DEREG_HANDLE = (1U << UVERBS_ID_NS_SHIFT), +}; + +enum xsc_ib_devx_umem_methods { + XSC_IB_METHOD_DEVX_UMEM_REG = (1U << UVERBS_ID_NS_SHIFT), + XSC_IB_METHOD_DEVX_UMEM_DEREG, +}; + +enum xsc_ib_objects { + XSC_IB_OBJECT_DEVX = (1U << UVERBS_ID_NS_SHIFT), + XSC_IB_OBJECT_DEVX_OBJ, + XSC_IB_OBJECT_DEVX_UMEM, + XSC_IB_OBJECT_FLOW_MATCHER, +}; + +enum xsc_ib_flow_matcher_create_attrs { + XSC_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE = (1U << UVERBS_ID_NS_SHIFT), + XSC_IB_ATTR_FLOW_MATCHER_MATCH_MASK, + XSC_IB_ATTR_FLOW_MATCHER_FLOW_TYPE, + XSC_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA, + XSC_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS, +}; + +enum xsc_ib_flow_matcher_destroy_attrs { + XSC_IB_ATTR_FLOW_MATCHER_DESTROY_HANDLE = (1U << UVERBS_ID_NS_SHIFT), +}; + +enum xsc_ib_flow_matcher_methods { + XSC_IB_METHOD_FLOW_MATCHER_CREATE = (1U << UVERBS_ID_NS_SHIFT), + XSC_IB_METHOD_FLOW_MATCHER_DESTROY, +}; + +#define XSC_IB_DW_MATCH_PARAM 0x80 + +struct xsc_ib_match_params { + __u32 match_params[XSC_IB_DW_MATCH_PARAM]; +}; + +enum xsc_ib_flow_type { + XSC_IB_FLOW_TYPE_NORMAL, + XSC_IB_FLOW_TYPE_SNIFFER, + XSC_IB_FLOW_TYPE_ALL_DEFAULT, + XSC_IB_FLOW_TYPE_MC_DEFAULT, +}; + +enum xsc_ib_create_flow_attrs { + XSC_IB_ATTR_CREATE_FLOW_HANDLE = (1U << UVERBS_ID_NS_SHIFT), + XSC_IB_ATTR_CREATE_FLOW_MATCH_VALUE, + XSC_IB_ATTR_CREATE_FLOW_DEST_QP, + XSC_IB_ATTR_CREATE_FLOW_DEST_DEVX, + 
XSC_IB_ATTR_CREATE_FLOW_MATCHER, + XSC_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, + XSC_IB_ATTR_CREATE_FLOW_TAG, +}; + +enum xsc_ib_destoy_flow_attrs { + XSC_IB_ATTR_DESTROY_FLOW_HANDLE = (1U << UVERBS_ID_NS_SHIFT), +}; + +enum xsc_ib_flow_methods { + XSC_IB_METHOD_CREATE_FLOW = (1U << UVERBS_ID_NS_SHIFT), + XSC_IB_METHOD_DESTROY_FLOW, +}; + +enum xsc_ib_flow_action_methods { + XSC_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER = (1U << UVERBS_ID_NS_SHIFT), + XSC_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT, +}; + +enum xsc_ib_create_flow_action_create_modify_header_attrs { + XSC_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE = (1U << UVERBS_ID_NS_SHIFT), + XSC_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM, + XSC_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE, +}; + +enum xsc_ib_create_flow_action_create_packet_reformat_attrs { + XSC_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE = (1U << UVERBS_ID_NS_SHIFT), + XSC_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE, + XSC_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE, + XSC_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF, +}; + +#endif diff --git a/kernel-headers/rdma/xsc_user_ioctl_verbs.h b/kernel-headers/rdma/xsc_user_ioctl_verbs.h new file mode 100644 index 0000000..614f2ee --- /dev/null +++ b/kernel-headers/rdma/xsc_user_ioctl_verbs.h @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd. + * All rights reserved. + */ + +#ifndef XSC_USER_IOCTL_VERBS_H +#define XSC_USER_IOCTL_VERBS_H + +#include + +enum xsc_ib_uapi_flow_action_flags { + XSC_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA = 1 << 0, +}; + +enum xsc_ib_uapi_flow_table_type { + XSC_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX = 0x0, + XSC_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX = 0x1, +}; + +enum xsc_ib_uapi_flow_action_packet_reformat_type { + XSC_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2 = 0x0, + XSC_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL = 0x1, + XSC_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2 = 0x2, + XSC_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL = 0x3, +}; + +#endif diff --git a/libibverbs/verbs.h b/libibverbs/verbs.h index 78129fd..d6a053e 100644 --- a/libibverbs/verbs.h +++ b/libibverbs/verbs.h @@ -2275,6 +2275,7 @@ extern const struct verbs_device_ops verbs_provider_qedr; extern const struct verbs_device_ops verbs_provider_rxe; extern const struct verbs_device_ops verbs_provider_siw; extern const struct verbs_device_ops verbs_provider_vmw_pvrdma; +extern const struct verbs_device_ops verbs_provider_xscale; extern const struct verbs_device_ops verbs_provider_all; extern const struct verbs_device_ops verbs_provider_none; void ibv_static_providers(void *unused, ...); diff --git a/providers/xscale/CMakeLists.txt b/providers/xscale/CMakeLists.txt new file mode 100644 index 0000000..1188db1 --- /dev/null +++ b/providers/xscale/CMakeLists.txt @@ -0,0 +1,18 @@ +rdma_shared_provider(xscale libxsc.map + 1 1.24.${PACKAGE_VERSION} + buf.c + cq.c + dbrec.c + xscale.c + qp.c + verbs.c +) + +publish_headers(infiniband + ../../kernel-headers/rdma/xsc_user_ioctl_verbs.h + ../../kernel-headers/rdma/xsc_user_ioctl_cmds.h + xsc_api.h + xscdv.h +) + +rdma_pkg_config("xscale" "libibverbs" "${CMAKE_THREAD_LIBS_INIT}") diff --git a/providers/xscale/bitmap.h b/providers/xscale/bitmap.h new file mode 100644 index 0000000..ef7f202 --- /dev/null +++ b/providers/xscale/bitmap.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd. + * All rights reserved. 
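For context on the libibverbs/verbs.h hunk above: the new extern verbs_provider_xscale symbol is what rdma-core's static-provider machinery references, and providers normally emit it through the PROVIDER_DRIVER() macro from <infiniband/driver.h> in their main source file. A hedged sketch of that pattern (xsc_dev_ops is an assumed name; the real ops table lives in providers/xscale/xscale.c, outside this excerpt):

	static const struct verbs_device_ops xsc_dev_ops = {
		.name = "xscale",
		/* match/alloc/context callbacks omitted in this sketch */
	};
	PROVIDER_DRIVER(xscale, xsc_dev_ops);	/* provides verbs_provider_xscale */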
+ */ + +#ifndef BITMAP_H +#define BITMAP_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "xscale.h" + +/* Only ia64 requires this */ +#ifdef __ia64__ +#define XSC_SHM_ADDR ((void *)0x8000000000000000UL) +#define XSC_SHMAT_FLAGS (SHM_RND) +#else +#define XSC_SHM_ADDR NULL +#define XSC_SHMAT_FLAGS 0 +#endif + +#define BITS_PER_LONG (8 * sizeof(long)) +#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_LONG) + +#ifndef HPAGE_SIZE +#define HPAGE_SIZE (2UL * 1024 * 1024) +#endif + +#define XSC_SHM_LENGTH HPAGE_SIZE +#define XSC_Q_CHUNK_SIZE 32768 +#define XSC_SHM_NUM_REGION 64 + +static inline unsigned long xsc_ffz(uint32_t word) +{ + return __builtin_ffs(~word) - 1; +} + +static inline uint32_t xsc_find_first_zero_bit(const unsigned long *addr, + uint32_t size) +{ + const unsigned long *p = addr; + uint32_t result = 0; + unsigned long tmp; + + while (size & ~(BITS_PER_LONG - 1)) { + tmp = *(p++); + if (~tmp) + goto found; + result += BITS_PER_LONG; + size -= BITS_PER_LONG; + } + if (!size) + return result; + + tmp = (*p) | (~0UL << size); + if (tmp == (uint32_t)~0UL) /* Are any bits zero? */ + return result + size; /* Nope. */ +found: + return result + xsc_ffz(tmp); +} + +static inline void xsc_set_bit(unsigned int nr, unsigned long *addr) +{ + addr[(nr / BITS_PER_LONG)] |= (1 << (nr % BITS_PER_LONG)); +} + +static inline void xsc_clear_bit(unsigned int nr, unsigned long *addr) +{ + addr[(nr / BITS_PER_LONG)] &= ~(1 << (nr % BITS_PER_LONG)); +} + +static inline int xsc_test_bit(unsigned int nr, const unsigned long *addr) +{ + return !!(addr[(nr / BITS_PER_LONG)] & (1 << (nr % BITS_PER_LONG))); +} + +#endif diff --git a/providers/xscale/buf.c b/providers/xscale/buf.c new file mode 100644 index 0000000..61daf6d --- /dev/null +++ b/providers/xscale/buf.c @@ -0,0 +1,594 @@ +/* + * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd. + * All rights reserved. + */ + +#include + +#include +#include +#include +#include +#include +#include + +#include "xscale.h" +#include "bitmap.h" + +static int xsc_bitmap_init(struct xsc_bitmap *bitmap, uint32_t num, + uint32_t mask) +{ + bitmap->last = 0; + bitmap->top = 0; + bitmap->max = num; + bitmap->avail = num; + bitmap->mask = mask; + bitmap->avail = bitmap->max; + bitmap->table = calloc(BITS_TO_LONGS(bitmap->max), sizeof(*bitmap->table)); + if (!bitmap->table) + return -ENOMEM; + + return 0; +} + +static void bitmap_free_range(struct xsc_bitmap *bitmap, uint32_t obj, + int cnt) +{ + int i; + + obj &= bitmap->max - 1; + + for (i = 0; i < cnt; i++) + xsc_clear_bit(obj + i, bitmap->table); + bitmap->last = min(bitmap->last, obj); + bitmap->top = (bitmap->top + bitmap->max) & bitmap->mask; + bitmap->avail += cnt; +} + +static int bitmap_empty(struct xsc_bitmap *bitmap) +{ + return (bitmap->avail == bitmap->max) ? 
1 : 0; +} + +static int bitmap_avail(struct xsc_bitmap *bitmap) +{ + return bitmap->avail; +} + +static void xsc_bitmap_cleanup(struct xsc_bitmap *bitmap) +{ + if (bitmap->table) + free(bitmap->table); +} + +static void free_huge_mem(struct xsc_hugetlb_mem *hmem) +{ + xsc_bitmap_cleanup(&hmem->bitmap); + if (shmdt(hmem->shmaddr) == -1) + xsc_dbg(stderr, XSC_DBG_CONTIG, "%s\n", strerror(errno)); + shmctl(hmem->shmid, IPC_RMID, NULL); + free(hmem); +} + +static int xsc_bitmap_alloc(struct xsc_bitmap *bitmap) +{ + uint32_t obj; + int ret; + + obj = xsc_find_first_zero_bit(bitmap->table, bitmap->max); + if (obj < bitmap->max) { + xsc_set_bit(obj, bitmap->table); + bitmap->last = (obj + 1); + if (bitmap->last == bitmap->max) + bitmap->last = 0; + obj |= bitmap->top; + ret = obj; + } else + ret = -1; + + if (ret != -1) + --bitmap->avail; + + return ret; +} + +static uint32_t find_aligned_range(unsigned long *bitmap, + uint32_t start, uint32_t nbits, + int len, int alignment) +{ + uint32_t end, i; + +again: + start = align(start, alignment); + + while ((start < nbits) && xsc_test_bit(start, bitmap)) + start += alignment; + + if (start >= nbits) + return -1; + + end = start + len; + if (end > nbits) + return -1; + + for (i = start + 1; i < end; i++) { + if (xsc_test_bit(i, bitmap)) { + start = i + 1; + goto again; + } + } + + return start; +} + +static int bitmap_alloc_range(struct xsc_bitmap *bitmap, int cnt, + int align) +{ + uint32_t obj; + int ret, i; + + if (cnt == 1 && align == 1) + return xsc_bitmap_alloc(bitmap); + + if (cnt > bitmap->max) + return -1; + + obj = find_aligned_range(bitmap->table, bitmap->last, + bitmap->max, cnt, align); + if (obj >= bitmap->max) { + bitmap->top = (bitmap->top + bitmap->max) & bitmap->mask; + obj = find_aligned_range(bitmap->table, 0, bitmap->max, + cnt, align); + } + + if (obj < bitmap->max) { + for (i = 0; i < cnt; i++) + xsc_set_bit(obj + i, bitmap->table); + if (obj == bitmap->last) { + bitmap->last = (obj + cnt); + if (bitmap->last >= bitmap->max) + bitmap->last = 0; + } + obj |= bitmap->top; + ret = obj; + } else + ret = -1; + + if (ret != -1) + bitmap->avail -= cnt; + + return obj; +} + +static struct xsc_hugetlb_mem *alloc_huge_mem(size_t size) +{ + struct xsc_hugetlb_mem *hmem; + size_t shm_len; + + hmem = malloc(sizeof(*hmem)); + if (!hmem) + return NULL; + + shm_len = align(size, XSC_SHM_LENGTH); + hmem->shmid = shmget(IPC_PRIVATE, shm_len, SHM_HUGETLB | SHM_R | SHM_W); + if (hmem->shmid == -1) { + xsc_dbg(stderr, XSC_DBG_CONTIG, "%s\n", strerror(errno)); + goto out_free; + } + + hmem->shmaddr = shmat(hmem->shmid, XSC_SHM_ADDR, XSC_SHMAT_FLAGS); + if (hmem->shmaddr == (void *)-1) { + xsc_dbg(stderr, XSC_DBG_CONTIG, "%s\n", strerror(errno)); + goto out_rmid; + } + + if (xsc_bitmap_init(&hmem->bitmap, shm_len / XSC_Q_CHUNK_SIZE, + shm_len / XSC_Q_CHUNK_SIZE - 1)) { + xsc_dbg(stderr, XSC_DBG_CONTIG, "%s\n", strerror(errno)); + goto out_shmdt; + } + + /* + * Marked to be destroyed when process detaches from shmget segment + */ + shmctl(hmem->shmid, IPC_RMID, NULL); + + return hmem; + +out_shmdt: + if (shmdt(hmem->shmaddr) == -1) + xsc_dbg(stderr, XSC_DBG_CONTIG, "%s\n", strerror(errno)); + +out_rmid: + shmctl(hmem->shmid, IPC_RMID, NULL); + +out_free: + free(hmem); + return NULL; +} + +static int alloc_huge_buf(struct xsc_context *xctx, struct xsc_buf *buf, + size_t size, int page_size) +{ + int found = 0; + int nchunk; + struct xsc_hugetlb_mem *hmem; + int ret; + + buf->length = align(size, XSC_Q_CHUNK_SIZE); + nchunk = buf->length / 
XSC_Q_CHUNK_SIZE; + + if (!nchunk) + return 0; + + xsc_spin_lock(&xctx->hugetlb_lock); + list_for_each(&xctx->hugetlb_list, hmem, entry) { + if (bitmap_avail(&hmem->bitmap)) { + buf->base = bitmap_alloc_range(&hmem->bitmap, nchunk, 1); + if (buf->base != -1) { + buf->hmem = hmem; + found = 1; + break; + } + } + } + xsc_spin_unlock(&xctx->hugetlb_lock); + + if (!found) { + hmem = alloc_huge_mem(buf->length); + if (!hmem) + return -1; + + buf->base = bitmap_alloc_range(&hmem->bitmap, nchunk, 1); + if (buf->base == -1) { + free_huge_mem(hmem); + /* TBD: remove after proven stability */ + fprintf(stderr, "BUG: huge allocation\n"); + return -1; + } + + buf->hmem = hmem; + + xsc_spin_lock(&xctx->hugetlb_lock); + if (bitmap_avail(&hmem->bitmap)) + list_add(&xctx->hugetlb_list, &hmem->entry); + else + list_add_tail(&xctx->hugetlb_list, &hmem->entry); + xsc_spin_unlock(&xctx->hugetlb_lock); + } + + buf->buf = hmem->shmaddr + buf->base * XSC_Q_CHUNK_SIZE; + + ret = ibv_dontfork_range(buf->buf, buf->length); + if (ret) { + goto out_fork; + } + buf->type = XSC_ALLOC_TYPE_HUGE; + + return 0; + +out_fork: + xsc_spin_lock(&xctx->hugetlb_lock); + bitmap_free_range(&hmem->bitmap, buf->base, nchunk); + if (bitmap_empty(&hmem->bitmap)) { + list_del(&hmem->entry); + xsc_spin_unlock(&xctx->hugetlb_lock); + free_huge_mem(hmem); + } else + xsc_spin_unlock(&xctx->hugetlb_lock); + + return -1; +} + +static void free_huge_buf(struct xsc_context *ctx, struct xsc_buf *buf) +{ + int nchunk; + + nchunk = buf->length / XSC_Q_CHUNK_SIZE; + if (!nchunk) + return; + + xsc_spin_lock(&ctx->hugetlb_lock); + bitmap_free_range(&buf->hmem->bitmap, buf->base, nchunk); + if (bitmap_empty(&buf->hmem->bitmap)) { + list_del(&buf->hmem->entry); + xsc_spin_unlock(&ctx->hugetlb_lock); + free_huge_mem(buf->hmem); + } else + xsc_spin_unlock(&ctx->hugetlb_lock); +} + +void xsc_free_buf_extern(struct xsc_context *ctx, struct xsc_buf *buf) +{ + ibv_dofork_range(buf->buf, buf->length); + ctx->extern_alloc.free(buf->buf, ctx->extern_alloc.data); +} + +int xsc_alloc_buf_extern(struct xsc_context *ctx, struct xsc_buf *buf, + size_t size) +{ + void *addr; + + addr = ctx->extern_alloc.alloc(size, ctx->extern_alloc.data); + if (addr || size == 0) { + if (ibv_dontfork_range(addr, size)) { + xsc_err("External mode dontfork_range failed\n"); + ctx->extern_alloc.free(addr, + ctx->extern_alloc.data); + return -1; + } + buf->buf = addr; + buf->length = size; + buf->type = XSC_ALLOC_TYPE_EXTERNAL; + return 0; + } + + xsc_err("External alloc failed\n"); + return -1; +} + +int xsc_alloc_prefered_buf(struct xsc_context *xctx, + struct xsc_buf *buf, + size_t size, int page_size, + enum xsc_alloc_type type, + const char *component) +{ + int ret; + + /* + * Fallback mechanism priority: + * huge pages + * contig pages + * default + */ + if (type == XSC_ALLOC_TYPE_HUGE || + type == XSC_ALLOC_TYPE_PREFER_HUGE || + type == XSC_ALLOC_TYPE_ALL) { + ret = alloc_huge_buf(xctx, buf, size, page_size); + if (!ret) + return 0; + + if (type == XSC_ALLOC_TYPE_HUGE) + return -1; + + xsc_dbg(xctx->dbg_fp, XSC_DBG_CONTIG, "Huge mode allocation failed, fallback to %s mode\n", + XSC_ALLOC_TYPE_ALL ? 
"contig" : "default"); + } + + if (type == XSC_ALLOC_TYPE_CONTIG || + type == XSC_ALLOC_TYPE_PREFER_CONTIG || + type == XSC_ALLOC_TYPE_ALL) { + ret = xsc_alloc_buf_contig(xctx, buf, size, page_size, component); + if (!ret) + return 0; + + if (type == XSC_ALLOC_TYPE_CONTIG) + return -1; + xsc_dbg(xctx->dbg_fp, XSC_DBG_CONTIG, "Contig allocation failed, fallback to default mode\n"); + } + + if (type == XSC_ALLOC_TYPE_EXTERNAL) + return xsc_alloc_buf_extern(xctx, buf, size); + + return xsc_alloc_buf(buf, size, page_size); + +} + +int xsc_free_actual_buf(struct xsc_context *ctx, struct xsc_buf *buf) +{ + int err = 0; + + switch (buf->type) { + case XSC_ALLOC_TYPE_ANON: + xsc_free_buf(buf); + break; + + case XSC_ALLOC_TYPE_HUGE: + free_huge_buf(ctx, buf); + break; + + case XSC_ALLOC_TYPE_CONTIG: + xsc_free_buf_contig(ctx, buf); + break; + + case XSC_ALLOC_TYPE_EXTERNAL: + xsc_free_buf_extern(ctx, buf); + break; + + default: + fprintf(stderr, "Bad allocation type\n"); + } + + return err; +} + +/* This function computes log2(v) rounded up. + We don't want to have a dependency to libm which exposes ceil & log2 APIs. + Code was written based on public domain code: + URL: http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog. +*/ +static uint32_t xsc_get_block_order(uint32_t v) +{ + static const uint32_t bits_arr[] = {0x2, 0xC, 0xF0, 0xFF00, 0xFFFF0000}; + static const uint32_t shift_arr[] = {1, 2, 4, 8, 16}; + int i; + uint32_t input_val = v; + + register uint32_t r = 0;/* result of log2(v) will go here */ + for (i = 4; i >= 0; i--) { + if (v & bits_arr[i]) { + v >>= shift_arr[i]; + r |= shift_arr[i]; + } + } + /* Rounding up if required */ + r += !!(input_val & ((1 << r) - 1)); + + return r; +} + +bool xsc_is_extern_alloc(struct xsc_context *context) +{ + return context->extern_alloc.alloc && context->extern_alloc.free; +} + +void xsc_get_alloc_type(struct xsc_context *context, + const char *component, + enum xsc_alloc_type *alloc_type, + enum xsc_alloc_type default_type) + +{ + char *env_value; + char name[128]; + + if (xsc_is_extern_alloc(context)) { + *alloc_type = XSC_ALLOC_TYPE_EXTERNAL; + return; + } + + snprintf(name, sizeof(name), "%s_ALLOC_TYPE", component); + + *alloc_type = default_type; + + env_value = getenv(name); + if (env_value) { + if (!strcasecmp(env_value, "ANON")) + *alloc_type = XSC_ALLOC_TYPE_ANON; + else if (!strcasecmp(env_value, "HUGE")) + *alloc_type = XSC_ALLOC_TYPE_HUGE; + else if (!strcasecmp(env_value, "CONTIG")) + *alloc_type = XSC_ALLOC_TYPE_CONTIG; + else if (!strcasecmp(env_value, "PREFER_CONTIG")) + *alloc_type = XSC_ALLOC_TYPE_PREFER_CONTIG; + else if (!strcasecmp(env_value, "PREFER_HUGE")) + *alloc_type = XSC_ALLOC_TYPE_PREFER_HUGE; + else if (!strcasecmp(env_value, "ALL")) + *alloc_type = XSC_ALLOC_TYPE_ALL; + } +} + +static void xsc_alloc_get_env_info(int *max_block_log, + int *min_block_log, + const char *component) + +{ + char *env; + int value; + char name[128]; + + /* First set defaults */ + *max_block_log = XSC_MAX_LOG2_CONTIG_BLOCK_SIZE; + *min_block_log = XSC_MIN_LOG2_CONTIG_BLOCK_SIZE; + + snprintf(name, sizeof(name), "%s_MAX_LOG2_CONTIG_BSIZE", component); + env = getenv(name); + if (env) { + value = atoi(env); + if (value <= XSC_MAX_LOG2_CONTIG_BLOCK_SIZE && + value >= XSC_MIN_LOG2_CONTIG_BLOCK_SIZE) + *max_block_log = value; + else + fprintf(stderr, "Invalid value %d for %s\n", + value, name); + } + sprintf(name, "%s_MIN_LOG2_CONTIG_BSIZE", component); + env = getenv(name); + if (env) { + value = atoi(env); + if (value >= 
XSC_MIN_LOG2_CONTIG_BLOCK_SIZE && + value <= *max_block_log) + *min_block_log = value; + else + fprintf(stderr, "Invalid value %d for %s\n", + value, name); + } +} + +int xsc_alloc_buf_contig(struct xsc_context *xctx, + struct xsc_buf *buf, size_t size, + int page_size, + const char *component) +{ + void *addr = MAP_FAILED; + int block_size_exp; + int max_block_log; + int min_block_log; + struct ibv_context *context = &xctx->ibv_ctx.context; + off_t offset; + + xsc_alloc_get_env_info(&max_block_log, + &min_block_log, + component); + + block_size_exp = xsc_get_block_order(size); + + if (block_size_exp > max_block_log) + block_size_exp = max_block_log; + + do { + offset = 0; + set_command(XSC_IB_MMAP_GET_CONTIGUOUS_PAGES, &offset); + set_order(block_size_exp, &offset); + addr = mmap(NULL , size, PROT_WRITE | PROT_READ, MAP_SHARED, + context->cmd_fd, page_size * offset); + if (addr != MAP_FAILED) + break; + + /* + * The kernel returns EINVAL if not supported + */ + if (errno == EINVAL) + return -1; + + block_size_exp -= 1; + } while (block_size_exp >= min_block_log); + xsc_dbg(xctx->dbg_fp, XSC_DBG_CONTIG, "block order %d, addr %p\n", block_size_exp, addr); + + if (addr == MAP_FAILED) + return -1; + + if (ibv_dontfork_range(addr, size)) { + munmap(addr, size); + return -1; + } + + buf->buf = addr; + buf->length = size; + buf->type = XSC_ALLOC_TYPE_CONTIG; + + return 0; +} + +void xsc_free_buf_contig(struct xsc_context *xctx, struct xsc_buf *buf) +{ + ibv_dofork_range(buf->buf, buf->length); + munmap(buf->buf, buf->length); +} + +int xsc_alloc_buf(struct xsc_buf *buf, size_t size, int page_size) +{ + int ret; + int al_size; + + al_size = align(size, page_size); + ret = posix_memalign(&buf->buf, page_size, al_size); + if (ret) + return ret; + + ret = ibv_dontfork_range(buf->buf, al_size); + if (ret) + free(buf->buf); + + if (!ret) { + buf->length = al_size; + buf->type = XSC_ALLOC_TYPE_ANON; + } + + return ret; +} + +void xsc_free_buf(struct xsc_buf *buf) +{ + ibv_dofork_range(buf->buf, buf->length); + free(buf->buf); +} diff --git a/providers/xscale/cq.c b/providers/xscale/cq.c new file mode 100644 index 0000000..e2619f0 --- /dev/null +++ b/providers/xscale/cq.c @@ -0,0 +1,1410 @@ +/* + * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd. + * All rights reserved. 
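As a usage reference for the allocation helpers buf.c ends with above, here is an illustrative caller of the default anonymous path (not part of the patch; it assumes struct xsc_buf and the prototypes come from xscale.h, plus <unistd.h> and <string.h>):

	static int example_alloc_ring(struct xsc_buf *buf, size_t bytes)
	{
		int page_size = sysconf(_SC_PAGESIZE);

		/* rounds up to page_size and marks the range with ibv_dontfork_range() */
		if (xsc_alloc_buf(buf, bytes, page_size))
			return -1;

		memset(buf->buf, 0, buf->length);
		/* ... hand buf->buf to a queue, or keep it for the object's lifetime ... */

		xsc_free_buf(buf);	/* re-allows fork inheritance and frees the memory */
		return 0;
	}

The HUGE/CONTIG/ANON selection implemented just above can also be steered per component at run time through the <COMPONENT>_ALLOC_TYPE environment variables parsed by xsc_get_alloc_type().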
+ */ + +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "xscale.h" +#include "wqe.h" +#include "xsc_hsi.h" + +enum { + CQ_OK = 0, + CQ_EMPTY = -1, + CQ_POLL_ERR = -2 +}; + +enum { + XSC_CQE_APP_TAG_MATCHING = 1, +}; + +enum { + XSC_CQE_APP_OP_TM_CONSUMED = 0x1, + XSC_CQE_APP_OP_TM_EXPECTED = 0x2, + XSC_CQE_APP_OP_TM_UNEXPECTED = 0x3, + XSC_CQE_APP_OP_TM_NO_TAG = 0x4, + XSC_CQE_APP_OP_TM_APPEND = 0x5, + XSC_CQE_APP_OP_TM_REMOVE = 0x6, + XSC_CQE_APP_OP_TM_NOOP = 0x7, + XSC_CQE_APP_OP_TM_CONSUMED_SW_RDNV = 0x9, + XSC_CQE_APP_OP_TM_CONSUMED_MSG = 0xA, + XSC_CQE_APP_OP_TM_CONSUMED_MSG_SW_RDNV = 0xB, + XSC_CQE_APP_OP_TM_MSG_COMPLETION_CANCELED = 0xC, +}; + +static const uint32_t xsc_msg_opcode[][2][2] = { + [XSC_MSG_OPCODE_SEND][XSC_REQ][XSC_WITHOUT_IMMDT] = XSC_OPCODE_RDMA_REQ_SEND, + [XSC_MSG_OPCODE_SEND][XSC_REQ][XSC_WITH_IMMDT] = XSC_OPCODE_RDMA_REQ_SEND_IMMDT, + [XSC_MSG_OPCODE_SEND][XSC_RSP][XSC_WITHOUT_IMMDT] = XSC_OPCODE_RDMA_RSP_RECV, + [XSC_MSG_OPCODE_SEND][XSC_RSP][XSC_WITH_IMMDT] = XSC_OPCODE_RDMA_RSP_RECV_IMMDT, + [XSC_MSG_OPCODE_RDMA_WRITE][XSC_REQ][XSC_WITHOUT_IMMDT] = XSC_OPCODE_RDMA_REQ_WRITE, + [XSC_MSG_OPCODE_RDMA_WRITE][XSC_REQ][XSC_WITH_IMMDT] = XSC_OPCODE_RDMA_REQ_WRITE_IMMDT, + [XSC_MSG_OPCODE_RDMA_WRITE][XSC_RSP][XSC_WITHOUT_IMMDT] = XSC_OPCODE_RDMA_CQE_ERROR, + [XSC_MSG_OPCODE_RDMA_WRITE][XSC_RSP][XSC_WITH_IMMDT] = XSC_OPCODE_RDMA_RSP_WRITE_IMMDT, + [XSC_MSG_OPCODE_RDMA_READ][XSC_REQ][XSC_WITHOUT_IMMDT] = XSC_OPCODE_RDMA_REQ_READ, + [XSC_MSG_OPCODE_RDMA_READ][XSC_REQ][XSC_WITH_IMMDT] = XSC_OPCODE_RDMA_CQE_ERROR, + [XSC_MSG_OPCODE_RDMA_READ][XSC_RSP][XSC_WITHOUT_IMMDT] = XSC_OPCODE_RDMA_CQE_ERROR, + [XSC_MSG_OPCODE_RDMA_READ][XSC_RSP][XSC_WITH_IMMDT] = XSC_OPCODE_RDMA_CQE_ERROR, +}; + +static const uint32_t xsc_cqe_opcode[] = { + [XSC_OPCODE_RDMA_REQ_SEND] = IBV_WC_SEND, + [XSC_OPCODE_RDMA_REQ_SEND_IMMDT] = IBV_WC_SEND, + [XSC_OPCODE_RDMA_RSP_RECV] = IBV_WC_RECV, + [XSC_OPCODE_RDMA_RSP_RECV_IMMDT] = IBV_WC_RECV, + [XSC_OPCODE_RDMA_REQ_WRITE] = IBV_WC_RDMA_WRITE, + [XSC_OPCODE_RDMA_REQ_WRITE_IMMDT] = IBV_WC_RDMA_WRITE, + [XSC_OPCODE_RDMA_RSP_WRITE_IMMDT] = IBV_WC_RECV_RDMA_WITH_IMM, + [XSC_OPCODE_RDMA_REQ_READ] = IBV_WC_RDMA_READ, +}; + +int xsc_stall_num_loop = 60; +int xsc_stall_cq_poll_min = 60; +int xsc_stall_cq_poll_max = 100000; +int xsc_stall_cq_inc_step = 100; +int xsc_stall_cq_dec_step = 10; + +static inline uint8_t xsc_get_cqe_opcode(struct xsc_cqe *cqe) ALWAYS_INLINE; +static inline uint8_t xsc_get_cqe_opcode(struct xsc_cqe *cqe) +{ + if (cqe->is_error) + return cqe->type ? 
XSC_OPCODE_RDMA_RSP_ERROR : XSC_OPCODE_RDMA_REQ_ERROR; + if (cqe->msg_opcode > XSC_MSG_OPCODE_RDMA_READ) { + printf("rdma cqe msg code should be send/write/read\n"); + return XSC_OPCODE_RDMA_CQE_ERROR; + } + return xsc_msg_opcode[cqe->msg_opcode][cqe->type][cqe->with_immdt]; +} + +static inline uint8_t get_cqe_l3_hdr_type(struct xsc_cqe64 *cqe) +{ + return (cqe->l4_hdr_type_etc >> 2) & 0x3; +} + +static void *get_cqe(struct xsc_cq *cq, int n) +{ + return cq->active_buf->buf + n * cq->cqe_sz; +} + +static void *get_sw_cqe(struct xsc_cq *cq, int n) +{ + int cid = n & (cq->verbs_cq.cq_ex.cqe - 1); + struct xsc_cqe *cqe = get_cqe(cq, cid); + if (likely(xsc_get_cqe_sw_own(cqe, n, cq->log2_cq_ring_sz))) + return cqe; + else + return NULL; +} + +static void *next_cqe_sw(struct xsc_cq *cq) +{ + return get_sw_cqe(cq, cq->cons_index); +} + +static void update_cons_index(struct xsc_cq *cq) +{ + union xsc_db_data db; + + db.raw_data = cq->cons_index; + db.cqn = cq->cqn; + WR_REG(cq->db, db.raw_data); +} + +static inline void handle_good_req( + struct ibv_wc *wc, struct xsc_cqe *cqe, struct xsc_qp *qp, struct xsc_wq *wq, uint8_t opcode) +{ + int idx; + struct xsc_send_wqe_ctrl_seg *ctrl; + + wc->opcode = xsc_cqe_opcode[opcode]; + wc->status = IBV_WC_SUCCESS; + idx = RD_LE_16(cqe->wqe_id); + idx >>= (qp->sq.wqe_shift - XSC_BASE_WQE_SHIFT); + idx &= (wq->wqe_cnt -1); + wc->wr_id = wq->wrid[idx]; + wq->tail = wq->wqe_head[idx] + 1; + if (opcode == XSC_OPCODE_RDMA_REQ_READ) { + ctrl = xsc_get_send_wqe(qp, idx); + wc->byte_len = ctrl->msg_len; + } + wq->flush_wqe_cnt--; + + xsc_dbg(to_xctx(qp->ibv_qp->context)->dbg_fp, XSC_DBG_CQ_CQE, + "wqeid:%u, wq tail:%u\n", idx, wq->tail); +} + +/* Returns IBV_WC_IP_CSUM_OK or 0 */ +static inline int get_csum_ok(struct xsc_cqe64 *cqe) +{ + return (((cqe->hds_ip_ext & (XSC_CQE_L4_OK | XSC_CQE_L3_OK)) == + (XSC_CQE_L4_OK | XSC_CQE_L3_OK)) & + (get_cqe_l3_hdr_type(cqe) == XSC_CQE_L3_HDR_TYPE_IPV4)) + << IBV_WC_IP_CSUM_OK_SHIFT; +} + +static inline void handle_good_responder( + struct ibv_wc *wc, struct xsc_cqe *cqe, struct xsc_wq *wq, uint8_t opcode) +{ + uint16_t idx; + struct xsc_qp *qp = container_of(wq, struct xsc_qp, rq); + + wc->byte_len = RD_LE_32(cqe->msg_len); + wc->opcode = xsc_cqe_opcode[opcode]; + wc->status = IBV_WC_SUCCESS; + + idx = wq->tail & (wq->wqe_cnt - 1); + wc->wr_id = wq->wrid[idx]; + ++wq->tail; + wq->flush_wqe_cnt--; + + xsc_dbg(to_xctx(qp->ibv_qp->context)->dbg_fp, XSC_DBG_CQ_CQE, + "recv cqe idx:%u, len:%u\n", idx, wc->byte_len); +} + +static void dump_cqe(void *buf) +{ + __le32 *p = buf; + int i; + + for (i = 0; i < 8; i += 4) + printf("0x%08x 0x%08x 0x%08x 0x%08x\n", p[i], p[i+1], p[i+2], p[i+3]); +} + +static enum ibv_wc_status xsc_cqe_error_code(struct xsc_cqe *cqe) +{ + switch (cqe->error_code) { + case XSC_ERR_CODE_NAK_RETRY: + return IBV_WC_RETRY_EXC_ERR; + case XSC_ERR_CODE_NAK_OPCODE: + return IBV_WC_BAD_RESP_ERR; + case XSC_ERR_CODE_NAK_MR: + return IBV_WC_REM_ACCESS_ERR; + case XSC_ERR_CODE_NAK_OPERATION: + return IBV_WC_REM_OP_ERR; + case XSC_ERR_CODE_NAK_RNR: + return IBV_WC_RNR_RETRY_EXC_ERR; + case XSC_ERR_CODE_LOCAL_MR: + return IBV_WC_LOC_PROT_ERR; + case XSC_ERR_CODE_LOCAL_LEN: + return IBV_WC_LOC_LEN_ERR; + case XSC_ERR_CODE_LEN_GEN_CQE: + return IBV_WC_LOC_LEN_ERR; + case XSC_ERR_CODE_OPERATION: + return IBV_WC_LOC_ACCESS_ERR; + case XSC_ERR_CODE_FLUSH: + return IBV_WC_WR_FLUSH_ERR; + case XSC_ERR_CODE_MALF_WQE_HOST: + case XSC_ERR_CODE_STRG_ACC_GEN_CQE: + return IBV_WC_FATAL_ERR; + case XSC_ERR_CODE_OPCODE_GEN_CQE: + case 
XSC_ERR_CODE_LOCAL_OPCODE: + default: + return IBV_WC_GENERAL_ERR; + } +} + + +static inline bool xsc_qp_need_cqe(struct xsc_qp *qp, int *type, int *wqe_id) +{ + struct xsc_wq *wq; + struct xsc_send_wqe_ctrl_seg *ctrl; + int idx = 0; + + /* check recv queue work request */ + wq = &qp->rq; + if (wq->head - wq->tail > 0) { + *type = 1; + return true; + } + /* check send queue work request */ + wq = &qp->sq; + while (wq->head - wq->tail > 0) { + idx = wq->tail & (wq->wqe_cnt - 1); + ++wq->tail; + ctrl = xsc_get_send_wqe(qp, idx); + if (ctrl->ce) { + *type = 0; + *wqe_id = idx << (qp->sq.wqe_shift - XSC_BASE_WQE_SHIFT); + return true; + } + } + return false; +} + +static inline void handle_bad_req( + struct xsc_context *xctx, + struct ibv_wc *wc, struct xsc_cqe *cqe, struct xsc_qp *qp, struct xsc_wq *wq) +{ + int idx; + wc->status = xsc_cqe_error_code(cqe); + wc->vendor_err = cqe->error_code; + idx = RD_LE_16(cqe->wqe_id); + idx >>= (qp->sq.wqe_shift - XSC_BASE_WQE_SHIFT); + idx &= (wq->wqe_cnt -1); + wq->tail = wq->wqe_head[idx] + 1; + wc->wr_id = wq->wrid[idx]; + wq->flush_wqe_cnt--; + + if (cqe->error_code != XSC_ERR_CODE_FLUSH) { + printf("%s: got completion with error:\n", xctx->hostname); + dump_cqe(cqe); + } +} + +static inline void handle_bad_responder( + struct xsc_context *xctx, + struct ibv_wc *wc, struct xsc_cqe *cqe, struct xsc_wq *wq) +{ + wc->status = xsc_cqe_error_code(cqe); + wc->vendor_err = cqe->error_code; + + ++wq->tail; + wq->flush_wqe_cnt--; + + if (cqe->error_code != XSC_ERR_CODE_FLUSH) { + printf("%s: got completion with error:\n", xctx->hostname); + dump_cqe(cqe); + } +} + +#if defined(__x86_64__) || defined (__i386__) +static inline unsigned long get_cycles(void) +{ + uint32_t low, high; + uint64_t val; + asm volatile ("rdtsc" : "=a" (low), "=d" (high)); + val = high; + val = (val << 32) | low; + return val; +} + +static void xsc_stall_poll_cq(void) +{ + int i; + + for (i = 0; i < xsc_stall_num_loop; i++) + (void)get_cycles(); +} +static void xsc_stall_cycles_poll_cq(uint64_t cycles) +{ + while (get_cycles() < cycles) + ; /* Nothing */ +} +static void xsc_get_cycles(uint64_t *cycles) +{ + *cycles = get_cycles(); +} +#else +static void xsc_stall_poll_cq(void) +{ +} +static void xsc_stall_cycles_poll_cq(uint64_t cycles) +{ +} +static void xsc_get_cycles(uint64_t *cycles) +{ +} +#endif + +static inline int get_qp_ctx(struct xsc_context *xctx, + struct xsc_resource **cur_rsc, + uint32_t qpn) + ALWAYS_INLINE; +static inline int get_qp_ctx(struct xsc_context *xctx, + struct xsc_resource **cur_rsc, + uint32_t qpn) +{ + if (!*cur_rsc || (qpn != (*cur_rsc)->rsn)) { + /* + * We do not have to take the QP table lock here, + * because CQs will be locked while QPs are removed + * from the table. + */ + *cur_rsc = (struct xsc_resource *)xsc_find_qp(xctx, qpn); + if (unlikely(!*cur_rsc)) + return CQ_POLL_ERR; + } + + return CQ_OK; +} + +static inline int xsc_get_next_cqe(struct xsc_cq *cq, + struct xsc_cqe64 **pcqe64, + void **pcqe) + ALWAYS_INLINE; +static inline int xsc_get_next_cqe(struct xsc_cq *cq, + struct xsc_cqe64 **pcqe64, + void **pcqe) +{ + void *cqe = next_cqe_sw(cq); + if (!cqe) + return CQ_EMPTY; + + ++cq->cons_index; + + /* + * Make sure we read CQ entry contents after we've checked the + * ownership bit. 
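xsc_get_next_cqe() above backs the lazy polling entry points (xsc_start_poll()/xsc_next_poll()/_xsc_end_poll()) defined later in this file, which libibverbs exposes through the extended-CQ API. For reference, the standard application-side sequence looks like this (illustrative sketch, not from the patch; assumes <stdio.h> and <infiniband/verbs.h>):

	static void example_drain_cq_ex(struct ibv_cq_ex *cq_ex)
	{
		struct ibv_poll_cq_attr attr = {};
		int ret = ibv_start_poll(cq_ex, &attr);

		if (ret)	/* ENOENT: CQ empty; anything else: error */
			return;
		do {
			if (cq_ex->status != IBV_WC_SUCCESS)
				fprintf(stderr, "wr_id %llu failed, status %d\n",
					(unsigned long long)cq_ex->wr_id,
					(int)cq_ex->status);
			/* other fields come from ibv_wc_read_opcode(cq_ex) and friends */
			ret = ibv_next_poll(cq_ex);	/* ENOENT ends the batch */
		} while (!ret);
		ibv_end_poll(cq_ex);
	}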
+ */ + udma_from_device_barrier(); + + *pcqe = cqe; + + return CQ_OK; +} + +static inline int xsc_parse_cqe(struct xsc_cq *cq, + struct xsc_cqe *cqe, + struct xsc_resource **cur_rsc, + struct ibv_wc *wc, + int lazy) +{ + struct xsc_wq *wq; + uint32_t qp_id; + uint8_t opcode; + int err = 0; + struct xsc_qp *xqp = NULL; + struct xsc_context *xctx; + + xctx = to_xctx(ibv_cq_ex_to_cq(&cq->verbs_cq.cq_ex)->context); + qp_id = cqe->qp_id; + qp_id = RD_LE_16(qp_id); + wc->wc_flags = 0; + wc->qp_num = qp_id; + opcode = xsc_get_cqe_opcode(cqe); + + xsc_dbg(xctx->dbg_fp, XSC_DBG_CQ_CQE, "opcode:0x%x qp_num:%u\n", opcode, qp_id); + switch (opcode) { + case XSC_OPCODE_RDMA_REQ_SEND_IMMDT: + case XSC_OPCODE_RDMA_REQ_WRITE_IMMDT: + wc->wc_flags |= IBV_WC_WITH_IMM; + SWITCH_FALLTHROUGH; + case XSC_OPCODE_RDMA_REQ_SEND: + case XSC_OPCODE_RDMA_REQ_WRITE: + case XSC_OPCODE_RDMA_REQ_READ: + err = get_qp_ctx(xctx, cur_rsc, qp_id); + if (unlikely(err)) + return CQ_EMPTY; + xqp = rsc_to_xqp(*cur_rsc); + wq = &xqp->sq; + handle_good_req(wc, cqe, xqp, wq, opcode); + break; + case XSC_OPCODE_RDMA_RSP_RECV_IMMDT: + case XSC_OPCODE_RDMA_RSP_WRITE_IMMDT: + wc->wc_flags |= IBV_WC_WITH_IMM; + wc->imm_data = cqe->imm_data; + SWITCH_FALLTHROUGH; + case XSC_OPCODE_RDMA_RSP_RECV: + err = get_qp_ctx(xctx, cur_rsc, qp_id); + if (unlikely(err)) + return CQ_EMPTY; + xqp = rsc_to_xqp(*cur_rsc); + wq = &xqp->rq; + handle_good_responder(wc, cqe, wq, opcode); + break; + case XSC_OPCODE_RDMA_REQ_ERROR: + err = get_qp_ctx(xctx, cur_rsc, qp_id); + if (unlikely(err)) + return CQ_POLL_ERR; + xqp = rsc_to_xqp(*cur_rsc); + wq = &xqp->sq; + handle_bad_req(xctx, wc, cqe, xqp, wq); + break; + case XSC_OPCODE_RDMA_RSP_ERROR: + err = get_qp_ctx(xctx, cur_rsc, qp_id); + if (unlikely(err)) + return CQ_POLL_ERR; + xqp = rsc_to_xqp(*cur_rsc); + wq = &xqp->rq; + handle_bad_responder(xctx, wc, cqe, wq); + break; + case XSC_OPCODE_RDMA_CQE_ERROR: + printf("%s: got completion with cqe format error:\n", xctx->hostname); + dump_cqe(cqe); + SWITCH_FALLTHROUGH; + default: + return CQ_POLL_ERR; + } + return CQ_OK; +} + +static inline int xsc_parse_lazy_cqe(struct xsc_cq *cq, + struct xsc_cqe64 *cqe64, + void *cqe, int cqe_ver) + ALWAYS_INLINE; +static inline int xsc_parse_lazy_cqe(struct xsc_cq *cq, + struct xsc_cqe64 *cqe64, + void *cqe, int cqe_ver) +{ + return xsc_parse_cqe(cq, cqe, &cq->cur_rsc, NULL, 1); +} + +static inline int xsc_poll_one(struct xsc_cq *cq, + struct xsc_resource **cur_rsc, + struct ibv_wc *wc) + ALWAYS_INLINE; +static inline int xsc_poll_one(struct xsc_cq *cq, + struct xsc_resource **cur_rsc, + struct ibv_wc *wc) +{ + struct xsc_cqe *cqe = get_sw_cqe(cq, cq->cons_index); + if (cqe == NULL) { + return CQ_EMPTY; + } + memset(wc, 0, sizeof(*wc)); + + ++cq->cons_index; + + /* + * Make sure we read CQ entry contents after we've checked the + * ownership bit. 
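xsc_poll_one() above is the per-CQE step behind poll_cq()/xsc_poll_cq() further down, which the provider presumably wires to the regular ibv_poll_cq() path. A typical application drain loop against it, for reference (illustrative sketch, not from the patch; assumes <stdio.h> and <infiniband/verbs.h>):

	static void example_drain_cq(struct ibv_cq *cq)
	{
		struct ibv_wc wc[16];
		int i, n;

		while ((n = ibv_poll_cq(cq, 16, wc)) > 0) {
			for (i = 0; i < n; i++) {
				if (wc[i].status != IBV_WC_SUCCESS)
					fprintf(stderr, "wr_id %llu: %s\n",
						(unsigned long long)wc[i].wr_id,
						ibv_wc_status_str(wc[i].status));
			}
		}
		if (n < 0)
			fprintf(stderr, "ibv_poll_cq() failed\n");
	}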
+ */ + udma_from_device_barrier(); + return xsc_parse_cqe(cq, cqe, cur_rsc, wc, 0); +} + +static inline void gen_flush_err_cqe(struct xsc_err_state_qp_node *err_node, + uint32_t qp_id, struct xsc_wq *wq, uint32_t idx, + struct ibv_wc *wc) +{ + memset(wc, 0, sizeof(*wc)); + if (err_node->is_sq) { + switch (wq->wr_opcode[idx]){ + case IBV_WR_SEND: + case IBV_WR_SEND_WITH_IMM: + case IBV_WR_SEND_WITH_INV: + wc->opcode = IBV_WC_SEND; + break; + case IBV_WR_RDMA_WRITE: + case IBV_WR_RDMA_WRITE_WITH_IMM: + wc->opcode = IBV_WC_RDMA_WRITE; + break; + case IBV_WR_RDMA_READ: + wc->opcode = IBV_WC_RDMA_READ; + } + } else { + wc->opcode = IBV_WC_RECV; + } + + wc->qp_num = qp_id; + wc->status = IBV_WC_WR_FLUSH_ERR; + wc->vendor_err = XSC_ERR_CODE_FLUSH; + wc->wr_id = wq->wrid[idx]; + wq->tail++; + wq->flush_wqe_cnt--; +} + +static inline int xsc_generate_flush_err_cqe(struct ibv_cq *ibcq, + int ne, int *npolled, struct ibv_wc *wc) +{ + uint32_t qp_id = 0; + uint32_t flush_wqe_cnt = 0; + int sw_npolled = 0; + int ret = 0; + uint32_t idx = 0; + struct xsc_err_state_qp_node *err_qp_node, *tmp; + struct xsc_resource *res = NULL; + struct xsc_context *xctx = to_xctx(ibcq->context); + struct xsc_cq *cq = to_xcq(ibcq); + struct xsc_wq *wq; + + list_for_each_safe(&cq->err_state_qp_list, err_qp_node, tmp, entry) { + if (!err_qp_node) + break; + + sw_npolled = 0; + qp_id = err_qp_node->qp_id; + ret = get_qp_ctx(xctx, &res, qp_id); + if (unlikely(ret)) + continue; + wq = err_qp_node->is_sq ? &(rsc_to_xqp(res)->sq):&(rsc_to_xqp(res)->rq); + flush_wqe_cnt = wq->flush_wqe_cnt; + xsc_dbg(xctx->dbg_fp, XSC_DBG_CQ_CQE, "is_sq %d, flush_wq_cnt %d, ne %d, npolled %d, qp_id %d\n", + err_qp_node->is_sq, wq->flush_wqe_cnt, ne, *npolled, qp_id); + + if (flush_wqe_cnt <= (ne - *npolled)) { + while (sw_npolled < flush_wqe_cnt) { + idx = wq->tail & (wq->wqe_cnt - 1); + if (err_qp_node->is_sq && !wq->need_flush[idx]) { + wq->tail++; + continue; + } else { + gen_flush_err_cqe(err_qp_node, err_qp_node->qp_id, wq, + idx, wc + *npolled + sw_npolled); + ++sw_npolled; + } + } + list_del(&err_qp_node->entry); + free(err_qp_node); + *npolled += sw_npolled; + } else { + while (sw_npolled < (ne - *npolled)) { + idx = wq->tail & (wq->wqe_cnt - 1); + if (err_qp_node->is_sq && !wq->need_flush[idx]) { + wq->tail++; + continue; + } else { + gen_flush_err_cqe(err_qp_node, err_qp_node->qp_id, wq, + idx, wc + *npolled + sw_npolled); + ++sw_npolled; + } + } + *npolled = ne; + break; + } + } + + return 0; +} + +static inline int poll_cq(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc) ALWAYS_INLINE; +static inline int poll_cq(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc) +{ + struct xsc_cq *cq = to_xcq(ibcq); + struct xsc_resource *rsc = NULL; + int npolled = 0; + int err = CQ_OK; + uint32_t next_cid = cq->cons_index; + + xsc_spin_lock(&cq->lock); + for (npolled = 0; npolled < ne; ++npolled) { + err = xsc_poll_one(cq, &rsc, wc + npolled); + if (err != CQ_OK) + break; + } + + if (err == CQ_EMPTY) { + if (npolled < ne && !(list_empty(&cq->err_state_qp_list))) { + xsc_generate_flush_err_cqe(ibcq, ne, &npolled, wc); + } + } + + udma_to_device_barrier(); + if (next_cid != cq->cons_index) + update_cons_index(cq); + xsc_spin_unlock(&cq->lock); + + return err == CQ_POLL_ERR ? 
err : npolled; +} + +enum polling_mode { + POLLING_MODE_NO_STALL, + POLLING_MODE_STALL, + POLLING_MODE_STALL_ADAPTIVE +}; + +static inline void _xsc_end_poll(struct ibv_cq_ex *ibcq, + int lock, enum polling_mode stall) + ALWAYS_INLINE; +static inline void _xsc_end_poll(struct ibv_cq_ex *ibcq, + int lock, enum polling_mode stall) +{ + struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); + + update_cons_index(cq); + + if (lock) + xsc_spin_unlock(&cq->lock); + + if (stall) { + if (stall == POLLING_MODE_STALL_ADAPTIVE) { + if (!(cq->flags & XSC_CQ_FLAGS_FOUND_CQES)) { + cq->stall_cycles = max(cq->stall_cycles - xsc_stall_cq_dec_step, + xsc_stall_cq_poll_min); + xsc_get_cycles(&cq->stall_last_count); + } else if (cq->flags & XSC_CQ_FLAGS_EMPTY_DURING_POLL) { + cq->stall_cycles = min(cq->stall_cycles + xsc_stall_cq_inc_step, + xsc_stall_cq_poll_max); + xsc_get_cycles(&cq->stall_last_count); + } else { + cq->stall_cycles = max(cq->stall_cycles - xsc_stall_cq_dec_step, + xsc_stall_cq_poll_min); + cq->stall_last_count = 0; + } + } else if (!(cq->flags & XSC_CQ_FLAGS_FOUND_CQES)) { + cq->stall_next_poll = 1; + } + + cq->flags &= ~(XSC_CQ_FLAGS_FOUND_CQES | XSC_CQ_FLAGS_EMPTY_DURING_POLL); + } +} + +static inline int xsc_start_poll(struct ibv_cq_ex *ibcq, struct ibv_poll_cq_attr *attr, + int lock, enum polling_mode stall, + int cqe_version, int clock_update) + ALWAYS_INLINE; +static inline int xsc_start_poll(struct ibv_cq_ex *ibcq, struct ibv_poll_cq_attr *attr, + int lock, enum polling_mode stall, + int cqe_version, int clock_update) +{ + struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); + struct xsc_cqe64 *cqe64; + void *cqe; + int err; + + if (unlikely(attr->comp_mask)) + return EINVAL; + + if (stall) { + if (stall == POLLING_MODE_STALL_ADAPTIVE) { + if (cq->stall_last_count) + xsc_stall_cycles_poll_cq(cq->stall_last_count + cq->stall_cycles); + } else if (cq->stall_next_poll) { + cq->stall_next_poll = 0; + xsc_stall_poll_cq(); + } + } + + if (lock) + xsc_spin_lock(&cq->lock); + + cq->cur_rsc = NULL; + + err = xsc_get_next_cqe(cq, &cqe64, &cqe); + if (err == CQ_EMPTY) { + if (lock) + xsc_spin_unlock(&cq->lock); + + if (stall) { + if (stall == POLLING_MODE_STALL_ADAPTIVE) { + cq->stall_cycles = max(cq->stall_cycles - xsc_stall_cq_dec_step, + xsc_stall_cq_poll_min); + xsc_get_cycles(&cq->stall_last_count); + } else { + cq->stall_next_poll = 1; + } + } + + return ENOENT; + } + + if (stall) + cq->flags |= XSC_CQ_FLAGS_FOUND_CQES; + + err = xsc_parse_lazy_cqe(cq, cqe64, cqe, cqe_version); + if (lock && err) + xsc_spin_unlock(&cq->lock); + + if (stall && err) { + if (stall == POLLING_MODE_STALL_ADAPTIVE) { + cq->stall_cycles = max(cq->stall_cycles - xsc_stall_cq_dec_step, + xsc_stall_cq_poll_min); + cq->stall_last_count = 0; + } + + cq->flags &= ~(XSC_CQ_FLAGS_FOUND_CQES); + + goto out; + } + + if (clock_update && !err) + err = xscdv_get_clock_info(ibcq->context, &cq->last_clock_info); + +out: + return err; +} + +static inline int xsc_next_poll(struct ibv_cq_ex *ibcq, + enum polling_mode stall, int cqe_version) + ALWAYS_INLINE; +static inline int xsc_next_poll(struct ibv_cq_ex *ibcq, + enum polling_mode stall, + int cqe_version) +{ + struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); + struct xsc_cqe64 *cqe64; + void *cqe; + int err; + + err = xsc_get_next_cqe(cq, &cqe64, &cqe); + if (err == CQ_EMPTY) { + if (stall == POLLING_MODE_STALL_ADAPTIVE) + cq->flags |= XSC_CQ_FLAGS_EMPTY_DURING_POLL; + + return ENOENT; + } + + return xsc_parse_lazy_cqe(cq, cqe64, cqe, cqe_version); +} + +static inline int 
xsc_next_poll_adaptive_v0(struct ibv_cq_ex *ibcq) +{ + return xsc_next_poll(ibcq, POLLING_MODE_STALL_ADAPTIVE, 0); +} + +static inline int xsc_next_poll_adaptive_v1(struct ibv_cq_ex *ibcq) +{ + return xsc_next_poll(ibcq, POLLING_MODE_STALL_ADAPTIVE, 1); +} + +static inline int xsc_next_poll_v0(struct ibv_cq_ex *ibcq) +{ + return xsc_next_poll(ibcq, 0, 0); +} + +static inline int xsc_next_poll_v1(struct ibv_cq_ex *ibcq) +{ + return xsc_next_poll(ibcq, 0, 1); +} + +static inline int xsc_start_poll_v0(struct ibv_cq_ex *ibcq, + struct ibv_poll_cq_attr *attr) +{ + return xsc_start_poll(ibcq, attr, 0, 0, 0, 0); +} + +static inline int xsc_start_poll_v1(struct ibv_cq_ex *ibcq, + struct ibv_poll_cq_attr *attr) +{ + return xsc_start_poll(ibcq, attr, 0, 0, 1, 0); +} + +static inline int xsc_start_poll_v0_lock(struct ibv_cq_ex *ibcq, + struct ibv_poll_cq_attr *attr) +{ + return xsc_start_poll(ibcq, attr, 1, 0, 0, 0); +} + +static inline int xsc_start_poll_v1_lock(struct ibv_cq_ex *ibcq, + struct ibv_poll_cq_attr *attr) +{ + return xsc_start_poll(ibcq, attr, 1, 0, 1, 0); +} + +static inline int xsc_start_poll_adaptive_stall_v0_lock(struct ibv_cq_ex *ibcq, + struct ibv_poll_cq_attr *attr) +{ + return xsc_start_poll(ibcq, attr, 1, POLLING_MODE_STALL_ADAPTIVE, 0, 0); +} + +static inline int xsc_start_poll_stall_v0_lock(struct ibv_cq_ex *ibcq, + struct ibv_poll_cq_attr *attr) +{ + return xsc_start_poll(ibcq, attr, 1, POLLING_MODE_STALL, 0, 0); +} + +static inline int xsc_start_poll_adaptive_stall_v1_lock(struct ibv_cq_ex *ibcq, + struct ibv_poll_cq_attr *attr) +{ + return xsc_start_poll(ibcq, attr, 1, POLLING_MODE_STALL_ADAPTIVE, 1, 0); +} + +static inline int xsc_start_poll_stall_v1_lock(struct ibv_cq_ex *ibcq, + struct ibv_poll_cq_attr *attr) +{ + return xsc_start_poll(ibcq, attr, 1, POLLING_MODE_STALL, 1, 0); +} + +static inline int xsc_start_poll_stall_v0(struct ibv_cq_ex *ibcq, + struct ibv_poll_cq_attr *attr) +{ + return xsc_start_poll(ibcq, attr, 0, POLLING_MODE_STALL, 0, 0); +} + +static inline int xsc_start_poll_adaptive_stall_v0(struct ibv_cq_ex *ibcq, + struct ibv_poll_cq_attr *attr) +{ + return xsc_start_poll(ibcq, attr, 0, POLLING_MODE_STALL_ADAPTIVE, 0, 0); +} + +static inline int xsc_start_poll_adaptive_stall_v1(struct ibv_cq_ex *ibcq, + struct ibv_poll_cq_attr *attr) +{ + return xsc_start_poll(ibcq, attr, 0, POLLING_MODE_STALL_ADAPTIVE, 1, 0); +} + +static inline int xsc_start_poll_stall_v1(struct ibv_cq_ex *ibcq, + struct ibv_poll_cq_attr *attr) +{ + return xsc_start_poll(ibcq, attr, 0, POLLING_MODE_STALL, 1, 0); +} + +static inline int xsc_start_poll_v0_lock_clock_update(struct ibv_cq_ex *ibcq, + struct ibv_poll_cq_attr *attr) +{ + return xsc_start_poll(ibcq, attr, 1, 0, 0, 1); +} + +static inline int xsc_start_poll_v1_lock_clock_update(struct ibv_cq_ex *ibcq, + struct ibv_poll_cq_attr *attr) +{ + return xsc_start_poll(ibcq, attr, 1, 0, 1, 1); +} + +static inline int xsc_start_poll_v1_clock_update(struct ibv_cq_ex *ibcq, + struct ibv_poll_cq_attr *attr) +{ + return xsc_start_poll(ibcq, attr, 0, 0, 1, 1); +} + +static inline int xsc_start_poll_v0_clock_update(struct ibv_cq_ex *ibcq, + struct ibv_poll_cq_attr *attr) +{ + return xsc_start_poll(ibcq, attr, 0, 0, 0, 1); +} + +static inline int xsc_start_poll_stall_v1_lock_clock_update(struct ibv_cq_ex *ibcq, + struct ibv_poll_cq_attr *attr) +{ + return xsc_start_poll(ibcq, attr, 1, POLLING_MODE_STALL, 1, 1); +} + +static inline int xsc_start_poll_stall_v0_lock_clock_update(struct ibv_cq_ex *ibcq, + struct ibv_poll_cq_attr *attr) +{ + return 
xsc_start_poll(ibcq, attr, 1, POLLING_MODE_STALL, 0, 1); +} + +static inline int xsc_start_poll_stall_v1_clock_update(struct ibv_cq_ex *ibcq, + struct ibv_poll_cq_attr *attr) +{ + return xsc_start_poll(ibcq, attr, 0, POLLING_MODE_STALL, 1, 1); +} + +static inline int xsc_start_poll_stall_v0_clock_update(struct ibv_cq_ex *ibcq, + struct ibv_poll_cq_attr *attr) +{ + return xsc_start_poll(ibcq, attr, 0, POLLING_MODE_STALL, 0, 1); +} + +static inline int xsc_start_poll_adaptive_stall_v0_lock_clock_update(struct ibv_cq_ex *ibcq, + struct ibv_poll_cq_attr *attr) +{ + return xsc_start_poll(ibcq, attr, 1, POLLING_MODE_STALL_ADAPTIVE, 0, 1); +} + +static inline int xsc_start_poll_adaptive_stall_v1_lock_clock_update(struct ibv_cq_ex *ibcq, + struct ibv_poll_cq_attr *attr) +{ + return xsc_start_poll(ibcq, attr, 1, POLLING_MODE_STALL_ADAPTIVE, 1, 1); +} + +static inline int xsc_start_poll_adaptive_stall_v0_clock_update(struct ibv_cq_ex *ibcq, + struct ibv_poll_cq_attr *attr) +{ + return xsc_start_poll(ibcq, attr, 0, POLLING_MODE_STALL_ADAPTIVE, 0, 1); +} + +static inline int xsc_start_poll_adaptive_stall_v1_clock_update(struct ibv_cq_ex *ibcq, + struct ibv_poll_cq_attr *attr) +{ + return xsc_start_poll(ibcq, attr, 0, POLLING_MODE_STALL_ADAPTIVE, 1, 1); +} + +static inline void xsc_end_poll_adaptive_stall_lock(struct ibv_cq_ex *ibcq) +{ + _xsc_end_poll(ibcq, 1, POLLING_MODE_STALL_ADAPTIVE); +} + +static inline void xsc_end_poll_stall_lock(struct ibv_cq_ex *ibcq) +{ + _xsc_end_poll(ibcq, 1, POLLING_MODE_STALL); +} + +static inline void xsc_end_poll_adaptive_stall(struct ibv_cq_ex *ibcq) +{ + _xsc_end_poll(ibcq, 0, POLLING_MODE_STALL_ADAPTIVE); +} + +static inline void xsc_end_poll_stall(struct ibv_cq_ex *ibcq) +{ + _xsc_end_poll(ibcq, 0, POLLING_MODE_STALL); +} + +static inline void xsc_end_poll(struct ibv_cq_ex *ibcq) +{ + _xsc_end_poll(ibcq, 0, 0); +} + +static inline void xsc_end_poll_lock(struct ibv_cq_ex *ibcq) +{ + _xsc_end_poll(ibcq, 1, 0); +} + +int xsc_poll_cq(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc) +{ + return poll_cq(ibcq, ne, wc); +} + +static inline enum ibv_wc_opcode xsc_cq_read_wc_opcode(struct ibv_cq_ex *ibcq) +{ + struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); + + switch (xscdv_get_cqe_opcode(cq->cqe64)) { + case XSC_CQE_RESP_WR_IMM: + return IBV_WC_RECV_RDMA_WITH_IMM; + case XSC_CQE_RESP_SEND: + case XSC_CQE_RESP_SEND_IMM: + case XSC_CQE_RESP_SEND_INV: + if (unlikely(cq->cqe64->app == XSC_CQE_APP_TAG_MATCHING)) { + switch (cq->cqe64->app_op) { + case XSC_CQE_APP_OP_TM_CONSUMED_MSG_SW_RDNV: + case XSC_CQE_APP_OP_TM_CONSUMED_MSG: + case XSC_CQE_APP_OP_TM_CONSUMED_SW_RDNV: + case XSC_CQE_APP_OP_TM_EXPECTED: + case XSC_CQE_APP_OP_TM_UNEXPECTED: + return IBV_WC_TM_RECV; + case XSC_CQE_APP_OP_TM_NO_TAG: + return IBV_WC_TM_NO_TAG; + } + } + return IBV_WC_RECV; + case XSC_CQE_NO_PACKET: + switch (cq->cqe64->app_op) { + case XSC_CQE_APP_OP_TM_REMOVE: + return IBV_WC_TM_DEL; + case XSC_CQE_APP_OP_TM_APPEND: + return IBV_WC_TM_ADD; + case XSC_CQE_APP_OP_TM_NOOP: + return IBV_WC_TM_SYNC; + case XSC_CQE_APP_OP_TM_CONSUMED: + return IBV_WC_TM_RECV; + } + break; + case XSC_CQE_REQ: + switch (be32toh(cq->cqe64->sop_drop_qpn) >> 24) { + case XSC_OPCODE_RDMA_WRITE_IMM: + case XSC_OPCODE_RDMA_WRITE: + return IBV_WC_RDMA_WRITE; + case XSC_OPCODE_SEND_IMM: + case XSC_OPCODE_SEND: + case XSC_OPCODE_SEND_INVAL: + return IBV_WC_SEND; + case XSC_OPCODE_RDMA_READ: + return IBV_WC_RDMA_READ; + case XSC_OPCODE_ATOMIC_CS: + return IBV_WC_COMP_SWAP; + case XSC_OPCODE_ATOMIC_FA: + return 
IBV_WC_FETCH_ADD; + case XSC_OPCODE_UMR: + return cq->umr_opcode; + case XSC_OPCODE_TSO: + return IBV_WC_TSO; + } + } + + return 0; +} + +static inline uint32_t xsc_cq_read_wc_qp_num(struct ibv_cq_ex *ibcq) +{ + struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); + + return be32toh(cq->cqe64->sop_drop_qpn) & 0xffffff; +} + +static inline unsigned int xsc_cq_read_wc_flags(struct ibv_cq_ex *ibcq) +{ + struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); + int wc_flags = 0; + + if (cq->flags & XSC_CQ_FLAGS_RX_CSUM_VALID) + wc_flags = get_csum_ok(cq->cqe64); + + switch (xscdv_get_cqe_opcode(cq->cqe64)) { + case XSC_CQE_RESP_WR_IMM: + case XSC_CQE_RESP_SEND_IMM: + wc_flags |= IBV_WC_WITH_IMM; + break; + case XSC_CQE_RESP_SEND_INV: + wc_flags |= IBV_WC_WITH_INV; + break; + } + + if (cq->flags & XSC_CQ_FLAGS_TM_SYNC_REQ) + wc_flags |= IBV_WC_TM_SYNC_REQ; + + if (unlikely(cq->cqe64->app == XSC_CQE_APP_TAG_MATCHING)) { + switch (cq->cqe64->app_op) { + case XSC_CQE_APP_OP_TM_CONSUMED_MSG_SW_RDNV: + case XSC_CQE_APP_OP_TM_CONSUMED_MSG: + case XSC_CQE_APP_OP_TM_MSG_COMPLETION_CANCELED: + /* Full completion */ + wc_flags |= (IBV_WC_TM_MATCH | IBV_WC_TM_DATA_VALID); + break; + case XSC_CQE_APP_OP_TM_CONSUMED_SW_RDNV: + case XSC_CQE_APP_OP_TM_CONSUMED: /* First completion */ + wc_flags |= IBV_WC_TM_MATCH; + break; + case XSC_CQE_APP_OP_TM_EXPECTED: /* Second completion */ + wc_flags |= IBV_WC_TM_DATA_VALID; + break; + } + } + + wc_flags |= ((be32toh(cq->cqe64->flags_rqpn) >> 28) & 3) ? IBV_WC_GRH : 0; + return wc_flags; +} + +static inline uint32_t xsc_cq_read_wc_byte_len(struct ibv_cq_ex *ibcq) +{ + struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); + + return be32toh(cq->cqe64->byte_cnt); +} + +static inline uint32_t xsc_cq_read_wc_vendor_err(struct ibv_cq_ex *ibcq) +{ + struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); + struct xsc_err_cqe *ecqe = (struct xsc_err_cqe *)cq->cqe64; + + return ecqe->vendor_err_synd; +} + +static inline __be32 xsc_cq_read_wc_imm_data(struct ibv_cq_ex *ibcq) +{ + struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); + + switch (xscdv_get_cqe_opcode(cq->cqe64)) { + case XSC_CQE_RESP_SEND_INV: + /* This is returning invalidate_rkey which is in host order, see + * ibv_wc_read_invalidated_rkey + */ + return (__force __be32)be32toh(cq->cqe64->imm_inval_pkey); + default: + return cq->cqe64->imm_inval_pkey; + } +} + +static inline uint32_t xsc_cq_read_wc_slid(struct ibv_cq_ex *ibcq) +{ + struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); + + return (uint32_t)be16toh(cq->cqe64->slid); +} + +static inline uint8_t xsc_cq_read_wc_sl(struct ibv_cq_ex *ibcq) +{ + struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); + + return (be32toh(cq->cqe64->flags_rqpn) >> 24) & 0xf; +} + +static inline uint32_t xsc_cq_read_wc_src_qp(struct ibv_cq_ex *ibcq) +{ + struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); + + return be32toh(cq->cqe64->flags_rqpn) & 0xffffff; +} + +static inline uint8_t xsc_cq_read_wc_dlid_path_bits(struct ibv_cq_ex *ibcq) +{ + struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); + + return cq->cqe64->ml_path & 0x7f; +} + +static inline uint64_t xsc_cq_read_wc_completion_ts(struct ibv_cq_ex *ibcq) +{ + struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); + + return be64toh(cq->cqe64->timestamp); +} + +static inline uint64_t +xsc_cq_read_wc_completion_wallclock_ns(struct ibv_cq_ex *ibcq) +{ + struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); + + return xscdv_ts_to_ns(&cq->last_clock_info, + xsc_cq_read_wc_completion_ts(ibcq)); +} + +static inline uint16_t xsc_cq_read_wc_cvlan(struct 
ibv_cq_ex *ibcq) +{ + struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); + + return be16toh(cq->cqe64->vlan_info); +} + +static inline uint32_t xsc_cq_read_flow_tag(struct ibv_cq_ex *ibcq) +{ + struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); + + return be32toh(cq->cqe64->sop_drop_qpn) & XSC_FLOW_TAG_MASK; +} + +static inline void xsc_cq_read_wc_tm_info(struct ibv_cq_ex *ibcq, + struct ibv_wc_tm_info *tm_info) +{ + struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq)); + + tm_info->tag = be64toh(cq->cqe64->tmh.tag); + tm_info->priv = be32toh(cq->cqe64->tmh.app_ctx); +} + +#define BIT(i) (1UL << (i)) + +#define SINGLE_THREADED BIT(0) +#define STALL BIT(1) +#define V1 BIT(2) +#define ADAPTIVE BIT(3) +#define CLOCK_UPDATE BIT(4) + +#define xsc_start_poll_name(cqe_ver, lock, stall, adaptive, clock_update) \ + xsc_start_poll##adaptive##stall##cqe_ver##lock##clock_update +#define xsc_next_poll_name(cqe_ver, adaptive) \ + xsc_next_poll##adaptive##cqe_ver +#define xsc_end_poll_name(lock, stall, adaptive) \ + xsc_end_poll##adaptive##stall##lock + +#define POLL_FN_ENTRY(cqe_ver, lock, stall, adaptive, clock_update) { \ + .start_poll = &xsc_start_poll_name(cqe_ver, lock, stall, adaptive, clock_update), \ + .next_poll = &xsc_next_poll_name(cqe_ver, adaptive), \ + .end_poll = &xsc_end_poll_name(lock, stall, adaptive), \ + } + +static const struct op +{ + int (*start_poll)(struct ibv_cq_ex *ibcq, struct ibv_poll_cq_attr *attr); + int (*next_poll)(struct ibv_cq_ex *ibcq); + void (*end_poll)(struct ibv_cq_ex *ibcq); +} ops[ADAPTIVE + V1 + STALL + SINGLE_THREADED + CLOCK_UPDATE + 1] = { + [V1] = POLL_FN_ENTRY(_v1, _lock, , ,), + [0] = POLL_FN_ENTRY(_v0, _lock, , ,), + [V1 | SINGLE_THREADED] = POLL_FN_ENTRY(_v1, , , , ), + [SINGLE_THREADED] = POLL_FN_ENTRY(_v0, , , , ), + [V1 | STALL] = POLL_FN_ENTRY(_v1, _lock, _stall, , ), + [STALL] = POLL_FN_ENTRY(_v0, _lock, _stall, , ), + [V1 | SINGLE_THREADED | STALL] = POLL_FN_ENTRY(_v1, , _stall, , ), + [SINGLE_THREADED | STALL] = POLL_FN_ENTRY(_v0, , _stall, , ), + [V1 | STALL | ADAPTIVE] = POLL_FN_ENTRY(_v1, _lock, _stall, _adaptive, ), + [STALL | ADAPTIVE] = POLL_FN_ENTRY(_v0, _lock, _stall, _adaptive, ), + [V1 | SINGLE_THREADED | STALL | ADAPTIVE] = POLL_FN_ENTRY(_v1, , _stall, _adaptive, ), + [SINGLE_THREADED | STALL | ADAPTIVE] = POLL_FN_ENTRY(_v0, , _stall, _adaptive, ), + [V1 | CLOCK_UPDATE] = POLL_FN_ENTRY(_v1, _lock, , , _clock_update), + [0 | CLOCK_UPDATE] = POLL_FN_ENTRY(_v0, _lock, , , _clock_update), + [V1 | SINGLE_THREADED | CLOCK_UPDATE] = POLL_FN_ENTRY(_v1, , , , _clock_update), + [SINGLE_THREADED | CLOCK_UPDATE] = POLL_FN_ENTRY(_v0, , , , _clock_update), + [V1 | STALL | CLOCK_UPDATE] = POLL_FN_ENTRY(_v1, _lock, _stall, , _clock_update), + [STALL | CLOCK_UPDATE] = POLL_FN_ENTRY(_v0, _lock, _stall, , _clock_update), + [V1 | SINGLE_THREADED | STALL | CLOCK_UPDATE] = POLL_FN_ENTRY(_v1, , _stall, , _clock_update), + [SINGLE_THREADED | STALL | CLOCK_UPDATE] = POLL_FN_ENTRY(_v0, , _stall, , _clock_update), + [V1 | STALL | ADAPTIVE | CLOCK_UPDATE] = POLL_FN_ENTRY(_v1, _lock, _stall, _adaptive, _clock_update), + [STALL | ADAPTIVE | CLOCK_UPDATE] = POLL_FN_ENTRY(_v0, _lock, _stall, _adaptive, _clock_update), + [V1 | SINGLE_THREADED | STALL | ADAPTIVE | CLOCK_UPDATE] = POLL_FN_ENTRY(_v1, , _stall, _adaptive, _clock_update), + [SINGLE_THREADED | STALL | ADAPTIVE | CLOCK_UPDATE] = POLL_FN_ENTRY(_v0, , _stall, _adaptive, _clock_update), +}; + +int xsc_cq_fill_pfns(struct xsc_cq *cq, + const struct ibv_cq_init_attr_ex *cq_attr, + struct xsc_context *xctx) +{ + 
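/* Select the poll_cq implementation for this CQ: the ops[] table above is
+ * indexed by OR-ing the feature bits (SINGLE_THREADED, STALL, V1, ADAPTIVE,
+ * CLOCK_UPDATE), and every entry was generated by POLL_FN_ENTRY from the
+ * matching xsc_start_poll/xsc_next_poll/xsc_end_poll variants.
+ */
+ 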
const struct op *poll_ops = &ops[((cq->stall_enable && cq->stall_adaptive_enable) ? ADAPTIVE : 0) | + (xctx->cqe_version ? V1 : 0) | + (cq->flags & XSC_CQ_FLAGS_SINGLE_THREADED ? + SINGLE_THREADED : 0) | + (cq->stall_enable ? STALL : 0) | + ((cq_attr->wc_flags & IBV_WC_EX_WITH_COMPLETION_TIMESTAMP_WALLCLOCK) ? + CLOCK_UPDATE : 0)]; + + cq->verbs_cq.cq_ex.start_poll = poll_ops->start_poll; + cq->verbs_cq.cq_ex.next_poll = poll_ops->next_poll; + cq->verbs_cq.cq_ex.end_poll = poll_ops->end_poll; + + cq->verbs_cq.cq_ex.read_opcode = xsc_cq_read_wc_opcode; + cq->verbs_cq.cq_ex.read_vendor_err = xsc_cq_read_wc_vendor_err; + cq->verbs_cq.cq_ex.read_wc_flags = xsc_cq_read_wc_flags; + if (cq_attr->wc_flags & IBV_WC_EX_WITH_BYTE_LEN) + cq->verbs_cq.cq_ex.read_byte_len = xsc_cq_read_wc_byte_len; + if (cq_attr->wc_flags & IBV_WC_EX_WITH_IMM) + cq->verbs_cq.cq_ex.read_imm_data = xsc_cq_read_wc_imm_data; + if (cq_attr->wc_flags & IBV_WC_EX_WITH_QP_NUM) + cq->verbs_cq.cq_ex.read_qp_num = xsc_cq_read_wc_qp_num; + if (cq_attr->wc_flags & IBV_WC_EX_WITH_SRC_QP) + cq->verbs_cq.cq_ex.read_src_qp = xsc_cq_read_wc_src_qp; + if (cq_attr->wc_flags & IBV_WC_EX_WITH_SLID) + cq->verbs_cq.cq_ex.read_slid = xsc_cq_read_wc_slid; + if (cq_attr->wc_flags & IBV_WC_EX_WITH_SL) + cq->verbs_cq.cq_ex.read_sl = xsc_cq_read_wc_sl; + if (cq_attr->wc_flags & IBV_WC_EX_WITH_DLID_PATH_BITS) + cq->verbs_cq.cq_ex.read_dlid_path_bits = xsc_cq_read_wc_dlid_path_bits; + if (cq_attr->wc_flags & IBV_WC_EX_WITH_COMPLETION_TIMESTAMP) + cq->verbs_cq.cq_ex.read_completion_ts = xsc_cq_read_wc_completion_ts; + if (cq_attr->wc_flags & IBV_WC_EX_WITH_CVLAN) + cq->verbs_cq.cq_ex.read_cvlan = xsc_cq_read_wc_cvlan; + if (cq_attr->wc_flags & IBV_WC_EX_WITH_FLOW_TAG) + cq->verbs_cq.cq_ex.read_flow_tag = xsc_cq_read_flow_tag; + if (cq_attr->wc_flags & IBV_WC_EX_WITH_TM_INFO) + cq->verbs_cq.cq_ex.read_tm_info = xsc_cq_read_wc_tm_info; + if (cq_attr->wc_flags & IBV_WC_EX_WITH_COMPLETION_TIMESTAMP_WALLCLOCK) { + if (!xctx->clock_info_page) + return EOPNOTSUPP; + cq->verbs_cq.cq_ex.read_completion_wallclock_ns = + xsc_cq_read_wc_completion_wallclock_ns; + } + + return 0; +} + +int xsc_arm_cq(struct ibv_cq *ibvcq, int solicited) +{ + struct xsc_cq *cq = to_xcq(ibvcq); + union xsc_db_data doorbell; + + doorbell.cqn = cq->cqn; + doorbell.cq_next_cid = cq->cons_index; + doorbell.solicited = !!solicited; + + /* + * Make sure that the doorbell record in host memory is + * written before ringing the doorbell via PCI WC MMIO. 
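+ * mmio_wc_start() gives that ordering before the write-combining MMIO
+ * store, and mmio_flush_writes() below flushes the WC buffer so the
+ * device observes the doorbell promptly.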
+ */ + mmio_wc_start(); + + WR_REG(cq->armdb, doorbell.raw_data); + + mmio_flush_writes(); + + return 0; +} + +void xsc_cq_event(struct ibv_cq *cq) +{ + to_xcq(cq)->arm_sn++; +} + +static int is_equal_rsn(struct xsc_cqe64 *cqe64, uint32_t rsn) +{ + return rsn == (be32toh(cqe64->sop_drop_qpn) & 0xffffff); +} + +static inline int is_equal_uidx(struct xsc_cqe64 *cqe64, uint32_t uidx) +{ + return uidx == (be32toh(cqe64->srqn_uidx) & 0xffffff); +} + +static inline int is_responder(uint8_t opcode) +{ + switch (opcode) { + case XSC_CQE_RESP_WR_IMM: + case XSC_CQE_RESP_SEND: + case XSC_CQE_RESP_SEND_IMM: + case XSC_CQE_RESP_SEND_INV: + case XSC_CQE_RESP_ERR: + return 1; + } + + return 0; +} + +static inline int free_res_cqe(struct xsc_cqe64 *cqe64, uint32_t rsn, int cqe_version) +{ + if (cqe_version) { + if (is_equal_uidx(cqe64, rsn)) { + return 1; + } + } else { + if (is_equal_rsn(cqe64, rsn)) { + return 1; + } + } + + return 0; +} + +void __xsc_cq_clean(struct xsc_cq *cq, uint32_t rsn) +{ + uint32_t prod_index; + int nfreed = 0; + struct xsc_cqe64 *cqe64, *dest64; + void *cqe, *dest; + uint8_t owner_bit; + int cqe_version; + + if (!cq || cq->flags & XSC_CQ_FLAGS_DV_OWNED) + return; + xsc_dbg(to_xctx(cq->verbs_cq.cq_ex.context)->dbg_fp, XSC_DBG_CQ, "\n"); + + /* + * First we need to find the current producer index, so we + * know where to start cleaning from. It doesn't matter if HW + * adds new entries after this loop -- the QP we're worried + * about is already in RESET, so the new entries won't come + * from our QP and therefore don't need to be checked. + */ + for (prod_index = cq->cons_index; get_sw_cqe(cq, prod_index); ++prod_index) + if (prod_index == cq->cons_index + cq->verbs_cq.cq_ex.cqe) + break; + + /* + * Now sweep backwards through the CQ, removing CQ entries + * that match our QP by copying older entries on top of them. + */ + cqe_version = (to_xctx(cq->verbs_cq.cq_ex.context))->cqe_version; + while ((int) --prod_index - (int) cq->cons_index >= 0) { + cqe = get_cqe(cq, prod_index & (cq->verbs_cq.cq_ex.cqe - 1)); + cqe64 = (cq->cqe_sz == 64) ? cqe : cqe + 64; + if (free_res_cqe(cqe64, rsn, cqe_version)) { + ++nfreed; + } else if (nfreed) { + dest = get_cqe(cq, (prod_index + nfreed) & (cq->verbs_cq.cq_ex.cqe - 1)); + dest64 = (cq->cqe_sz == 64) ? dest : dest + 64; + owner_bit = dest64->op_own & XSC_CQE_OWNER_MASK; + memcpy(dest, cqe, cq->cqe_sz); + dest64->op_own = owner_bit | + (dest64->op_own & ~XSC_CQE_OWNER_MASK); + } + } + + if (nfreed) { + cq->cons_index += nfreed; + /* + * Make sure update of buffer contents is done before + * updating consumer index. 
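+ * udma_to_device_barrier() makes the CQEs copied above visible to the
+ * device before update_cons_index() publishes the new consumer index.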
+ */ + udma_to_device_barrier(); + update_cons_index(cq); + } +} + +void xsc_cq_clean(struct xsc_cq *cq, uint32_t qpn) +{ + xsc_spin_lock(&cq->lock); + __xsc_cq_clean(cq, qpn); + xsc_spin_unlock(&cq->lock); +} + +int xsc_alloc_cq_buf(struct xsc_context *xctx, struct xsc_cq *cq, + struct xsc_buf *buf, int nent, int cqe_sz) +{ + struct xsc_device *xdev = to_xdev(xctx->ibv_ctx.context.device); + int ret; + enum xsc_alloc_type type; + enum xsc_alloc_type default_type = XSC_ALLOC_TYPE_ANON; + + if (xsc_use_huge("HUGE_CQ")) + default_type = XSC_ALLOC_TYPE_HUGE; + + xsc_get_alloc_type(xctx, XSC_CQ_PREFIX, &type, default_type); + + ret = xsc_alloc_prefered_buf(xctx, buf, + align(nent * cqe_sz, xdev->page_size), + xdev->page_size, + type, + XSC_CQ_PREFIX); + + if (ret) + return -1; + + memset(buf->buf, 0, nent * cqe_sz); + + return 0; +} + +int xsc_free_cq_buf(struct xsc_context *ctx, struct xsc_buf *buf) +{ + return xsc_free_actual_buf(ctx, buf); +} diff --git a/providers/xscale/cqm_csr_defines.h b/providers/xscale/cqm_csr_defines.h new file mode 100644 index 0000000..9d87438 --- /dev/null +++ b/providers/xscale/cqm_csr_defines.h @@ -0,0 +1,180 @@ +#ifndef _CQM_CSR_DEFINES_H_ +#define _CQM_CSR_DEFINES_H_ + +#define CQM_SOFT_RESET_REG_ADDR 0x6000 +#define CQM_SOFT_RESET_MASK 0x1 +#define CQM_SOFT_RESET_SHIFT 0 + +#define CQM_COUNTER_CONFIG_REG_ADDR 0x6020 +#define CQM_CFG_CNT_WRAP_MASK 0x1 +#define CQM_CFG_CNT_WRAP_SHIFT 0 +#define CQM_CFG_CNT_RC_MASK 0x2 +#define CQM_CFG_CNT_RC_SHIFT 1 + +#define CQM_SCRATCH_PAD_REG_ADDR 0x6040 +#define CQM_SCRATCH_PAD_MASK 0xffffffffffffffff +#define CQM_SCRATCH_PAD_SHIFT 0 + +#define CQM_CQM_CONFIG_REG_RING_ADDR_ARRAY_ADDR 0x6060 +#define CQM_CQM_CONFIG_REG_RING_ADDR_ARRAY_SIZE 16 +#define CQM_CQM_CONFIG_REG_RING_ADDR_ARRAY_STRIDE 0x20 +#define CQM_CFG_CPU2CQM_RING_ADDR_MASK 0xffffffffffffffff +#define CQM_CFG_CPU2CQM_RING_ADDR_SHIFT 0 + +#define CQM_CQM_CONFIG_REG_RING_SIZE_ARRAY_ADDR 0x6260 +#define CQM_CQM_CONFIG_REG_RING_SIZE_ARRAY_SIZE 16 +#define CQM_CQM_CONFIG_REG_RING_SIZE_ARRAY_STRIDE 0x20 +#define CQM_CFG_CPU2CQM_RING_SIZE_MASK 0xffff +#define CQM_CFG_CPU2CQM_RING_SIZE_SHIFT 0 + +#define CQM_CQM_CONFIG_REG_NEXT_CID_ARRAY_ADDR 0x6460 +#define CQM_CQM_CONFIG_REG_NEXT_CID_ARRAY_SIZE 16 +#define CQM_CQM_CONFIG_REG_NEXT_CID_ARRAY_STRIDE 0x20 +#define CQM_CFG_CPU2CQM_NEXT_CID_MASK 0xffff +#define CQM_CFG_CPU2CQM_NEXT_CID_SHIFT 0 + +#define CQM_CQM_CONFIG_REG_CFG_EN_ARRAY_ADDR 0x6660 +#define CQM_CQM_CONFIG_REG_CFG_EN_ARRAY_SIZE 16 +#define CQM_CQM_CONFIG_REG_CFG_EN_ARRAY_STRIDE 0x20 +#define CQM_CFG_CPU2CQM_CFG_EN_MASK 0x1 +#define CQM_CFG_CPU2CQM_CFG_EN_SHIFT 0 + +#define CQM_CQM_CONFIG_CQE_FIFO_TH_ADDR 0x6860 +#define CQM_CFG_CPU2CQM_CQE_FIFO_AFULL_TH_MASK 0xff +#define CQM_CFG_CPU2CQM_CQE_FIFO_AFULL_TH_SHIFT 0 +#define CQM_CFG_CPU2CQM_CQE_FIFO_AMTY_TH_MASK 0xff00 +#define CQM_CFG_CPU2CQM_CQE_FIFO_AMTY_TH_SHIFT 8 + +#define CQM_CQM_CONFIG_CID_FIFO_TH_ADDR 0x6880 +#define CQM_CFG_CPU2CQM_CID_FIFO_AFULL_TH_MASK 0xff +#define CQM_CFG_CPU2CQM_CID_FIFO_AFULL_TH_SHIFT 0 +#define CQM_CFG_CPU2CQM_CID_FIFO_AMTY_TH_MASK 0xff00 +#define CQM_CFG_CPU2CQM_CID_FIFO_AMTY_TH_SHIFT 8 + +#define CQM_CQM_STATUS_REG_ARRAY_ADDR 0x68a0 +#define CQM_CQM_STATUS_REG_ARRAY_SIZE 16 +#define CQM_CQM_STATUS_REG_ARRAY_STRIDE 0x20 +#define CQM_CFG_CQM2CPU_DONE_PID_MASK 0xffff +#define CQM_CFG_CQM2CPU_DONE_PID_SHIFT 0 + +#define CQM_CQM_STATUS_LOCAL_NEXT_PID_REG_ARRAY_ADDR 0x6aa0 +#define CQM_CQM_STATUS_LOCAL_NEXT_PID_REG_ARRAY_SIZE 16 +#define 
CQM_CQM_STATUS_LOCAL_NEXT_PID_REG_ARRAY_STRIDE 0x20 +#define CQM_CQM_LOCAL_NEXT_PID_MASK 0xffff +#define CQM_CQM_LOCAL_NEXT_PID_SHIFT 0 + +#define CQM_CQM_DMA_REQ_LEN_STATE_REG_ADDR 0x6ca0 +#define CQM_CQM_DMA_REQ_LEN_MASK 0x3ff +#define CQM_CQM_DMA_REQ_LEN_SHIFT 0 + +#define CQM_CQM_DMA_REQ_ADDR_STATE_REG_ADDR 0x6cc0 +#define CQM_CQM_DMA_REQ_ADDR_MASK 0xffffffffffffffff +#define CQM_CQM_DMA_REQ_ADDR_SHIFT 0 + +#define CQM_CQM_CQE_L_QPID_STATE_REG_ADDR 0x6ce0 +#define CQM_CQM_CQE_L_QP_ID_MASK 0xffffff +#define CQM_CQM_CQE_L_QP_ID_SHIFT 0 + +#define CQM_CQM_CQE_MSG_LEN_STATE_REG_ADDR 0x6d00 +#define CQM_CQM_CQE_MSG_LEN_MASK 0xffffffff +#define CQM_CQM_CQE_MSG_LEN_SHIFT 0 + +#define CQM_CQM_CQE_ERR_CODE_STATE_REG_ADDR 0x6d20 +#define CQM_CQM_CQE_ERR_CODE_MASK 0xff +#define CQM_CQM_CQE_ERR_CODE_SHIFT 0 + +#define CQM_CQM_CQE_MSG_OPCODE_STATE_REG_ADDR 0x6d40 +#define CQM_CQM_CQE_MSG_OPCODE_MASK 0xff +#define CQM_CQM_CQE_MSG_OPCODE_SHIFT 0 + +#define CQM_CQM_CQE_WQEID_STATE_REG_ADDR 0x6d60 +#define CQM_CQM_CQE_WQEID_MASK 0xffff +#define CQM_CQM_CQE_WQEID_SHIFT 0 + +#define CQM_CQM_CQE_TX0RX1_STATE_REG_ADDR 0x6d80 +#define CQM_CQM_CQE_TX0RX1_MASK 0x1 +#define CQM_CQM_CQE_TX0RX1_SHIFT 0 + +#define CQM_CQM_CQE_CQ_ID_STATE_REG_ADDR 0x6da0 +#define CQM_CQM_CQE_CQ_ID_MASK 0xf +#define CQM_CQM_CQE_CQ_ID_SHIFT 0 + +#define CQM_CQM_WR_ACK_CNT_STATE_REG_ADDR 0x6dc0 +#define CQM_CQM_DMA_WR_ACK_MASK 0xff +#define CQM_CQM_DMA_WR_ACK_SHIFT 0 + +#define CQM_CQM_RD_ACK_CNT_STATE_REG_ADDR 0x6de0 +#define CQM_CQM_DMA_RD_ACK_MASK 0xff +#define CQM_CQM_DMA_RD_ACK_SHIFT 0 + +#define CQM_CQM_CQE_ACK_CNT_STATE_REG_ADDR 0x6e00 +#define CQM_CQM_DMA_CQE_ACK_MASK 0xff +#define CQM_CQM_DMA_CQE_ACK_SHIFT 0 + +#define CQM_CQM_CMD_FIFO_STATE_REG_ADDR 0x6e20 +#define CQM_CQM_FIFO_OVFL_INT_MASK 0x3 +#define CQM_CQM_FIFO_OVFL_INT_SHIFT 0 +#define CQM_CQM_FIFO_UNFL_INT_MASK 0xc +#define CQM_CQM_FIFO_UNFL_INT_SHIFT 2 +#define CQM_CQM_FIFO_MTY_MASK 0x30 +#define CQM_CQM_FIFO_MTY_SHIFT 4 +#define CQM_CQM_FIFO_FUL_MASK 0xc0 +#define CQM_CQM_FIFO_FUL_SHIFT 6 +#define CQM_CQM_RING_FULL_INT_MASK 0xffff00 +#define CQM_CQM_RING_FULL_INT_SHIFT 8 +#define CQM_CQM_DEFINE_ERR_INT_MASK 0x1000000 +#define CQM_CQM_DEFINE_ERR_INT_SHIFT 24 +#define CQM_CQM_SOP_EOP_NO_EQUAL_MASK 0x2000000 +#define CQM_CQM_SOP_EOP_NO_EQUAL_SHIFT 25 + +#define CQM_CQM_FIFO_USED_CNT_REG_ADDR 0x6e40 +#define CQM_CQM_FIFO_USED_CNT_REG_SIZE 2 +#define CQM_CQM_FIFO_USED_CNT_REG_STRIDE 0x20 +#define CQM_CQM_FIFO_USED_CNT_MASK 0x7f +#define CQM_CQM_FIFO_USED_CNT_SHIFT 0 + +#define CQM_CQM_DEBUG_INFO_STATE_REG_0_ADDR 0x6e80 +#define CQM_CQM2CSR_DBG_OPCODE_MASK 0xff +#define CQM_CQM2CSR_DBG_OPCODE_SHIFT 0 +#define CQM_CQM2CSR_DBG_TX0_RX1_MASK 0x100 +#define CQM_CQM2CSR_DBG_TX0_RX1_SHIFT 8 +#define CQM_CQM2CSR_DBG_CAP_MASK 0x200 +#define CQM_CQM2CSR_DBG_CAP_SHIFT 9 +#define CQM_CQM2CSR_DBG_L_QPID_MASK 0x1c00 +#define CQM_CQM2CSR_DBG_L_QPID_SHIFT 10 +#define CQM_CQM2CSR_DBG_SN_MASK 0x1fffffe000 +#define CQM_CQM2CSR_DBG_SN_SHIFT 13 + +#define CQM_CQM_DEBUG_INFO_STATE_REG_1_ADDR 0x6ea0 +#define CQM_CQM2CSR_DBG_MOD_IF_BM_MASK 0xffffffffffffffff +#define CQM_CQM2CSR_DBG_MOD_IF_BM_SHIFT 0 + +#define CQM_CQM_DMA_IN_SOP_CNT_REG_ADDR 0x6ec0 +#define CQM_CQM_DMA_IN_SOP_CNT_MASK 0xffffffffffffffff +#define CQM_CQM_DMA_IN_SOP_CNT_SHIFT 0 + +#define CQM_CQM_DMA_IN_EOP_CNT_REG_ADDR 0x6ee0 +#define CQM_CQM_DMA_IN_EOP_CNT_MASK 0xffffffffffffffff +#define CQM_CQM_DMA_IN_EOP_CNT_SHIFT 0 + +#define CQM_CQM_DMA_IN_VLD_CNT_REG_ADDR 0x6f00 +#define CQM_CQM_DMA_IN_VLD_CNT_MASK 0xffffffffffffffff 
+#define CQM_CQM_DMA_IN_VLD_CNT_SHIFT 0 + +#define CQM_CQM_DMA_REQ_CNT_REG_ADDR 0x6f20 +#define CQM_CQM_DMA_REQ_CNT_MASK 0xffffffffffffffff +#define CQM_CQM_DMA_REQ_CNT_SHIFT 0 + +#define CQM_CQM_DMA_GNT_CNT_REG_ADDR 0x6f40 +#define CQM_CQM_DMA_GNT_CNT_MASK 0xffffffffffffffff +#define CQM_CQM_DMA_GNT_CNT_SHIFT 0 + +#define CQM_CQM_DMA_ACK_VLD_CNT_REG_ADDR 0x6f60 +#define CQM_CQM_DMA_ACK_VLD_CNT_MASK 0xffffffffffffffff +#define CQM_CQM_DMA_ACK_VLD_CNT_SHIFT 0 + +#define CQM_CQM_MER2CQM_VLD_CNT_REG_ADDR 0x6f80 +#define CQM_CQM_MER2CQM_VLD_CNT_MASK 0xffffffffffffffff +#define CQM_CQM_MER2CQM_VLD_CNT_SHIFT 0 + +#endif diff --git a/providers/xscale/dbrec.c b/providers/xscale/dbrec.c new file mode 100644 index 0000000..3987b88 --- /dev/null +++ b/providers/xscale/dbrec.c @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd. + * All rights reserved. + */ + +#define _GNU_SOURCE +#include + +#include +#include +#include + +#include "xscale.h" + +struct xsc_db_page { + struct xsc_db_page *prev, *next; + struct xsc_buf buf; + int num_db; + int use_cnt; + unsigned long free[0]; +}; + +static struct xsc_db_page *__add_page(struct xsc_context *context) +{ + struct xsc_db_page *page; + int ps = to_xdev(context->ibv_ctx.context.device)->page_size; + int pp; + int i; + int nlong; + int ret; + + pp = ps / context->cache_line_size; + nlong = (pp + 8 * sizeof(long) - 1) / (8 * sizeof(long)); + + page = malloc(sizeof *page + nlong * sizeof(long)); + if (!page) + return NULL; + + if (xsc_is_extern_alloc(context)) + ret = xsc_alloc_buf_extern(context, &page->buf, ps); + else + ret = xsc_alloc_buf(&page->buf, ps, ps); + if (ret) { + free(page); + return NULL; + } + + page->num_db = pp; + page->use_cnt = 0; + for (i = 0; i < nlong; ++i) + page->free[i] = ~0; + + page->prev = NULL; + page->next = context->db_list; + context->db_list = page; + if (page->next) + page->next->prev = page; + + return page; +} + +__be32 *xsc_alloc_dbrec(struct xsc_context *context) +{ + struct xsc_db_page *page; + __be32 *db = NULL; + int i, j; + + pthread_mutex_lock(&context->db_list_mutex); + + for (page = context->db_list; page; page = page->next) + if (page->use_cnt < page->num_db) + goto found; + + page = __add_page(context); + if (!page) + goto out; + +found: + ++page->use_cnt; + + for (i = 0; !page->free[i]; ++i) + /* nothing */; + + j = ffsl(page->free[i]); + --j; + page->free[i] &= ~(1UL << j); + db = page->buf.buf + (i * 8 * sizeof(long) + j) * context->cache_line_size; + +out: + pthread_mutex_unlock(&context->db_list_mutex); + + return db; +} + +void xsc_free_db(struct xsc_context *context, __be32 *db) +{ + struct xsc_db_page *page; + uintptr_t ps = to_xdev(context->ibv_ctx.context.device)->page_size; + int i; + + pthread_mutex_lock(&context->db_list_mutex); + + for (page = context->db_list; page; page = page->next) + if (((uintptr_t) db & ~(ps - 1)) == (uintptr_t) page->buf.buf) + break; + + if (!page) + goto out; + + i = ((void *) db - page->buf.buf) / context->cache_line_size; + page->free[i / (8 * sizeof(long))] |= 1UL << (i % (8 * sizeof(long))); + + if (!--page->use_cnt) { + if (page->prev) + page->prev->next = page->next; + else + context->db_list = page->next; + if (page->next) + page->next->prev = page->prev; + + if (page->buf.type == XSC_ALLOC_TYPE_EXTERNAL) + xsc_free_buf_extern(context, &page->buf); + else + xsc_free_buf(&page->buf); + + free(page); + } + +out: + pthread_mutex_unlock(&context->db_list_mutex); +} diff --git a/providers/xscale/libxsc.map 
b/providers/xscale/libxsc.map new file mode 100644 index 0000000..005c161 --- /dev/null +++ b/providers/xscale/libxsc.map @@ -0,0 +1,59 @@ +/* Export symbols should be added below according to + Documentation/versioning.md document. */ +XSC_1.0 { + global: + xscdv_query_device; + xscdv_init_obj; + local: *; +}; + +XSC_1.1 { + global: + xscdv_create_cq; +} XSC_1.0; + +XSC_1.2 { + global: + xscdv_init_obj; + xscdv_set_context_attr; +} XSC_1.1; + +XSC_1.3 { + global: + xscdv_create_qp; + xscdv_create_wq; +} XSC_1.2; + +XSC_1.4 { + global: + xscdv_get_clock_info; +} XSC_1.3; + +XSC_1.5 { + global: + xscdv_create_flow_action_esp; +} XSC_1.4; + +XSC_1.6 { + global: + xscdv_create_flow_matcher; + xscdv_destroy_flow_matcher; + xscdv_create_flow; +} XSC_1.5; + +XSC_1.7 { + global: + xscdv_create_flow_action_modify_header; + xscdv_create_flow_action_packet_reformat; + xscdv_devx_alloc_uar; + xscdv_devx_free_uar; + xscdv_devx_general_cmd; + xscdv_devx_obj_create; + xscdv_devx_obj_destroy; + xscdv_devx_obj_modify; + xscdv_devx_obj_query; + xscdv_devx_query_eqn; + xscdv_devx_umem_dereg; + xscdv_devx_umem_reg; + xscdv_open_device; +} XSC_1.6; diff --git a/providers/xscale/qp.c b/providers/xscale/qp.c new file mode 100644 index 0000000..04e87e2 --- /dev/null +++ b/providers/xscale/qp.c @@ -0,0 +1,678 @@ +/* + * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd. + * All rights reserved. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "xscale.h" +#include "wqe.h" +#include "xsc_hsi.h" + +static const uint32_t xsc_ib_opcode[] = { + [IBV_WR_SEND] = XSC_MSG_OPCODE_SEND, + [IBV_WR_SEND_WITH_IMM] = XSC_MSG_OPCODE_SEND, + [IBV_WR_RDMA_WRITE] = XSC_MSG_OPCODE_RDMA_WRITE, + [IBV_WR_RDMA_WRITE_WITH_IMM] = XSC_MSG_OPCODE_RDMA_WRITE, + [IBV_WR_RDMA_READ] = XSC_MSG_OPCODE_RDMA_READ, + [IBV_WR_SEND_WITH_INV] = XSC_MSG_OPCODE_SEND, +}; + +static void *get_recv_wqe(struct xsc_qp *qp, int n) +{ + return qp->rq_start + (n << qp->rq.wqe_shift); +} + +static void *get_wq_recv_wqe(struct xsc_rwq *rwq, int n) +{ + return rwq->pbuff + (n << rwq->rq.wqe_shift); +} + +static void *get_seg_wqe(void *first, int n) +{ + return first + (n << XSC_BASE_WQE_SHIFT); +} + +void *xsc_get_send_wqe(struct xsc_qp *qp, int n) +{ + return qp->sq_start + (n << qp->sq.wqe_shift); +} + +void xsc_init_rwq_indices(struct xsc_rwq *rwq) +{ + rwq->rq.head = 0; + rwq->rq.tail = 0; +} + +void xsc_init_qp_indices(struct xsc_qp *qp) +{ + qp->sq.head = 0; + qp->sq.tail = 0; + qp->rq.head = 0; + qp->rq.tail = 0; + qp->sq.cur_post = 0; +} + +static int xsc_wq_overflow(struct xsc_wq *wq, int nreq, struct xsc_cq *cq) +{ + unsigned cur; + + cur = wq->head - wq->tail; + if (cur + nreq < wq->max_post) + return 0; + + xsc_spin_lock(&cq->lock); + cur = wq->head - wq->tail; + xsc_spin_unlock(&cq->lock); + + return cur + nreq >= wq->max_post; +} + +static inline void set_remote_addr_seg(struct xsc_wqe_data_seg *remote_seg, + uint32_t msg_len, uint64_t remote_addr, uint32_t rkey) +{ + WR_LE_32(remote_seg->seg_len, msg_len); + WR_LE_32(remote_seg->mkey, rkey); + WR_LE_64(remote_seg->va, remote_addr); +} + +static void set_local_data_seg(struct xsc_wqe_data_seg *data_seg, struct ibv_sge *sg) +{ + WR_LE_32(data_seg->seg_len, sg->length); + WR_LE_32(data_seg->mkey, sg->lkey); + WR_LE_64(data_seg->va, sg->addr); +} + +static __be32 send_ieth(struct ibv_send_wr *wr) +{ + switch (wr->opcode) { + case IBV_WR_SEND_WITH_IMM: + case IBV_WR_RDMA_WRITE_WITH_IMM: + return wr->imm_data; + default: + return 0; + } 
+} + +static int set_data_inl_seg(struct xsc_qp *qp, struct ibv_send_wr *wr, + struct xsc_send_wqe_ctrl_seg *ctrl) +{ + void *data_seg; + unsigned seg_index; + void *addr; + int len = 0; + int i; + const int ds_len = sizeof(struct xsc_wqe_data_seg); + int left_len = 0; + int msg_len = ctrl->msg_len; + + if (wr->opcode == IBV_WR_SEND || wr->opcode == IBV_WR_SEND_WITH_IMM) + seg_index = 1; + else + seg_index = 2; + + if (unlikely(msg_len > qp->max_inline_data)) + return ENOMEM; + + for (i = 0; i < wr->num_sge; ++i) { + if (likely(wr->sg_list[i].length)) { + addr = (void*)wr->sg_list[i].addr; + len = wr->sg_list[i].length; + if (left_len > 0) { + int copy_len = min_t(int, len, left_len); + memcpy(data_seg, addr, copy_len); + addr += copy_len; + len -= copy_len; + } + + while (len >= ds_len) { + data_seg = get_seg_wqe(ctrl, seg_index); + seg_index++; + memcpy(data_seg, addr, ds_len); + addr += ds_len; + len -= ds_len; + } + + if (len > 0) { + data_seg = get_seg_wqe(ctrl, seg_index); + seg_index++; + memcpy(data_seg, addr, len); + data_seg += len; + left_len = ds_len - len; + } else { + left_len = 0; + } + } + } + + ctrl->ds_data_num = seg_index - 1; + + return 0; +} + +static void zero_send_ds(int idx, struct xsc_qp *qp) +{ + void *seg; + uint64_t *uninitialized_var(p); + int i; + + seg = (void*)xsc_get_send_wqe(qp, idx); + for (i = 1; i < qp->sq.seg_cnt; i++) { + p = get_seg_wqe(seg, i); + p[0] = p[1] = 0; + } +} + +static void zero_recv_ds(int idx, struct xsc_qp *qp) +{ + void *seg; + uint64_t *uninitialized_var(p); + int i; + + seg = (void*)get_recv_wqe(qp, idx); + for (i = 1; i < qp->rq.seg_cnt; i++) { + p = get_seg_wqe(seg, i); + p[0] = p[1] = 0; + } +} + +#ifdef XSC_DEBUG +static void dump_wqe(int type, int idx, struct xsc_qp *qp) +{ + /* type0 send type1 recv */ + uint32_t *uninitialized_var(p); + int i; + void *seg; + + if (type == 0) { + seg = (void*)xsc_get_send_wqe(qp, idx); + xsc_dbg(to_xctx(qp->ibv_qp->context)->dbg_fp, XSC_DBG_QP, + "dump send wqe at %p\n", seg); + for (i = 0; i < qp->sq.seg_cnt; i++) { + p = get_seg_wqe(seg, i); + xsc_dbg(to_xctx(qp->ibv_qp->context)->dbg_fp, XSC_DBG_QP, + "0x%08x 0x%08x 0x%08x 0x%08x\n", p[0], p[1], p[2], p[3]); + } + } else if (type == 1) { + seg = (void*)get_recv_wqe(qp, idx); + xsc_dbg(to_xctx(qp->ibv_qp->context)->dbg_fp, XSC_DBG_QP, + "dump recv wqe at %p\n", seg); + for (i = 0; i < qp->rq.seg_cnt; i++) { + p = get_seg_wqe(seg, i); + xsc_dbg(to_xctx(qp->ibv_qp->context)->dbg_fp, XSC_DBG_QP, + "0x%08x 0x%08x 0x%08x 0x%08x\n", p[0], p[1], p[2], p[3]); + } + } else { + xsc_dbg(to_xctx(qp->ibv_qp->context)->dbg_fp, XSC_DBG_QP, + "unknown type %d\n", type); + } +} +#else +static inline void dump_wqe(int type, int idx, struct xsc_qp *qp) {}; +#endif + +static inline void xsc_post_send_db(struct xsc_qp *qp, int nreq) +{ + uint16_t next_pid; + union xsc_db_data db; + + if (unlikely(!nreq)) + return; + + qp->sq.head += nreq; + next_pid = qp->sq.head << (qp->sq.wqe_shift - XSC_BASE_WQE_SHIFT); + db.sq_next_pid = next_pid; + db.sqn = qp->sqn; + /* + * Make sure that descriptors are written before + * updating doorbell record and ringing the doorbell + */ + xsc_dbg(to_xctx(qp->ibv_qp->context)->dbg_fp, XSC_DBG_QP_SEND, "nreq:%d\n", nreq); + udma_to_device_barrier(); + WR_REG(qp->sq.db, db.raw_data); +} + +static inline int _xsc_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr, + struct ibv_send_wr **bad_wr) +{ + struct xsc_qp *qp = to_xqp(ibqp); + void *seg; + struct xsc_send_wqe_ctrl_seg *ctrl; + struct xsc_wqe_data_seg *data_seg; + + int nreq; 
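+ /* A send WQE is built from XSC_BASE_WQE_SHIFT-sized segments: the ctrl
+ * seg sits at index 0, RDMA opcodes add a remote-address seg at index 1,
+ * and the local data segs (or packed inline data) follow.
+ */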
+ int err = 0; + int i; + unsigned idx; + unsigned seg_index = 1; + unsigned msg_len = 0; + + if (unlikely(ibqp->state < IBV_QPS_RTS)) { + xsc_dbg(to_xctx(ibqp->context)->dbg_fp, XSC_DBG_QP_SEND, + "qp state is %u, should not post send\n", ibqp->state); + err = EINVAL; + *bad_wr = wr; + return err; + } + + xsc_spin_lock(&qp->sq.lock); + + for (nreq = 0; wr; ++nreq, wr = wr->next) { + seg_index = 1; + msg_len = 0; + if (unlikely(wr->opcode < 0 || + wr->opcode >= sizeof(xsc_ib_opcode) / sizeof(xsc_ib_opcode[0]))) { + xsc_dbg(to_xctx(ibqp->context)->dbg_fp, XSC_DBG_QP_SEND, + "bad opcode %d\n", wr->opcode); + err = EINVAL; + *bad_wr = wr; + goto out; + } + + if (unlikely(xsc_wq_overflow(&qp->sq, nreq, + to_xcq(qp->ibv_qp->send_cq)))) { + xsc_dbg(to_xctx(ibqp->context)->dbg_fp, XSC_DBG_QP_SEND, + "send work queue overflow\n"); + err = ENOMEM; + *bad_wr = wr; + goto out; + } + + if (unlikely(wr->num_sge > qp->sq.max_gs)) { + xsc_dbg(to_xctx(ibqp->context)->dbg_fp, XSC_DBG_QP_SEND, + "max gs exceeded %d (max = %d)\n", + wr->num_sge, qp->sq.max_gs); + err = ENOMEM; + *bad_wr = wr; + goto out; + } + + if (unlikely(wr->opcode == IBV_WR_RDMA_READ && wr->num_sge > 1)) { + xsc_dbg(to_xctx(ibqp->context)->dbg_fp, XSC_DBG_QP_SEND, + "rdma read, max gs exceeded %d (max = 1)\n", + wr->num_sge); + err = ENOMEM; + *bad_wr = wr; + goto out; + } + + idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1); + zero_send_ds(idx, qp); + ctrl = seg = xsc_get_send_wqe(qp, idx); + ctrl->ds_data_num = 0; + WR_LE_16(ctrl->wqe_id, + qp->sq.cur_post << (qp->sq.wqe_shift - XSC_BASE_WQE_SHIFT)); + ctrl->se = wr->send_flags & IBV_SEND_SOLICITED ? 1 : 0; + ctrl->ce = qp->sq_signal_bits ? 1 : (wr->send_flags & IBV_SEND_SIGNALED ? 1 : 0); + ctrl->in_line = wr->send_flags & IBV_SEND_INLINE ? 1 : 0; + for (i = 0; i < wr->num_sge; ++i) { + if (likely(wr->sg_list[i].length)) { + msg_len += wr->sg_list[i].length; + } + } + ctrl->msg_len = msg_len; + ctrl->with_immdt = 0; + + if (unlikely(wr->opcode == IBV_WR_RDMA_READ && msg_len == 0)) { + xsc_dbg(to_xctx(ibqp->context)->dbg_fp, XSC_DBG_QP_SEND, + "rdma read, msg len should not be 0\n"); + /* workaround, return success for posting zero-length read */ + err = 0; + goto out; + } + + switch (ibqp->qp_type) { + case IBV_QPT_RC: + switch (wr->opcode) { + case IBV_WR_SEND_WITH_INV: + case IBV_WR_SEND: + break; + case IBV_WR_SEND_WITH_IMM: + ctrl->with_immdt = 1; + ctrl->opcode_data = send_ieth(wr); + break; + case IBV_WR_RDMA_WRITE_WITH_IMM: + ctrl->with_immdt = 1; + ctrl->opcode_data = send_ieth(wr); + SWITCH_FALLTHROUGH; + case IBV_WR_RDMA_READ: + case IBV_WR_RDMA_WRITE: + if (ctrl->msg_len == 0) + break; + ctrl->ds_data_num++; + data_seg = get_seg_wqe(ctrl, seg_index); + set_remote_addr_seg( + data_seg, + msg_len, + wr->wr.rdma.remote_addr, + wr->wr.rdma.rkey); + seg_index++; + break; + default: + printf("debug: opcode:%u NOT supported\n", wr->opcode); + err = EPERM; + *bad_wr = wr; + goto out; + } + break; + default: + xsc_dbg(to_xctx(ibqp->context)->dbg_fp, XSC_DBG_QP_SEND, + "qp type:%u NOT supported\n", ibqp->qp_type); + err = EPERM; + *bad_wr = wr; + goto out; + } + + if (wr->send_flags & IBV_SEND_INLINE && wr->num_sge) { + err = set_data_inl_seg(qp, wr, ctrl); + if (unlikely(err)) { + *bad_wr = wr; + xsc_dbg(to_xctx(ibqp->context)->dbg_fp, XSC_DBG_QP_SEND, + "inline layout failed, err %d\n", err); + goto out; + } + } else { + for (i = 0; i < wr->num_sge; ++i, ++seg_index) { + if (likely(wr->sg_list[i].length)) { + data_seg = get_seg_wqe(ctrl, seg_index); + set_local_data_seg(data_seg, 
&wr->sg_list[i]); + ctrl->ds_data_num++; + } + } + } + + ctrl->msg_opcode = xsc_ib_opcode[wr->opcode]; + if (ctrl->msg_len == 0) { + ctrl->ds_data_num = 0; + zero_send_ds(idx, qp); + } + qp->sq.wrid[idx] = wr->wr_id; + qp->sq.wqe_head[idx] = qp->sq.head + nreq; + qp->sq.cur_post += 1; + if (ctrl->ce) { + qp->sq.flush_wqe_cnt++; + qp->sq.need_flush[idx] = 1; + } + qp->sq.wr_opcode[idx] = wr->opcode; + + if (xsc_debug_mask & XSC_DBG_QP_SEND) + dump_wqe(0, idx, qp); + } + +out: + xsc_post_send_db(qp, nreq); + xsc_spin_unlock(&qp->sq.lock); + + return err; +} + +int xsc_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr, + struct ibv_send_wr **bad_wr) +{ + return _xsc_post_send(ibqp, wr, bad_wr); +} + +static void set_wq_sig_seg(struct xsc_rwq *rwq, struct xsc_rwqe_sig *sig, + int size, uint16_t idx) +{ + uint8_t sign; + uint32_t qpn = rwq->wq.wq_num; + + sign = calc_sig(sig, size); + sign ^= calc_sig(&qpn, 4); + sign ^= calc_sig(&idx, 2); + sig->signature = sign; +} + +int xsc_post_wq_recv(struct ibv_wq *ibwq, struct ibv_recv_wr *wr, + struct ibv_recv_wr **bad_wr) +{ + struct xsc_rwq *rwq = to_xrwq(ibwq); + struct xsc_wqe_data_seg *scat; + int err = 0; + int nreq; + int ind; + int i, j; + struct xsc_rwqe_sig *sig; + + xsc_spin_lock(&rwq->rq.lock); + + ind = rwq->rq.head & (rwq->rq.wqe_cnt - 1); + + for (nreq = 0; wr; ++nreq, wr = wr->next) { + if (unlikely(xsc_wq_overflow(&rwq->rq, nreq, + to_xcq(rwq->wq.cq)))) { + err = ENOMEM; + *bad_wr = wr; + goto out; + } + + if (unlikely(wr->num_sge > rwq->rq.max_gs)) { + err = EINVAL; + *bad_wr = wr; + goto out; + } + + scat = get_wq_recv_wqe(rwq, ind); + sig = (struct xsc_rwqe_sig *)scat; + if (unlikely(rwq->wq_sig)) { + memset(sig, 0, 1 << rwq->rq.wqe_shift); + ++scat; + } + + for (i = 0, j = 0; i < wr->num_sge; ++i) { + if (unlikely(!wr->sg_list[i].length)) + continue; + //set_data_ptr_seg(scat + j++, wr->sg_list + i); + } + + if (j < rwq->rq.max_gs) { + scat[j].seg_len = 0; + scat[j].mkey = htole32(XSC_INVALID_LKEY); + scat[j].va = 0; + } + + if (unlikely(rwq->wq_sig)) + set_wq_sig_seg(rwq, sig, (wr->num_sge + 1) << 4, + rwq->rq.head & 0xffff); + + rwq->rq.wrid[ind] = wr->wr_id; + + ind = (ind + 1) & (rwq->rq.wqe_cnt - 1); + rwq->rq.flush_wqe_cnt++; + } + +out: + if (likely(nreq)) { + rwq->rq.head += nreq; + /* + * Make sure that descriptors are written before + * doorbell record. 
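+ * udma_to_device_barrier() orders the WQE stores above ahead of the
+ * big-endian head value written to rwq->recv_db below.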
+ */ + udma_to_device_barrier(); + *(rwq->recv_db) = htobe32(rwq->rq.head & 0xffff); + } + + xsc_spin_unlock(&rwq->rq.lock); + + return err; +} + +int xsc_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr, + struct ibv_recv_wr **bad_wr) +{ + struct xsc_qp *qp = to_xqp(ibqp); + struct xsc_wqe_data_seg *recv_head; + struct xsc_wqe_data_seg *data_seg; + int err = 0; + uint16_t next_pid = 0; + union xsc_db_data db; + int nreq; + uint16_t idx; + int i; + + xsc_spin_lock(&qp->rq.lock); + + idx = qp->rq.head & (qp->rq.wqe_cnt - 1); + + zero_recv_ds(idx, qp); + for (nreq = 0; wr; ++nreq, wr = wr->next) { + if (unlikely(xsc_wq_overflow(&qp->rq, nreq, + to_xcq(qp->ibv_qp->recv_cq)))) { + printf("recv work queue overflow\n"); + err = ENOMEM; + *bad_wr = wr; + goto out; + } + + if (unlikely(wr->num_sge > qp->rq.max_gs)) { + printf("max gs exceeded %d (max = %d)\n", + wr->num_sge, qp->rq.max_gs); + err = EINVAL; + *bad_wr = wr; + goto out; + } + + recv_head = get_recv_wqe(qp, idx); + + for (i = 0; i < wr->num_sge; ++i) { + if (unlikely(!wr->sg_list[i].length)) + continue; + data_seg = get_seg_wqe(recv_head, i); + WR_LE_32(data_seg->seg_len, wr->sg_list[i].length); + WR_LE_32(data_seg->mkey, wr->sg_list[i].lkey); + WR_LE_64(data_seg->va, wr->sg_list[i].addr); + } + + qp->rq.wrid[idx] = wr->wr_id; + + dump_wqe(1, idx, qp); + idx = (idx + 1) & (qp->rq.wqe_cnt - 1); + qp->rq.flush_wqe_cnt++; + } + +out: + if (likely(nreq)) { + qp->rq.head += nreq; + next_pid = qp->rq.head << (qp->rq.wqe_shift - XSC_BASE_WQE_SHIFT); + db.rq_next_pid = next_pid; + db.rqn = qp->rqn; + + /* + * Make sure that descriptors are written before + * doorbell record. + */ + udma_to_device_barrier(); + WR_REG(qp->rq.db, db.raw_data); + } + + xsc_spin_unlock(&qp->rq.lock); + + return err; +} + +int xsc_use_huge(const char *key) +{ + char *e; + e = getenv(key); + if (e && !strcmp(e, "y")) + return 1; + + return 0; +} + +struct xsc_qp *xsc_find_qp(struct xsc_context *ctx, uint32_t qpn) +{ + int tind = qpn >> XSC_QP_TABLE_SHIFT; + + if (ctx->qp_table[tind].refcnt) + return ctx->qp_table[tind].table[qpn & XSC_QP_TABLE_MASK]; + else + return NULL; +} + +int xsc_store_qp(struct xsc_context *ctx, uint32_t qpn, struct xsc_qp *qp) +{ + int tind = qpn >> XSC_QP_TABLE_SHIFT; + + if (!ctx->qp_table[tind].refcnt) { + ctx->qp_table[tind].table = calloc(XSC_QP_TABLE_MASK + 1, + sizeof(struct xsc_qp *)); + if (!ctx->qp_table[tind].table) + return -1; + } + + ++ctx->qp_table[tind].refcnt; + ctx->qp_table[tind].table[qpn & XSC_QP_TABLE_MASK] = qp; + return 0; +} + +void xsc_clear_qp(struct xsc_context *ctx, uint32_t qpn) +{ + int tind = qpn >> XSC_QP_TABLE_SHIFT; + + if (!--ctx->qp_table[tind].refcnt) + free(ctx->qp_table[tind].table); + else + ctx->qp_table[tind].table[qpn & XSC_QP_TABLE_MASK] = NULL; +} + +int xsc_err_state_qp(struct ibv_qp *qp, enum ibv_qp_state cur_state, + enum ibv_qp_state state) +{ + struct xsc_err_state_qp_node *tmp, *err_rq_node, *err_sq_node; + struct xsc_qp *xqp = to_xqp(qp); + int ret = 0; + + xsc_dbg(to_xctx(qp->context)->dbg_fp, XSC_DBG_QP, + "modify qp: qpid %d, cur_qp_state %d, qp_state %d\n", xqp->rsc.rsn, cur_state, state); + if (cur_state == IBV_QPS_ERR && state != IBV_QPS_ERR) { + if(qp->recv_cq) { + list_for_each_safe(&to_xcq(qp->recv_cq)->err_state_qp_list, err_rq_node, tmp, entry) { + if (err_rq_node->qp_id == xqp->rsc.rsn) { + list_del(&err_rq_node->entry); + free(err_rq_node); + } + } + } + + if(qp->send_cq) { + list_for_each_safe(&to_xcq(qp->send_cq)->err_state_qp_list, err_sq_node, tmp, entry) { + if 
(err_sq_node->qp_id == xqp->rsc.rsn) { + list_del(&err_sq_node->entry); + free(err_sq_node); + } + } + } + return ret; + } + + if (cur_state != IBV_QPS_ERR && state == IBV_QPS_ERR) { + if(qp->recv_cq) { + err_rq_node = calloc(1, sizeof(*err_rq_node)); + if (!err_rq_node) + return ENOMEM; + err_rq_node->qp_id = xqp->rsc.rsn; + err_rq_node->is_sq = false; + list_add_tail(&to_xcq(qp->recv_cq)->err_state_qp_list, &err_rq_node->entry); + } + + if(qp->send_cq) { + err_sq_node = calloc(1, sizeof(*err_sq_node)); + if (!err_sq_node) + return ENOMEM; + err_sq_node->qp_id = xqp->rsc.rsn; + err_sq_node->is_sq = true; + list_add_tail(&to_xcq(qp->send_cq)->err_state_qp_list, &err_sq_node->entry); + } + } + return ret; +} diff --git a/providers/xscale/rqm_csr_defines.h b/providers/xscale/rqm_csr_defines.h new file mode 100644 index 0000000..9552855 --- /dev/null +++ b/providers/xscale/rqm_csr_defines.h @@ -0,0 +1,200 @@ +#ifndef _RQM_CSR_DEFINES_H_ +#define _RQM_CSR_DEFINES_H_ + +#define RQM_SOFT_RESET_REG_ADDR 0x5000 +#define RQM_SOFT_RESET_MASK 0x1 +#define RQM_SOFT_RESET_SHIFT 0 + +#define RQM_COUNTER_CONFIG_REG_ADDR 0x5020 +#define RQM_CFG_CNT_WRAP_MASK 0x1 +#define RQM_CFG_CNT_WRAP_SHIFT 0 +#define RQM_CFG_CNT_RC_MASK 0x2 +#define RQM_CFG_CNT_RC_SHIFT 1 + +#define RQM_SCRATCH_PAD_REG_ADDR 0x5040 +#define RQM_SCRATCH_PAD_MASK 0xffffffffffffffff +#define RQM_SCRATCH_PAD_SHIFT 0 + +#define RQM_RQM_CONFIG_REG_RING_ADDR_ARRAY_ADDR 0x5060 +#define RQM_RQM_CONFIG_REG_RING_ADDR_ARRAY_SIZE 8 +#define RQM_RQM_CONFIG_REG_RING_ADDR_ARRAY_STRIDE 0x20 +#define RQM_CFG_CPU2RQM_RING_ADDR_MASK 0xffffffffffffffff +#define RQM_CFG_CPU2RQM_RING_ADDR_SHIFT 0 + +#define RQM_RQM_CONFIG_REG_RING_SIZE_ARRAY_ADDR 0x5160 +#define RQM_RQM_CONFIG_REG_RING_SIZE_ARRAY_SIZE 8 +#define RQM_RQM_CONFIG_REG_RING_SIZE_ARRAY_STRIDE 0x20 +#define RQM_CFG_CPU2RQM_RING_SIZE_MASK 0xffff +#define RQM_CFG_CPU2RQM_RING_SIZE_SHIFT 0 + +#define RQM_RQM_CONFIG_REG_NEXT_PID_ARRAY_ADDR 0x5260 +#define RQM_RQM_CONFIG_REG_NEXT_PID_ARRAY_SIZE 8 +#define RQM_RQM_CONFIG_REG_NEXT_PID_ARRAY_STRIDE 0x20 +#define RQM_CFG_CPU2RQM_NEXT_PID_MASK 0xffff +#define RQM_CFG_CPU2RQM_NEXT_PID_SHIFT 0 + +#define RQM_RQM_CONFIG_REG_CFG_EN_ARRAY_ADDR 0x5360 +#define RQM_RQM_CONFIG_REG_CFG_EN_ARRAY_SIZE 8 +#define RQM_RQM_CONFIG_REG_CFG_EN_ARRAY_STRIDE 0x20 +#define RQM_CFG_CPU2RQM_CFG_EN_MASK 0x1 +#define RQM_CFG_CPU2RQM_CFG_EN_SHIFT 0 + +#define RQM_RQM_STATUS_REG_ARRAY_ADDR 0x5460 +#define RQM_RQM_STATUS_REG_ARRAY_SIZE 8 +#define RQM_RQM_STATUS_REG_ARRAY_STRIDE 0x20 +#define RQM_STS_RQM2CPU_DONE_CID_MASK 0xffff +#define RQM_STS_RQM2CPU_DONE_CID_SHIFT 0 + +#define RQM_RQM_CONFIG_MER_QPID_FIFO_TH_ADDR 0x5560 +#define RQM_CFG_CPU2RQM_MER_QPID_FIFO_AMTY_TH_MASK 0x1f +#define RQM_CFG_CPU2RQM_MER_QPID_FIFO_AMTY_TH_SHIFT 0 +#define RQM_CFG_CPU2RQM_MER_QPID_FIFO_AFUL_TH_MASK 0x3e0 +#define RQM_CFG_CPU2RQM_MER_QPID_FIFO_AFUL_TH_SHIFT 5 + +#define RQM_RQM_CONFIG_DMA_QPID_FIFO_TH_ADDR 0x5580 +#define RQM_CFG_CPU2RQM_DMA_QPID_FIFO_AMTY_TH_MASK 0x1f +#define RQM_CFG_CPU2RQM_DMA_QPID_FIFO_AMTY_TH_SHIFT 0 +#define RQM_CFG_CPU2RQM_DMA_QPID_FIFO_AFUL_TH_MASK 0x3e0 +#define RQM_CFG_CPU2RQM_DMA_QPID_FIFO_AFUL_TH_SHIFT 5 + +#define RQM_RQM_CONFIG_PTR_QPID_FIFO_TH_ADDR 0x55a0 +#define RQM_CFG_CPU2RQM_PTR_QPID_FIFO_AMTY_TH_MASK 0x1f +#define RQM_CFG_CPU2RQM_PTR_QPID_FIFO_AMTY_TH_SHIFT 0 +#define RQM_CFG_CPU2RQM_PTR_QPID_FIFO_AFUL_TH_MASK 0x3e0 +#define RQM_CFG_CPU2RQM_PTR_QPID_FIFO_AFUL_TH_SHIFT 5 + +#define RQM_RQM_CONFIG_WQE_FIFO_AMTY_TH_ADDR 0x55c0 +#define 
RQM_RQM_CONFIG_WQE_FIFO_AMTY_TH_SIZE 8 +#define RQM_RQM_CONFIG_WQE_FIFO_AMTY_TH_STRIDE 0x20 +#define RQM_CFG_CPU2RQM_WQE_FIFO_AMTY_TH_MASK 0x1f +#define RQM_CFG_CPU2RQM_WQE_FIFO_AMTY_TH_SHIFT 0 + +#define RQM_RQM_CONFIG_WQE_FIFO_AFUL_TH_ADDR 0x56c0 +#define RQM_RQM_CONFIG_WQE_FIFO_AFUL_TH_SIZE 8 +#define RQM_RQM_CONFIG_WQE_FIFO_AFUL_TH_STRIDE 0x20 +#define RQM_CFG_CPU2RQM_WQE_FIFO_AFUL_TH_MASK 0x1f +#define RQM_CFG_CPU2RQM_WQE_FIFO_AFUL_TH_SHIFT 0 + +#define RQM_RQM_INT_STATE_REG_ADDR 0x57c0 +#define RQM_RQM_WQE_FIFO_OVFL_ERR_MASK 0xff +#define RQM_RQM_WQE_FIFO_OVFL_ERR_SHIFT 0 +#define RQM_RQM_WQE_FIFO_UNFL_ERR_MASK 0xff00 +#define RQM_RQM_WQE_FIFO_UNFL_ERR_SHIFT 8 +#define RQM_RQM_NO_WQE_ERR_MASK 0xff0000 +#define RQM_RQM_NO_WQE_ERR_SHIFT 16 + +#define RQM_RQM_FIFO_USED_CNT_REG_ADDR 0x57e0 +#define RQM_RQM_FIFO_USED_CNT_REG_SIZE 8 +#define RQM_RQM_FIFO_USED_CNT_REG_STRIDE 0x20 +#define RQM_RQM_WQE_FIFO_USED_CNT_MASK 0xf +#define RQM_RQM_WQE_FIFO_USED_CNT_SHIFT 0 + +#define RQM_RQM_CMD_FIFO_STATE_REG_ADDR 0x58e0 +#define RQM_RQM_WQE_FIFO_MTY_MASK 0xff +#define RQM_RQM_WQE_FIFO_MTY_SHIFT 0 +#define RQM_RQM_WQE_FIFO_FUL_MASK 0xff00 +#define RQM_RQM_WQE_FIFO_FUL_SHIFT 8 + +#define RQM_RQM_OTH_FIFO_STATE_REG_ADDR 0x5900 +#define RQM_RQM_OTH_FIFO_MTY_MASK 0x7 +#define RQM_RQM_OTH_FIFO_MTY_SHIFT 0 +#define RQM_RQM_OTH_FIFO_AFUL_MASK 0x38 +#define RQM_RQM_OTH_FIFO_AFUL_SHIFT 3 +#define RQM_RQM_OTH_FIFO_OVFL_ERR_MASK 0x1c0 +#define RQM_RQM_OTH_FIFO_OVFL_ERR_SHIFT 6 +#define RQM_RQM_OTH_FIFO_UNFL_ERR_MASK 0xe00 +#define RQM_RQM_OTH_FIFO_UNFL_ERR_SHIFT 9 + +#define RQM_RQM_OTHERS_FIFO_USED_CNT_REG_ADDR 0x5920 +#define RQM_RQM_MER_REQ_FIFO_USED_CNT_MASK 0xf +#define RQM_RQM_MER_REQ_FIFO_USED_CNT_SHIFT 0 +#define RQM_RQM_DMA_REQ_FIFO_USED_CNT_MASK 0xf0 +#define RQM_RQM_DMA_REQ_FIFO_USED_CNT_SHIFT 4 +#define RQM_RQM_PTR_REQ_FIFO_USED_CNT_MASK 0xf00 +#define RQM_RQM_PTR_REQ_FIFO_USED_CNT_SHIFT 8 + +#define RQM_RQM_DEBUG_INFO_STATE_REG_0_ADDR 0x5940 +#define RQM_RQM2MER_DBG_OPCODE_MASK 0xff +#define RQM_RQM2MER_DBG_OPCODE_SHIFT 0 +#define RQM_RQM2MER_DBG_TX0_RX1_MASK 0x100 +#define RQM_RQM2MER_DBG_TX0_RX1_SHIFT 8 +#define RQM_RQM2MER_DBG_CAP_MASK 0x200 +#define RQM_RQM2MER_DBG_CAP_SHIFT 9 +#define RQM_RQM2MER_DBG_L_QPID_MASK 0x1c00 +#define RQM_RQM2MER_DBG_L_QPID_SHIFT 10 +#define RQM_RQM2MER_DBG_SN_MASK 0x1fffffe000 +#define RQM_RQM2MER_DBG_SN_SHIFT 13 + +#define RQM_RQM_DEBUG_INFO_STATE_REG_1_ADDR 0x5960 +#define RQM_RQM2MER_DBG_MOD_IF_BM_MASK 0xffffffffffffffff +#define RQM_RQM2MER_DBG_MOD_IF_BM_SHIFT 0 + +#define RQM_RQM_DEBUG_INFO_STATE_REG_2_ADDR 0x5980 +#define RQM_RQM2MER_DBG_RQM2MER_VLD_CNT_MASK 0xffffffff +#define RQM_RQM2MER_DBG_RQM2MER_VLD_CNT_SHIFT 0 +#define RQM_RQM2MER_DBG_DD2RQM_DATA_VLD_CNT_MASK 0xffffffff00000000 +#define RQM_RQM2MER_DBG_DD2RQM_DATA_VLD_CNT_SHIFT 32 + +#define RQM_RQM_DMA_REQ_LEN_STATE_REG_ADDR 0x59a0 +#define RQM_RQM_DMA_REQ_LEN_MASK 0x3ff +#define RQM_RQM_DMA_REQ_LEN_SHIFT 0 + +#define RQM_RQM_DMA_REQ_ADDR_STATE_REG_ADDR 0x59c0 +#define RQM_RQM_DMA_REQ_ADDR_MASK 0xffffffffffffffff +#define RQM_RQM_DMA_REQ_ADDR_SHIFT 0 + +#define RQM_RQM_WQE_WQEID_ADDR 0x59e0 +#define RQM_RQM_WQE_WQEID_MASK 0xffff +#define RQM_RQM_WQE_WQEID_SHIFT 0 + +#define RQM_RQM_WQE_RECV_LEN_ADDR 0x5a00 +#define RQM_RQM_WQE_REC_LEN_MASK 0x7fffffff +#define RQM_RQM_WQE_REC_LEN_SHIFT 0 + +#define RQM_RQM_WQE_LOCAL_VA_ADDR 0x5a20 +#define RQM_RQM_WQE_L_VA_MASK 0xffffffffffffffff +#define RQM_RQM_WQE_L_VA_SHIFT 0 + +#define RQM_RQM_WQE_LOCAL_KEY_ADDR 0x5a40 +#define RQM_RQM_WQE_L_KEY_MASK 0xffffffff 
+#define RQM_RQM_WQE_L_KEY_SHIFT 0 + +#define RQM_MER_RQM_WQE_QPID_ADDR 0x5a60 +#define RQM_RQM_WQE_QPID_MASK 0x7 +#define RQM_RQM_WQE_QPID_SHIFT 0 + +#define RQM_RQM_STATUS_NEXT_CID_REG_ARRAY_ADDR 0x5a80 +#define RQM_RQM_STATUS_NEXT_CID_REG_ARRAY_SIZE 8 +#define RQM_RQM_STATUS_NEXT_CID_REG_ARRAY_STRIDE 0x20 +#define RQM_RQM_NEXT_CID_MASK 0xffff +#define RQM_RQM_NEXT_CID_SHIFT 0 + +#define RQM_RQM_DMA_IN_SOP_CNT_REG_ADDR 0x5b80 +#define RQM_RQM_DMA_IN_SOP_CNT_MASK 0xffffffffffffffff +#define RQM_RQM_DMA_IN_SOP_CNT_SHIFT 0 + +#define RQM_RQM_DMA_IN_EOP_CNT_REG_ADDR 0x5ba0 +#define RQM_RQM_DMA_IN_EOP_CNT_MASK 0xffffffffffffffff +#define RQM_RQM_DMA_IN_EOP_CNT_SHIFT 0 + +#define RQM_RQM_DMA_IN_VLD_CNT_REG_ADDR 0x5bc0 +#define RQM_RQM_DMA_IN_VLD_CNT_MASK 0xffffffffffffffff +#define RQM_RQM_DMA_IN_VLD_CNT_SHIFT 0 + +#define RQM_RQM_DMA_REQ_CNT_REG_ADDR 0x5be0 +#define RQM_RQM_DMA_REQ_CNT_MASK 0xffffffffffffffff +#define RQM_RQM_DMA_REQ_CNT_SHIFT 0 + +#define RQM_RQM_DMA_GNT_CNT_REG_ADDR 0x5c00 +#define RQM_RQM_DMA_GNT_CNT_MASK 0xffffffffffffffff +#define RQM_RQM_DMA_GNT_CNT_SHIFT 0 + +#define RQM_RQM_MER_VLD_CNT_REG_ADDR 0x5c20 +#define RQM_RQM_MER_VLD_CNT_MASK 0xffffffffffffffff +#define RQM_RQM_MER_VLD_CNT_SHIFT 0 + +#define RQM_RQM_MER_REQ_CNT_REG_ADDR 0x5c40 +#define RQM_RQM_MER_REQ_CNT_MASK 0xffffffffffffffff +#define RQM_RQM_MER_REQ_CNT_SHIFT 0 + +#endif diff --git a/providers/xscale/sqm_csr_defines.h b/providers/xscale/sqm_csr_defines.h new file mode 100644 index 0000000..e0dc6e9 --- /dev/null +++ b/providers/xscale/sqm_csr_defines.h @@ -0,0 +1,204 @@ +#ifndef _SQM_CSR_DEFINES_H_ +#define _SQM_CSR_DEFINES_H_ + +#define SQM_SOFT_RESET_REG_ADDR 0x4000 +#define SQM_SOFT_RESET_MASK 0x1 +#define SQM_SOFT_RESET_SHIFT 0 + +#define SQM_COUNTER_CONFIG_REG_ADDR 0x4020 +#define SQM_CFG_CNT_WRAP_MASK 0x1 +#define SQM_CFG_CNT_WRAP_SHIFT 0 +#define SQM_CFG_CNT_RC_MASK 0x2 +#define SQM_CFG_CNT_RC_SHIFT 1 + +#define SQM_SCRATCH_PAD_REG_ADDR 0x4040 +#define SQM_SCRATCH_PAD_MASK 0xffffffffffffffff +#define SQM_SCRATCH_PAD_SHIFT 0 + +#define SQM_SQM_CONFIG_REG_RING_ADDR_ARRAY_ADDR 0x4060 +#define SQM_SQM_CONFIG_REG_RING_ADDR_ARRAY_SIZE 8 +#define SQM_SQM_CONFIG_REG_RING_ADDR_ARRAY_STRIDE 0x20 +#define SQM_CFG_CPU2SQM_RING_ADDR_MASK 0xffffffffffffffff +#define SQM_CFG_CPU2SQM_RING_ADDR_SHIFT 0 + +#define SQM_SQM_CONFIG_REG_RING_SIZE_ARRAY_ADDR 0x4160 +#define SQM_SQM_CONFIG_REG_RING_SIZE_ARRAY_SIZE 8 +#define SQM_SQM_CONFIG_REG_RING_SIZE_ARRAY_STRIDE 0x20 +#define SQM_CFG_CPU2SQM_RING_SIZE_MASK 0xffff +#define SQM_CFG_CPU2SQM_RING_SIZE_SHIFT 0 + +#define SQM_SQM_CONFIG_REG_ARRAY_ADDR 0x4260 +#define SQM_SQM_CONFIG_REG_ARRAY_SIZE 8 +#define SQM_SQM_CONFIG_REG_ARRAY_STRIDE 0x20 +#define SQM_CFG_CPU2SQM_NEXT_PID_MASK 0xffff +#define SQM_CFG_CPU2SQM_NEXT_PID_SHIFT 0 + +#define SQM_SQM_CONFIG_REG_CFG_EN_ARRAY_ADDR 0x4360 +#define SQM_SQM_CONFIG_REG_CFG_EN_ARRAY_SIZE 8 +#define SQM_SQM_CONFIG_REG_CFG_EN_ARRAY_STRIDE 0x20 +#define SQM_CFG_CPU2SQM_CFG_EN_MASK 0x1 +#define SQM_CFG_CPU2SQM_CFG_EN_SHIFT 0 + +#define SQM_SQM_STATUS_REG_DONE_CID_ARRAY_ADDR 0x4460 +#define SQM_SQM_STATUS_REG_DONE_CID_ARRAY_SIZE 8 +#define SQM_SQM_STATUS_REG_DONE_CID_ARRAY_STRIDE 0x20 +#define SQM_STS_SQM2CPU_DONE_CID_MASK 0xffff +#define SQM_STS_SQM2CPU_DONE_CID_SHIFT 0 + +#define SQM_SQM_CFG_WQE_FIFO_TH_ADDR 0x4560 +#define SQM_CFG_CPU2SQM_WQE_FIFO_AFUL_TH_MASK 0xff +#define SQM_CFG_CPU2SQM_WQE_FIFO_AFUL_TH_SHIFT 0 +#define SQM_CFG_CPU2SQM_WQE_FIFO_AMTY_TH_MASK 0xff00 +#define SQM_CFG_CPU2SQM_WQE_FIFO_AMTY_TH_SHIFT 8 + +#define 
SQM_SQM_CONFIG_DBG_FIFO_REG_CFG_ADDR 0x4580 +#define SQM_CFG_CPU2SQM_DBG_FIFO_AFUL_TH_MASK 0xff +#define SQM_CFG_CPU2SQM_DBG_FIFO_AFUL_TH_SHIFT 0 +#define SQM_CFG_CPU2SQM_DBG_FIFO_AMTY_TH_MASK 0xff00 +#define SQM_CFG_CPU2SQM_DBG_FIFO_AMTY_TH_SHIFT 8 + +#define SQM_SQM_CONFIG_QPID_W_FIFO_REG_CFG_ADDR 0x45a0 +#define SQM_CFG_CPU2SQM_QPID_W_FIFO_AFUL_TH_MASK 0xff +#define SQM_CFG_CPU2SQM_QPID_W_FIFO_AFUL_TH_SHIFT 0 +#define SQM_CFG_CPU2SQM_QPID_W_FIFO_AMTY_TH_MASK 0xff00 +#define SQM_CFG_CPU2SQM_QPID_W_FIFO_AMTY_TH_SHIFT 8 + +#define SQM_SQM_CONFIG_QPID_R_FIFO_REG_CFG_ADDR 0x45c0 +#define SQM_CFG_CPU2SQM_QPID_R_FIFO_AFUL_TH_MASK 0xff +#define SQM_CFG_CPU2SQM_QPID_R_FIFO_AFUL_TH_SHIFT 0 +#define SQM_CFG_CPU2SQM_QPID_R_FIFO_AMTY_TH_MASK 0xff00 +#define SQM_CFG_CPU2SQM_QPID_R_FIFO_AMTY_TH_SHIFT 8 + +#define SQM_SQM_INT_STATE_REG_ADDR 0x45e0 +#define SQM_SQM_FIFO_OVFL_ERR_MASK 0xf +#define SQM_SQM_FIFO_OVFL_ERR_SHIFT 0 +#define SQM_SQM_FIFO_UNFL_ERR_MASK 0xf0 +#define SQM_SQM_FIFO_UNFL_ERR_SHIFT 4 +#define SQM_SQM_FIFO_MTY_MASK 0xf00 +#define SQM_SQM_FIFO_MTY_SHIFT 8 +#define SQM_SQM_FIFO_AFUL_MASK 0xf000 +#define SQM_SQM_FIFO_AFUL_SHIFT 12 +#define SQM_SQM_SOP_EOP_NO_EQUAL_MASK 0x10000 +#define SQM_SQM_SOP_EOP_NO_EQUAL_SHIFT 16 + +#define SQM_SQM_FIFO_USED_CNT_REG_ADDR 0x4600 +#define SQM_SQM_WQE_FIFO_USED_CNT_MASK 0x7f +#define SQM_SQM_WQE_FIFO_USED_CNT_SHIFT 0 +#define SQM_SQM_HEAD_FIFO_USED_CNT_MASK 0x3f80 +#define SQM_SQM_HEAD_FIFO_USED_CNT_SHIFT 7 +#define SQM_SQM_PTR_FIFO_USED_CNT_MASK 0x1fc000 +#define SQM_SQM_PTR_FIFO_USED_CNT_SHIFT 14 +#define SQM_SQM_DBG_FIFO_USED_CNT_MASK 0xfe00000 +#define SQM_SQM_DBG_FIFO_USED_CNT_SHIFT 21 + +#define SQM_SQM_DMA_REQUEST_LEN_REG_ADDR 0x4620 +#define SQM_SQM_DMA_REQ_LEN_MASK 0x3ff +#define SQM_SQM_DMA_REQ_LEN_SHIFT 0 + +#define SQM_SQM_DMA_REQUEST_ADDR_REG_ADDR 0x4640 +#define SQM_SQM_DMA_REQ_ADDR_MASK 0xffffffffffffffff +#define SQM_SQM_DMA_REQ_ADDR_SHIFT 0 + +#define SQM_SQM_STATUS_REG_NEXT_CID_ARRAY_ADDR 0x4660 +#define SQM_SQM_STATUS_REG_NEXT_CID_ARRAY_SIZE 8 +#define SQM_SQM_STATUS_REG_NEXT_CID_ARRAY_STRIDE 0x20 +#define SQM_SQM_NEXT_CID_MASK 0xffff +#define SQM_SQM_NEXT_CID_SHIFT 0 + +#define SQM_SQM_WQE_OPCODE_ADDR 0x4760 +#define SQM_SQM_WQE_OPCODE_MASK 0xff +#define SQM_SQM_WQE_OPCODE_SHIFT 0 + +#define SQM_SQM_WQE_WQEID_ADDR 0x4780 +#define SQM_SQM_WQE_WQEID_MASK 0xffff +#define SQM_SQM_WQE_WQEID_SHIFT 0 + +#define SQM_SQM_WQE_R_VA_ADDR 0x47a0 +#define SQM_SQM_WQE_R_VA_MASK 0xffffffffffffffff +#define SQM_SQM_WQE_R_VA_SHIFT 0 + +#define SQM_SQM_WQE_R_KEY_ADDR 0x47c0 +#define SQM_SQM_WQE_R_KEY_MASK 0xffffffff +#define SQM_SQM_WQE_R_KEY_SHIFT 0 + +#define SQM_SQM_WQE_L_LEN_ADDR 0x47e0 +#define SQM_SQM_WQE_L_LEN_MASK 0x7fffffff +#define SQM_SQM_WQE_L_LEN_SHIFT 0 + +#define SQM_SQM_WQE_L_VA_ADDR 0x4800 +#define SQM_SQM_WQE_L_VA_MASK 0xffffffffffffffff +#define SQM_SQM_WQE_L_VA_SHIFT 0 + +#define SQM_SQM_WQE_L_KEY_ADDR 0x4820 +#define SQM_SQM_WQE_L_KEY_MASK 0xffffffff +#define SQM_SQM_WQE_L_KEY_SHIFT 0 + +#define SQM_SQM_WQE_QPID_ADDR 0x4840 +#define SQM_SQM_WQE_QPID_MASK 0x7 +#define SQM_SQM_WQE_QPID_SHIFT 0 + +#define SQM_SQM_DMA_IN_SOP_CNT_REG_ADDR 0x4860 +#define SQM_SQM_DMA_IN_SOP_CNT_MASK 0xffffffffffffffff +#define SQM_SQM_DMA_IN_SOP_CNT_SHIFT 0 + +#define SQM_SQM_DMA_IN_EOP_CNT_REG_ADDR 0x4880 +#define SQM_SQM_DMA_IN_EOP_CNT_MASK 0xffffffffffffffff +#define SQM_SQM_DMA_IN_EOP_CNT_SHIFT 0 + +#define SQM_SQM_DMA_IN_VLD_CNT_REG_ADDR 0x48a0 +#define SQM_SQM_DMA_IN_VLD_CNT_MASK 0xffffffffffffffff +#define SQM_SQM_DMA_IN_VLD_CNT_SHIFT 0 + 
+#define SQM_SQM_DMA_REQ_CNT_REG_ADDR 0x48c0 +#define SQM_SQM_DMA_REQ_CNT_MASK 0xffffffffffffffff +#define SQM_SQM_DMA_REQ_CNT_SHIFT 0 + +#define SQM_SQM_DMA_GNT_CNT_REG_ADDR 0x48e0 +#define SQM_SQM_DMA_GNT_CNT_MASK 0xffffffffffffffff +#define SQM_SQM_DMA_GNT_CNT_SHIFT 0 + +#define SQM_SQM_MET_VLD_CNT_REG_ADDR 0x4900 +#define SQM_SQM_MET_CNT_MASK 0xffffffffffffffff +#define SQM_SQM_MET_CNT_SHIFT 0 + +#define SQM_SQM_CONFIG_CAP_CFG_EN_ADDR 0x4920 +#define SQM_CFG_CPU2SQM_CAP_EN_CLR_MASK 0x1 +#define SQM_CFG_CPU2SQM_CAP_EN_CLR_SHIFT 0 +#define SQM_CFG_CPU2SQM_CAP_QPID_EN_MASK 0x2 +#define SQM_CFG_CPU2SQM_CAP_QPID_EN_SHIFT 1 +#define SQM_CFG_CPU2SQM_CAP_OPCODE_EN_MASK 0x4 +#define SQM_CFG_CPU2SQM_CAP_OPCODE_EN_SHIFT 2 +#define SQM_CFG_CPU2SQM_CAP_QPID_MASK 0x38 +#define SQM_CFG_CPU2SQM_CAP_QPID_SHIFT 3 +#define SQM_CFG_CPU2SQM_CAP_OPCODE_MASK 0x3fc0 +#define SQM_CFG_CPU2SQM_CAP_OPCODE_SHIFT 6 + +#define SQM_SQM_DEBUG_INFO_STATE_REG_0_ADDR 0x4940 +#define SQM_SQM2MET_DBG_OPCODE_MASK 0xff +#define SQM_SQM2MET_DBG_OPCODE_SHIFT 0 +#define SQM_SQM2MET_DBG_TX0_RX1_MASK 0x100 +#define SQM_SQM2MET_DBG_TX0_RX1_SHIFT 8 +#define SQM_SQM2MET_DBG_CAP_MASK 0x200 +#define SQM_SQM2MET_DBG_CAP_SHIFT 9 +#define SQM_SQM2MET_DBG_L_QPID_MASK 0x1c00 +#define SQM_SQM2MET_DBG_L_QPID_SHIFT 10 +#define SQM_SQM2MET_DBG_SN_MASK 0x1fffffe000 +#define SQM_SQM2MET_DBG_SN_SHIFT 13 + +#define SQM_SQM_DEBUG_INFO_STATE_REG_1_ADDR 0x4960 +#define SQM_SQM2MET_DBG_MOD_IF_BM_MASK 0xffffffffffffffff +#define SQM_SQM2MET_DBG_MOD_IF_BM_SHIFT 0 + +#define SQM_SQM_DMA_REQ_COUNTER_REG_ADDR 0x4980 +#define SQM_SQM_DMA_REQ_COUNTER_MASK 0xff +#define SQM_SQM_DMA_REQ_COUNTER_SHIFT 0 + +#define SQM_SQM_DMA_GNT_COUNTER_REG_ADDR 0x49a0 +#define SQM_SQM_DMA_GNT_COUNTER_MASK 0xff +#define SQM_SQM_DMA_GNT_COUNTER_SHIFT 0 + +#define SQM_SQM_SQM2MET_COUNTER_REG_ADDR 0x49c0 +#define SQM_SQM_SQM2MET_CNT_MASK 0xff +#define SQM_SQM_SQM2MET_CNT_SHIFT 0 + +#endif diff --git a/providers/xscale/verbs.c b/providers/xscale/verbs.c new file mode 100644 index 0000000..937bed1 --- /dev/null +++ b/providers/xscale/verbs.c @@ -0,0 +1,2816 @@ +/* + * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd. + * All rights reserved. 
+ */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "xscale.h" +#include "xsc-abi.h" +#include "wqe.h" +#include "xsc_hsi.h" + +int xsc_single_threaded = 0; + +static inline int is_xrc_tgt(int type) +{ + return type == IBV_QPT_XRC_RECV; +} + +static void xsc_set_fw_version(struct ibv_device_attr *attr, union xsc_ib_fw_ver *fw_ver) +{ + uint8_t ver_major = fw_ver->s.ver_major; + uint8_t ver_minor = fw_ver->s.ver_minor; + uint16_t ver_patch = fw_ver->s.ver_patch; + uint32_t ver_tweak = fw_ver->s.ver_tweak; + + if (ver_tweak == 0) { + snprintf(attr->fw_ver, sizeof(attr->fw_ver), "v%u.%u.%u", + ver_major, ver_minor, ver_patch); + } else { + snprintf(attr->fw_ver, sizeof(attr->fw_ver), "v%u.%u.%u+%u", + ver_major, ver_minor, ver_patch, ver_tweak); + } +} + +static int xsc_read_clock(struct ibv_context *context, uint64_t *cycles) +{ + unsigned int clockhi, clocklo, clockhi1; + int i; + struct xsc_context *ctx = to_xctx(context); + + if (!ctx->hca_core_clock) + return EOPNOTSUPP; + + /* Handle wraparound */ + for (i = 0; i < 2; i++) { + clockhi = be32toh(mmio_read32_be(ctx->hca_core_clock)); + clocklo = be32toh(mmio_read32_be(ctx->hca_core_clock + 4)); + clockhi1 = be32toh(mmio_read32_be(ctx->hca_core_clock)); + if (clockhi == clockhi1) + break; + } + + *cycles = (uint64_t)clockhi << 32 | (uint64_t)clocklo; + + return 0; +} + +int xsc_query_rt_values(struct ibv_context *context, + struct ibv_values_ex *values) +{ + uint32_t comp_mask = 0; + int err = 0; + + if (!check_comp_mask(values->comp_mask, IBV_VALUES_MASK_RAW_CLOCK)) + return EINVAL; + + if (values->comp_mask & IBV_VALUES_MASK_RAW_CLOCK) { + uint64_t cycles; + + err = xsc_read_clock(context, &cycles); + if (!err) { + values->raw_clock.tv_sec = 0; + values->raw_clock.tv_nsec = cycles; + comp_mask |= IBV_VALUES_MASK_RAW_CLOCK; + } + } + + values->comp_mask = comp_mask; + + return err; +} + +int xsc_query_port(struct ibv_context *context, uint8_t port, + struct ibv_port_attr *attr) +{ + struct ibv_query_port cmd; + + return ibv_cmd_query_port(context, port, attr, &cmd, sizeof cmd); +} + +struct ibv_pd *xsc_alloc_pd(struct ibv_context *context) +{ + struct ibv_alloc_pd cmd; + struct xsc_alloc_pd_resp resp; + struct xsc_pd *pd; + + pd = calloc(1, sizeof *pd); + if (!pd) + return NULL; + + if (ibv_cmd_alloc_pd(context, &pd->ibv_pd, &cmd, sizeof cmd, + &resp.ibv_resp, sizeof resp)) { + free(pd); + return NULL; + } + + atomic_init(&pd->refcount, 1); + pd->pdn = resp.pdn; + xsc_dbg(to_xctx(context)->dbg_fp, XSC_DBG_PD, "pd number:%u\n", pd->pdn); + + return &pd->ibv_pd; +} + +struct ibv_pd * +xsc_alloc_parent_domain(struct ibv_context *context, + struct ibv_parent_domain_init_attr *attr) +{ + struct xsc_parent_domain *xparent_domain; + + if (ibv_check_alloc_parent_domain(attr)) + return NULL; + + if (attr->comp_mask) { + errno = EINVAL; + return NULL; + } + + xparent_domain = calloc(1, sizeof(*xparent_domain)); + if (!xparent_domain) { + errno = ENOMEM; + return NULL; + } + + xparent_domain->xpd.xprotection_domain = to_xpd(attr->pd); + atomic_fetch_add(&xparent_domain->xpd.xprotection_domain->refcount, 1); + atomic_init(&xparent_domain->xpd.refcount, 1); + + ibv_initialize_parent_domain( + &xparent_domain->xpd.ibv_pd, + &xparent_domain->xpd.xprotection_domain->ibv_pd); + + return &xparent_domain->xpd.ibv_pd; +} + +static int xsc_dealloc_parent_domain(struct xsc_parent_domain 
*xparent_domain) +{ + if (atomic_load(&xparent_domain->xpd.refcount) > 1) + return EBUSY; + + atomic_fetch_sub(&xparent_domain->xpd.xprotection_domain->refcount, 1); + + free(xparent_domain); + return 0; +} + +int xsc_free_pd(struct ibv_pd *pd) +{ + int ret; + struct xsc_parent_domain *xparent_domain = to_xparent_domain(pd); + struct xsc_pd *xpd = to_xpd(pd); + + if (xparent_domain) + return xsc_dealloc_parent_domain(xparent_domain); + + if (atomic_load(&xpd->refcount) > 1) + return EBUSY; + + ret = ibv_cmd_dealloc_pd(pd); + if (ret) + return ret; + + xsc_dbg(to_xctx(pd->context)->dbg_fp, XSC_DBG_PD, "dealloc pd\n"); + free(xpd); + + return 0; +} + +struct ibv_mr *xsc_reg_mr(struct ibv_pd *pd, void *addr, size_t length, + uint64_t hca_va, int acc) +{ + struct xsc_mr *mr; + struct ibv_reg_mr cmd; + int ret; + enum ibv_access_flags access = (enum ibv_access_flags)acc; + struct ib_uverbs_reg_mr_resp resp; + + mr = calloc(1, sizeof(*mr)); + if (!mr) + return NULL; + + ret = ibv_cmd_reg_mr(pd, addr, length, hca_va, access, + &mr->vmr, &cmd, sizeof(cmd), &resp, + sizeof resp); + if (ret) { + xsc_free_buf(&(mr->buf)); + free(mr); + return NULL; + } + mr->alloc_flags = acc; + + xsc_dbg(to_xctx(pd->context)->dbg_fp, XSC_DBG_MR, "lkey:%u, rkey:%u\n", + mr->vmr.ibv_mr.lkey, mr->vmr.ibv_mr.rkey); + + return &mr->vmr.ibv_mr; +} + +struct ibv_mr *xsc_alloc_null_mr(struct ibv_pd *pd) +{ + struct xsc_mr *mr; + struct xsc_context *ctx = to_xctx(pd->context); + + if (ctx->dump_fill_mkey == XSC_INVALID_LKEY) { + errno = ENOTSUP; + return NULL; + } + + mr = calloc(1, sizeof(*mr)); + if (!mr) { + errno = ENOMEM; + return NULL; + } + + mr->vmr.ibv_mr.lkey = ctx->dump_fill_mkey; + + mr->vmr.ibv_mr.context = pd->context; + mr->vmr.ibv_mr.pd = pd; + mr->vmr.ibv_mr.addr = NULL; + mr->vmr.ibv_mr.length = SIZE_MAX; + mr->vmr.mr_type = IBV_MR_TYPE_NULL_MR; + + return &mr->vmr.ibv_mr; +} + +enum { + XSC_DM_ALLOWED_ACCESS = IBV_ACCESS_LOCAL_WRITE | + IBV_ACCESS_REMOTE_WRITE | + IBV_ACCESS_REMOTE_READ | + IBV_ACCESS_REMOTE_ATOMIC | + IBV_ACCESS_ZERO_BASED +}; + +struct ibv_mr *xsc_reg_dm_mr(struct ibv_pd *pd, struct ibv_dm *ibdm, + uint64_t dm_offset, size_t length, + unsigned int acc) +{ + struct xsc_dm *dm = to_xdm(ibdm); + struct xsc_mr *mr; + int ret; + + if (acc & ~XSC_DM_ALLOWED_ACCESS) { + errno = EINVAL; + return NULL; + } + + mr = calloc(1, sizeof(*mr)); + if (!mr) { + errno = ENOMEM; + return NULL; + } + + ret = ibv_cmd_reg_dm_mr(pd, &dm->verbs_dm, dm_offset, length, acc, + &mr->vmr, NULL); + if (ret) { + free(mr); + return NULL; + } + + mr->alloc_flags = acc; + + return &mr->vmr.ibv_mr; +} + +int xsc_rereg_mr(struct verbs_mr *vmr, int flags, struct ibv_pd *pd, + void *addr, size_t length, int access) +{ + struct ibv_rereg_mr cmd; + struct ib_uverbs_rereg_mr_resp resp; + + return ibv_cmd_rereg_mr(vmr, flags, addr, length, (uintptr_t)addr, + access, pd, &cmd, sizeof(cmd), &resp, + sizeof(resp)); +} + +int xsc_dereg_mr(struct verbs_mr *vmr) +{ + int ret; + + if (vmr->mr_type == IBV_MR_TYPE_NULL_MR) + goto free; + + ret = ibv_cmd_dereg_mr(vmr); + if (ret) + return ret; + +free: + free(vmr); + return 0; +} + +int xsc_round_up_power_of_two(long long sz) +{ + long long ret; + + for (ret = 1; ret < sz; ret <<= 1) + ; /* nothing */ + + if (ret > INT_MAX) { + fprintf(stderr, "%s: roundup overflow\n", __func__); + return -ENOMEM; + } + + return (int)ret; +} + +static int align_queue_size(long long req) +{ + return xsc_round_up_power_of_two(req); +} + +enum { + CREATE_CQ_SUPPORTED_WC_FLAGS = IBV_WC_STANDARD_FLAGS | + 
IBV_WC_EX_WITH_COMPLETION_TIMESTAMP | + IBV_WC_EX_WITH_CVLAN | + IBV_WC_EX_WITH_FLOW_TAG | + IBV_WC_EX_WITH_TM_INFO | + IBV_WC_EX_WITH_COMPLETION_TIMESTAMP_WALLCLOCK +}; + +enum { + CREATE_CQ_SUPPORTED_COMP_MASK = IBV_CQ_INIT_ATTR_MASK_FLAGS +}; + +enum { + CREATE_CQ_SUPPORTED_FLAGS = + IBV_CREATE_CQ_ATTR_SINGLE_THREADED | + IBV_CREATE_CQ_ATTR_IGNORE_OVERRUN +}; + +enum { + XSC_DV_CREATE_CQ_SUP_COMP_MASK = + (XSCDV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE | + XSCDV_CQ_INIT_ATTR_MASK_FLAGS | + XSCDV_CQ_INIT_ATTR_MASK_CQE_SIZE), +}; + +static int xsc_cqe_depth_check(void) +{ + char *e; + + e = getenv("XSC_CQE_DEPTH_CHECK"); + if (e && !strcmp(e, "n")) + return 0; + + return 1; +} + +static struct ibv_cq_ex *create_cq(struct ibv_context *context, + const struct ibv_cq_init_attr_ex *cq_attr, + int cq_alloc_flags, + struct xscdv_cq_init_attr *xcq_attr) +{ + struct xsc_create_cq cmd = {}; + struct xsc_create_cq_resp resp = {}; + struct xsc_create_cq_ex cmd_ex = {}; + struct xsc_create_cq_ex_resp resp_ex = {}; + struct xsc_ib_create_cq *cmd_drv; + struct xsc_ib_create_cq_resp *resp_drv; + struct xsc_cq *cq; + int cqe_sz; + int ret; + int ncqe; + struct xsc_context *xctx = to_xctx(context); + bool use_ex = false; + char *env; + int i; + + if (!cq_attr->cqe) { + xsc_err("CQE invalid\n"); + errno = EINVAL; + return NULL; + } + + xsc_dbg(xctx->dbg_fp, XSC_DBG_CQ, "CQE number:%u\n", cq_attr->cqe); + + if (cq_attr->comp_mask & ~CREATE_CQ_SUPPORTED_COMP_MASK) { + xsc_err("Unsupported comp_mask for create cq\n"); + errno = EINVAL; + return NULL; + } + + if (cq_attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_FLAGS && + cq_attr->flags & ~CREATE_CQ_SUPPORTED_FLAGS) { + xsc_err("Unsupported creation flags requested for create cq\n"); + errno = EINVAL; + return NULL; + } + + if (cq_attr->wc_flags & ~CREATE_CQ_SUPPORTED_WC_FLAGS) { + xsc_err("unsupported flgas:0x%lx\n", cq_attr->wc_flags); + errno = ENOTSUP; + return NULL; + } + + cq = calloc(1, sizeof *cq); + if (!cq) { + xsc_err("Alloc CQ failed\n"); + errno = ENOMEM; + return NULL; + } + + if (cq_attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_FLAGS) { + if (cq_attr->flags & IBV_CREATE_CQ_ATTR_SINGLE_THREADED) + cq->flags |= XSC_CQ_FLAGS_SINGLE_THREADED; + if (cq_attr->flags & IBV_CREATE_CQ_ATTR_IGNORE_OVERRUN) + use_ex = true; + } + + xsc_dbg(xctx->dbg_fp, XSC_DBG_CQ, "use_ex:%u\n", use_ex); + + cmd_drv = use_ex ? &cmd_ex.drv_payload : &cmd.drv_payload; + resp_drv = use_ex ? 
&resp_ex.drv_payload : &resp.drv_payload; + + cq->cons_index = 0; + + if (xsc_spinlock_init(&cq->lock, !xsc_single_threaded)) + goto err; + + ncqe = align_queue_size(cq_attr->cqe); + if (ncqe < XSC_CQE_RING_DEPTH_MIN) { + xsc_dbg(xctx->dbg_fp, XSC_DBG_CQ, "CQE ring size %u is not enough, set it as %u\n", + ncqe, XSC_CQE_RING_DEPTH_MIN); + ncqe = XSC_CQE_RING_DEPTH_MIN; + } + + if (ncqe > XSC_CQE_RING_DEPTH_MAX) { + if (xsc_cqe_depth_check()) { + xsc_err("CQE ring size %u exceeds CQE ring depth %u, abort!\n", + ncqe, XSC_CQE_RING_DEPTH_MAX); + errno = EINVAL; + goto err_spl; + } else { + xsc_dbg(xctx->dbg_fp, XSC_DBG_CQ, "CQE ring size %u exceeds the MAX ring szie, set it as %u\n", + ncqe, XSC_CQE_RING_DEPTH_MAX); + ncqe = XSC_CQE_RING_DEPTH_MAX; + } + } + + cqe_sz = XSC_CQE_SIZE; + xsc_dbg(xctx->dbg_fp, XSC_DBG_CQ, "CQE number:%u, size:%u\n", ncqe, cqe_sz); + + if (xsc_alloc_cq_buf(to_xctx(context), cq, &cq->buf_a, ncqe, cqe_sz)) { + xsc_err("Alloc cq buffer failed.\n"); + errno = ENOMEM; + goto err_spl; + } + + cq->arm_sn = 0; + cq->cqe_sz = cqe_sz; + cq->flags = cq_alloc_flags; + + cmd_drv->buf_addr = (uintptr_t) cq->buf_a.buf; + cmd_drv->db_addr = (uintptr_t) cq->dbrec; + cmd_drv->cqe_size = cqe_sz; + + xsc_dbg(xctx->dbg_fp, XSC_DBG_CQ, "buf_addr:%p\n", cq->buf_a.buf); + + if (use_ex) { + struct ibv_cq_init_attr_ex cq_attr_ex = *cq_attr; + + cq_attr_ex.cqe = ncqe; + ret = ibv_cmd_create_cq_ex(context, &cq_attr_ex, &cq->verbs_cq, + &cmd_ex.ibv_cmd, sizeof(cmd_ex), + &resp_ex.ibv_resp, sizeof(resp_ex), + 0); + } else { + ret = ibv_cmd_create_cq(context, ncqe, cq_attr->channel, + cq_attr->comp_vector, + ibv_cq_ex_to_cq(&cq->verbs_cq.cq_ex), + &cmd.ibv_cmd, sizeof(cmd), + &resp.ibv_resp, sizeof(resp)); + } + + if (ret) { + xsc_err("ibv_cmd_create_cq failed,ret %d\n", ret); + goto err_buf; + } + + cq->active_buf = &cq->buf_a; + cq->resize_buf = NULL; + cq->cqn = resp_drv->cqn; + cq->stall_enable = to_xctx(context)->stall_enable; + cq->stall_adaptive_enable = to_xctx(context)->stall_adaptive_enable; + cq->stall_cycles = to_xctx(context)->stall_cycles; + + cq->db = xctx->cqm_reg_va + + (xctx->cqm_next_cid_reg & (xctx->page_size - 1)); + cq->armdb =xctx->cqm_armdb_va + + (xctx->cqm_armdb & (xctx->page_size - 1)); + cq->cqe_cnt = ncqe; + cq->log2_cq_ring_sz = xsc_ilog2(ncqe); + + for (i = 0; i < ncqe; i++) { + struct xsc_cqe *cqe = (struct xsc_cqe *)(cq->active_buf->buf + i * cq->cqe_sz); + cqe->owner = 1; + } + + env = getenv("XSC_DISABLE_FLUSH_ERROR"); + cq->disable_flush_error_cqe = env ? true : false; + xsc_dbg(xctx->dbg_fp, XSC_DBG_CQ, "cqe count:%u cqn:%u\n", cq->cqe_cnt, cq->cqn); + list_head_init(&cq->err_state_qp_list); + return &cq->verbs_cq.cq_ex; + + +err_buf: + xsc_free_cq_buf(to_xctx(context), &cq->buf_a); + +err_spl: + xsc_spinlock_destroy(&cq->lock); + +err: + free(cq); + + return NULL; +} + +struct ibv_cq *xsc_create_cq(struct ibv_context *context, int cqe, + struct ibv_comp_channel *channel, + int comp_vector) +{ + struct ibv_cq_ex *cq; + struct ibv_cq_init_attr_ex cq_attr = {.cqe = cqe, .channel = channel, + .comp_vector = comp_vector, + .wc_flags = IBV_WC_STANDARD_FLAGS}; + + if (cqe <= 0) { + errno = EINVAL; + return NULL; + } + + cq = create_cq(context, &cq_attr, 0, NULL); + return cq ? 
ibv_cq_ex_to_cq(cq) : NULL; +} + +struct ibv_cq_ex *xsc_create_cq_ex(struct ibv_context *context, + struct ibv_cq_init_attr_ex *cq_attr) +{ + return create_cq(context, cq_attr, XSC_CQ_FLAGS_EXTENDED, NULL); +} + +struct ibv_cq_ex *xscdv_create_cq(struct ibv_context *context, + struct ibv_cq_init_attr_ex *cq_attr, + struct xscdv_cq_init_attr *xcq_attr) +{ + struct ibv_cq_ex *cq; + + cq = create_cq(context, cq_attr, XSC_CQ_FLAGS_EXTENDED, xcq_attr); + if (!cq) + return NULL; + + verbs_init_cq(ibv_cq_ex_to_cq(cq), context, + cq_attr->channel, cq_attr->cq_context); + return cq; +} + +int xsc_resize_cq(struct ibv_cq *ibcq, int cqe) +{ + struct xsc_cq *cq = to_xcq(ibcq); + + if (cqe < 0) { + errno = EINVAL; + return errno; + } + + xsc_spin_lock(&cq->lock); + cq->active_cqes = cq->verbs_cq.cq_ex.cqe; + /* currently we don't change cqe size */ + cq->resize_cqe_sz = cq->cqe_sz; + cq->resize_cqes = cq->verbs_cq.cq_ex.cqe; + xsc_spin_unlock(&cq->lock); + cq->resize_buf = NULL; + return 0; +} + +int xsc_destroy_cq(struct ibv_cq *cq) +{ + int ret; + struct xsc_err_state_qp_node *tmp, *err_qp_node; + + xsc_dbg(to_xctx(cq->context)->dbg_fp, XSC_DBG_CQ, "\n"); + ret = ibv_cmd_destroy_cq(cq); + if (ret) + return ret; + + list_for_each_safe(&to_xcq(cq)->err_state_qp_list, err_qp_node, tmp, entry) { + list_del(&err_qp_node->entry); + free(err_qp_node); + } + + xsc_free_cq_buf(to_xctx(cq->context), to_xcq(cq)->active_buf); + free(to_xcq(cq)); + + return 0; +} + +static int xsc_calc_sq_size(struct xsc_context *ctx, + struct ibv_qp_init_attr_ex *attr, + struct xsc_qp *qp) +{ + int wqe_size; + int wq_size; + int wq_size_min = 0; + + if (!attr->cap.max_send_wr) + return 0; + + wqe_size = 1 << (XSC_BASE_WQE_SHIFT + ctx->send_ds_shift); + + wq_size = xsc_round_up_power_of_two(attr->cap.max_send_wr); + + if (attr->qp_type != IBV_QPT_RAW_PACKET) + wq_size_min = XSC_SEND_WQE_RING_DEPTH_MIN; + if (wq_size < wq_size_min) { + xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, "WQE size %u is not enough, set it as %u\n", + wq_size, wq_size_min); + wq_size = wq_size_min; + } + + if (wq_size > XSC_SEND_WQE_RING_DEPTH_MAX) { + xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, + "WQE size %u exceeds WQE ring depth, set it as %u\n", + wq_size, XSC_SEND_WQE_RING_DEPTH_MAX); + wq_size = XSC_SEND_WQE_RING_DEPTH_MAX; + } + + qp->max_inline_data = attr->cap.max_inline_data; + qp->sq.wqe_cnt = wq_size; + qp->sq.ds_cnt = wq_size << ctx->send_ds_shift; + qp->sq.seg_cnt = 1 << ctx->send_ds_shift; + qp->sq.wqe_shift = XSC_BASE_WQE_SHIFT + ctx->send_ds_shift; + qp->sq.max_gs = attr->cap.max_send_sge; + qp->sq.max_post = qp->sq.wqe_cnt; + if (attr->cap.max_inline_data > + (qp->sq.seg_cnt - 2) * sizeof(struct xsc_wqe_data_seg)) + return -EINVAL; + + xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, "Send WQE count:%u, max post:%u wqe shift:%u\n", + qp->sq.wqe_cnt, qp->sq.max_post, qp->sq.wqe_shift); + + return wqe_size * qp->sq.wqe_cnt; +} + +enum { + DV_CREATE_WQ_SUPPORTED_COMP_MASK = XSCDV_WQ_INIT_ATTR_MASK_STRIDING_RQ +}; + +static int xsc_calc_rwq_size(struct xsc_context *ctx, + struct xsc_rwq *rwq, + struct ibv_wq_init_attr *attr, + struct xscdv_wq_init_attr *xwq_attr) +{ + size_t wqe_size; + int wq_size; + uint32_t num_scatter; + int is_mprq = 0; + int scat_spc; + + if (!attr->max_wr) + return -EINVAL; + if (xwq_attr) { + if (!check_comp_mask(xwq_attr->comp_mask, + DV_CREATE_WQ_SUPPORTED_COMP_MASK)) + return -EINVAL; + + is_mprq = !!(xwq_attr->comp_mask & + XSCDV_WQ_INIT_ATTR_MASK_STRIDING_RQ); + } + + /* TBD: check caps for RQ */ + num_scatter = max_t(uint32_t, attr->max_sge, 1); + 
wqe_size = sizeof(struct xsc_wqe_data_seg) * num_scatter + + sizeof(struct xsc_wqe_srq_next_seg) * is_mprq; + + if (rwq->wq_sig) + wqe_size += sizeof(struct xsc_rwqe_sig); + + if (wqe_size <= 0 || wqe_size > ctx->max_rq_desc_sz) + return -EINVAL; + + wqe_size = xsc_round_up_power_of_two(wqe_size); + wq_size = xsc_round_up_power_of_two(attr->max_wr) * wqe_size; + wq_size = max(wq_size, XSC_SEND_WQE_BB); + rwq->rq.wqe_cnt = wq_size / wqe_size; + rwq->rq.wqe_shift = xsc_ilog2(wqe_size); + rwq->rq.max_post = 1 << xsc_ilog2(wq_size / wqe_size); + scat_spc = wqe_size - + ((rwq->wq_sig) ? sizeof(struct xsc_rwqe_sig) : 0) - + is_mprq * sizeof(struct xsc_wqe_srq_next_seg); + rwq->rq.max_gs = scat_spc / sizeof(struct xsc_wqe_data_seg); + return wq_size; +} + +static int xsc_calc_rq_size(struct xsc_context *ctx, + struct ibv_qp_init_attr_ex *attr, + struct xsc_qp *qp) +{ + int wqe_size; + int wq_size; + int wq_size_min = 0; + + if (!attr->cap.max_recv_wr) + return 0; + + wqe_size = 1 << (XSC_BASE_WQE_SHIFT + ctx->recv_ds_shift); + + wq_size = xsc_round_up_power_of_two(attr->cap.max_recv_wr); + /* due to hardware limit, rdma rq depth should be one send wqe ds num at least*/ + if (attr->qp_type != IBV_QPT_RAW_PACKET) + wq_size_min = ctx->send_ds_num; + if (wq_size < wq_size_min) { + xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, "WQE size %u is not enough, set it as %u\n", + wq_size, wq_size_min); + wq_size = wq_size_min; + } + + if (wq_size > XSC_RECV_WQE_RING_DEPTH_MAX) { + xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, + "WQE size %u exceeds WQE ring depth, set it as %u\n", + wq_size, XSC_RECV_WQE_RING_DEPTH_MAX); + wq_size = XSC_RECV_WQE_RING_DEPTH_MAX; + } + + qp->rq.wqe_cnt = wq_size; + qp->rq.ds_cnt = qp->rq.wqe_cnt << ctx->recv_ds_shift; + qp->rq.seg_cnt = 1 << ctx->recv_ds_shift; + qp->rq.wqe_shift = XSC_BASE_WQE_SHIFT + ctx->recv_ds_shift; + qp->rq.max_post = qp->rq.wqe_cnt; + qp->rq.max_gs = attr->cap.max_recv_sge; + + xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, "Recv WQE count:%u, max post:%u wqe shift:%u\n", + qp->rq.wqe_cnt, qp->rq.max_post, qp->rq.wqe_shift); + return wqe_size * qp->rq.wqe_cnt; +} + +static int xsc_calc_wq_size(struct xsc_context *ctx, + struct ibv_qp_init_attr_ex *attr, + struct xsc_qp *qp) +{ + int ret; + int result; + + ret = xsc_calc_sq_size(ctx, attr, qp); + if (ret < 0) + return ret; + + result = ret; + + ret = xsc_calc_rq_size(ctx, attr, qp); + if (ret < 0) + return ret; + + result += ret; + + qp->sq.offset = ret; + qp->rq.offset = 0; + + return result; +} + +static const char *qptype2key(enum ibv_qp_type type) +{ + switch (type) { + case IBV_QPT_RC: return "HUGE_RC"; + case IBV_QPT_UC: return "HUGE_UC"; + case IBV_QPT_UD: return "HUGE_UD"; + case IBV_QPT_RAW_PACKET: return "HUGE_RAW_ETH"; + default: return "HUGE_NA"; + } +} + +static int xsc_alloc_qp_buf(struct ibv_context *context, + struct ibv_qp_init_attr_ex *attr, + struct xsc_qp *qp, + int size) +{ + int err; + enum xsc_alloc_type alloc_type; + enum xsc_alloc_type default_alloc_type = XSC_ALLOC_TYPE_ANON; + const char *qp_huge_key; + + if (qp->sq.wqe_cnt) { + qp->sq.wrid = malloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wrid)); + if (!qp->sq.wrid) { + errno = ENOMEM; + err = -1; + return err; + } + + qp->sq.wr_data = malloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wr_data)); + if (!qp->sq.wr_data) { + errno = ENOMEM; + err = -1; + goto ex_wrid; + } + + qp->sq.wqe_head = malloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wqe_head)); + if (!qp->sq.wqe_head) { + errno = ENOMEM; + err = -1; + goto ex_wrid; + } + + qp->sq.need_flush = malloc(qp->sq.wqe_cnt * 
sizeof(*qp->sq.need_flush)); + if (!qp->sq.need_flush) { + errno = ENOMEM; + err = -1; + goto ex_wrid; + } + memset(qp->sq.need_flush, 0, qp->sq.wqe_cnt); + + qp->sq.wr_opcode = malloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wr_opcode)); + if (!qp->sq.wr_opcode) { + errno = ENOMEM; + err = -1; + goto ex_wrid; + } + } + + if (qp->rq.wqe_cnt) { + qp->rq.wrid = malloc(qp->rq.wqe_cnt * sizeof(uint64_t)); + if (!qp->rq.wrid) { + errno = ENOMEM; + err = -1; + goto ex_wrid; + } + } + + /* compatibility support */ + qp_huge_key = qptype2key(qp->ibv_qp->qp_type); + if (xsc_use_huge(qp_huge_key)) + default_alloc_type = XSC_ALLOC_TYPE_HUGE; + + xsc_get_alloc_type(to_xctx(context), XSC_QP_PREFIX, &alloc_type, + default_alloc_type); + + err = xsc_alloc_prefered_buf(to_xctx(context), &qp->buf, + align(qp->buf_size, to_xdev + (context->device)->page_size), + to_xdev(context->device)->page_size, + alloc_type, + XSC_QP_PREFIX); + + if (err) { + err = -ENOMEM; + goto ex_wrid; + } + + memset(qp->buf.buf, 0, qp->buf_size); + + if (attr->qp_type == IBV_QPT_RAW_PACKET || + qp->flags & XSC_QP_FLAGS_USE_UNDERLAY) { + size_t aligned_sq_buf_size = align(qp->sq_buf_size, + to_xdev(context->device)->page_size); + /* For Raw Packet QP, allocate a separate buffer for the SQ */ + err = xsc_alloc_prefered_buf(to_xctx(context), &qp->sq_buf, + aligned_sq_buf_size, + to_xdev(context->device)->page_size, + alloc_type, + XSC_QP_PREFIX); + if (err) { + err = -ENOMEM; + goto rq_buf; + } + + memset(qp->sq_buf.buf, 0, aligned_sq_buf_size); + } + + return 0; +rq_buf: + xsc_free_actual_buf(to_xctx(context), &qp->buf); +ex_wrid: + if (qp->rq.wrid) + free(qp->rq.wrid); + + if (qp->sq.wqe_head) + free(qp->sq.wqe_head); + + if (qp->sq.wr_data) + free(qp->sq.wr_data); + if (qp->sq.wrid) + free(qp->sq.wrid); + + if (qp->sq.need_flush) + free(qp->sq.need_flush); + + if (qp->sq.wr_opcode) + free(qp->sq.wr_opcode); + + return err; +} + +static void xsc_free_qp_buf(struct xsc_context *ctx, struct xsc_qp *qp) +{ + xsc_free_actual_buf(ctx, &qp->buf); + + if (qp->sq_buf.buf) + xsc_free_actual_buf(ctx, &qp->sq_buf); + + if (qp->rq.wrid) + free(qp->rq.wrid); + + if (qp->sq.wqe_head) + free(qp->sq.wqe_head); + + if (qp->sq.wrid) + free(qp->sq.wrid); + + if (qp->sq.wr_data) + free(qp->sq.wr_data); + + if (qp->sq.need_flush) + free(qp->sq.need_flush); + + if (qp->sq.wr_opcode) + free(qp->sq.wr_opcode); +} + +enum { + XSC_CREATE_QP_SUP_COMP_MASK = IBV_QP_INIT_ATTR_PD | + IBV_QP_INIT_ATTR_CREATE_FLAGS +}; + +enum { + XSC_DV_CREATE_QP_SUP_COMP_MASK = XSCDV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS | + XSCDV_QP_INIT_ATTR_MASK_DC +}; + +enum { + XSC_CREATE_QP_EX2_COMP_MASK = (IBV_QP_INIT_ATTR_CREATE_FLAGS | + IBV_QP_INIT_ATTR_MAX_TSO_HEADER | + IBV_QP_INIT_ATTR_IND_TABLE | + IBV_QP_INIT_ATTR_RX_HASH), +}; + +enum { + XSCDV_QP_CREATE_SUP_FLAGS = + (XSCDV_QP_CREATE_TUNNEL_OFFLOADS | + XSCDV_QP_CREATE_TIR_ALLOW_SELF_LOOPBACK_UC | + XSCDV_QP_CREATE_TIR_ALLOW_SELF_LOOPBACK_MC | + XSCDV_QP_CREATE_DISABLE_SCATTER_TO_CQE | + XSCDV_QP_CREATE_ALLOW_SCATTER_TO_CQE), +}; + +static struct ibv_qp *create_qp(struct ibv_context *context, + struct ibv_qp_init_attr_ex *attr, + struct xscdv_qp_init_attr *xqp_attr) +{ + struct xsc_create_qp cmd; + struct xsc_create_qp_resp resp; + struct xsc_create_qp_ex_resp resp_ex; + struct xsc_qp *qp; + int ret; + struct xsc_context *ctx = to_xctx(context); + struct ibv_qp *ibqp; + struct xsc_parent_domain *xparent_domain; + struct xsc_device *xdev = to_xdev(context->device); + + xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, "comp_mask=0x%x.\n", attr->comp_mask); + 
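+ /*
+ * Descriptive note on the path below: the comp_mask is validated
+ * against XSC_CREATE_QP_SUP_COMP_MASK, the combined SQ/RQ buffer is
+ * sized and allocated, RAW_PACKET create_flags are translated into
+ * XSC command flags, the QP is created via ibv_cmd_create_qp_ex()
+ * and stored in the QP table, and finally the SQ/RQ doorbell
+ * pointers are derived from the mapped SQM/RQM register regions.
+ */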
+ if (attr->comp_mask & ~XSC_CREATE_QP_SUP_COMP_MASK) { + xsc_err("Not supported comp_mask:0x%x\n", attr->comp_mask); + return NULL; + } + + qp = calloc(1, sizeof(*qp)); + if (!qp) { + xsc_err("QP calloc failed\n"); + return NULL; + } + + ibqp = (struct ibv_qp *)&qp->verbs_qp; + qp->ibv_qp = ibqp; + + memset(&cmd, 0, sizeof(cmd)); + memset(&resp, 0, sizeof(resp)); + memset(&resp_ex, 0, sizeof(resp_ex)); + + ret = xsc_calc_wq_size(ctx, attr, qp); + if (ret < 0) { + xsc_err("Calculate WQ size failed\n"); + errno = EINVAL; + goto err; + } + + qp->buf_size = ret; + qp->sq_buf_size = 0; + + if (xsc_alloc_qp_buf(context, attr, qp, ret)) { + xsc_err("Alloc QP buffer failed\n"); + errno = ENOMEM; + goto err; + } + + qp->sq_start = qp->buf.buf + qp->sq.offset; + qp->rq_start = qp->buf.buf + qp->rq.offset; + qp->sq.qend = qp->buf.buf + qp->sq.offset + + (qp->sq.wqe_cnt << qp->sq.wqe_shift); + + xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, "sq start:%p, sq qend:%p, buffer size:%u\n", + qp->sq_start, qp->sq.qend, qp->buf_size); + + xsc_init_qp_indices(qp); + + if (xsc_spinlock_init_pd(&qp->sq.lock, attr->pd) || + xsc_spinlock_init_pd(&qp->rq.lock, attr->pd)) + goto err_free_qp_buf; + + cmd.buf_addr = (uintptr_t) qp->buf.buf; + cmd.db_addr = (uintptr_t) qp->db; + cmd.sq_wqe_count = qp->sq.ds_cnt; + cmd.rq_wqe_count = qp->rq.ds_cnt; + cmd.rq_wqe_shift = qp->rq.wqe_shift; + + if (attr->qp_type == IBV_QPT_RAW_PACKET) { + if (attr->comp_mask & IBV_QP_INIT_ATTR_CREATE_FLAGS) { + if (attr->create_flags & XSC_QP_CREATE_RAWPACKET_TSO) { + cmd.flags |= XSC_QP_FLAG_RAWPACKET_TSO;/*revert to command flags*/ + xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, + "revert create_flags(0x%x) to cmd_flags(0x%x)\n", + attr->create_flags, cmd.flags); + } + + if (attr->create_flags & XSC_QP_CREATE_RAWPACKET_TX) { + cmd.flags |= XSC_QP_FLAG_RAWPACKET_TX;/*revert to command flags*/ + xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, + "revert create_flags(0x%x) to cmd_flags(0x%x)\n", + attr->create_flags, cmd.flags); + } + attr->comp_mask &= ~IBV_QP_INIT_ATTR_CREATE_FLAGS; + } + } + + pthread_mutex_lock(&ctx->qp_table_mutex); + + xparent_domain = to_xparent_domain(attr->pd); + + ret = ibv_cmd_create_qp_ex(context, &qp->verbs_qp, attr, + &cmd.ibv_cmd, sizeof(cmd), + &resp.ibv_resp, sizeof(resp)); + if (ret) { + xsc_err("ibv_cmd_create_qp_ex failed,ret %d\n", ret); + errno = ret; + goto err_free_uidx; + } + + if (qp->sq.wqe_cnt || qp->rq.wqe_cnt) { + ret = xsc_store_qp(ctx, ibqp->qp_num, qp); + if (ret) { + xsc_err("xsc_store_qp failed,ret %d\n", ret); + errno = EINVAL; + goto err_destroy; + } + } + + pthread_mutex_unlock(&ctx->qp_table_mutex); + + qp->rq.max_post = qp->rq.wqe_cnt; + + if (attr->sq_sig_all) + qp->sq_signal_bits = 1; + else + qp->sq_signal_bits = 0; + + attr->cap.max_send_wr = qp->sq.max_post; + attr->cap.max_recv_wr = qp->rq.max_post; + attr->cap.max_recv_sge = qp->rq.max_gs; + + qp->rsc.type = XSC_RSC_TYPE_QP; + qp->rsc.rsn = ibqp->qp_num; + + if (xparent_domain) + atomic_fetch_add(&xparent_domain->xpd.refcount, 1); + + qp->rqn = ibqp->qp_num; + qp->sqn = ibqp->qp_num; + + xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, "qp rqn:%u, sqn:%u\n", qp->rqn, qp->sqn); + qp->sq.db = ctx->sqm_reg_va + (ctx->qpm_tx_db & (xdev->page_size - 1)); + qp->rq.db = ctx->rqm_reg_va + (ctx->qpm_rx_db & (xdev->page_size - 1)); + + return ibqp; + +err_destroy: + ibv_cmd_destroy_qp(ibqp); + +err_free_uidx: + pthread_mutex_unlock(&to_xctx(context)->qp_table_mutex); + +err_free_qp_buf: + xsc_free_qp_buf(ctx, qp); + +err: + free(qp); + + return NULL; +} + +struct ibv_qp *xsc_create_qp(struct 
ibv_pd *pd, + struct ibv_qp_init_attr *attr) +{ + struct ibv_qp *qp; + struct ibv_qp_init_attr_ex attrx; + + memset(&attrx, 0, sizeof(attrx)); + memcpy(&attrx, attr, sizeof(*attr)); + attrx.comp_mask = IBV_QP_INIT_ATTR_PD; + attrx.pd = pd; + qp = create_qp(pd->context, &attrx, NULL); + if (qp) + memcpy(attr, &attrx, sizeof(*attr)); + + return qp; +} + +static void xsc_lock_cqs(struct ibv_qp *qp) +{ + struct xsc_cq *send_cq = to_xcq(qp->send_cq); + struct xsc_cq *recv_cq = to_xcq(qp->recv_cq); + + if (send_cq && recv_cq) { + if (send_cq == recv_cq) { + xsc_spin_lock(&send_cq->lock); + } else if (send_cq->cqn < recv_cq->cqn) { + xsc_spin_lock(&send_cq->lock); + xsc_spin_lock(&recv_cq->lock); + } else { + xsc_spin_lock(&recv_cq->lock); + xsc_spin_lock(&send_cq->lock); + } + } else if (send_cq) { + xsc_spin_lock(&send_cq->lock); + } else if (recv_cq) { + xsc_spin_lock(&recv_cq->lock); + } +} + +static void xsc_unlock_cqs(struct ibv_qp *qp) +{ + struct xsc_cq *send_cq = to_xcq(qp->send_cq); + struct xsc_cq *recv_cq = to_xcq(qp->recv_cq); + + if (send_cq && recv_cq) { + if (send_cq == recv_cq) { + xsc_spin_unlock(&send_cq->lock); + } else if (send_cq->cqn < recv_cq->cqn) { + xsc_spin_unlock(&recv_cq->lock); + xsc_spin_unlock(&send_cq->lock); + } else { + xsc_spin_unlock(&send_cq->lock); + xsc_spin_unlock(&recv_cq->lock); + } + } else if (send_cq) { + xsc_spin_unlock(&send_cq->lock); + } else if (recv_cq) { + xsc_spin_unlock(&recv_cq->lock); + } +} + +int xsc_destroy_qp(struct ibv_qp *ibqp) +{ + struct xsc_qp *qp = to_xqp(ibqp); + struct xsc_context *ctx = to_xctx(ibqp->context); + int ret; + struct xsc_parent_domain *xparent_domain = to_xparent_domain(ibqp->pd); + struct xsc_err_state_qp_node *tmp, *err_rq_node, *err_sq_node; + + xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, "\n"); + + pthread_mutex_lock(&ctx->qp_table_mutex); + + ret = ibv_cmd_destroy_qp(ibqp); + if (ret) { + pthread_mutex_unlock(&ctx->qp_table_mutex); + return ret; + } + + xsc_lock_cqs(ibqp); + + list_for_each_safe(&to_xcq(ibqp->recv_cq)->err_state_qp_list, err_rq_node, tmp, entry) { + if (err_rq_node->qp_id == qp->rsc.rsn) { + list_del(&err_rq_node->entry); + free(err_rq_node); + } + } + + list_for_each_safe(&to_xcq(ibqp->send_cq)->err_state_qp_list, err_sq_node, tmp, entry) { + if (err_sq_node->qp_id == qp->rsc.rsn) { + list_del(&err_sq_node->entry); + free(err_sq_node); + } + } + + __xsc_cq_clean(to_xcq(ibqp->recv_cq), qp->rsc.rsn); + if (ibqp->send_cq != ibqp->recv_cq) + __xsc_cq_clean(to_xcq(ibqp->send_cq), qp->rsc.rsn); + + if (qp->sq.wqe_cnt || qp->rq.wqe_cnt) + xsc_clear_qp(ctx, ibqp->qp_num); + + xsc_unlock_cqs(ibqp); + pthread_mutex_unlock(&ctx->qp_table_mutex); + + xsc_free_qp_buf(ctx, qp); + + if (xparent_domain) + atomic_fetch_sub(&xparent_domain->xpd.refcount, 1); + + free(qp); + + return 0; +} + +int xsc_query_qp(struct ibv_qp *ibqp, struct ibv_qp_attr *attr, + int attr_mask, struct ibv_qp_init_attr *init_attr) +{ + struct ibv_query_qp cmd; + struct xsc_qp *qp = to_xqp(ibqp); + int ret; + + xsc_dbg(to_xctx(ibqp->context)->dbg_fp, XSC_DBG_QP, "\n"); + + if (qp->rss_qp) + return ENOSYS; + + ret = ibv_cmd_query_qp(ibqp, attr, attr_mask, init_attr, &cmd, sizeof(cmd)); + if (ret) + return ret; + + init_attr->cap.max_send_wr = qp->sq.max_post; + init_attr->cap.max_send_sge = qp->sq.max_gs; + init_attr->cap.max_inline_data = qp->max_inline_data; + + attr->cap = init_attr->cap; + + return 0; +} + +enum { + XSC_MODIFY_QP_EX_ATTR_MASK = IBV_QP_RATE_LIMIT, +}; + +int xsc_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, + int 
attr_mask) +{ + struct ibv_modify_qp cmd = {}; + struct xsc_qp *xqp = to_xqp(qp); + int ret; + + xsc_dbg(to_xctx(qp->context)->dbg_fp, XSC_DBG_QP, "\n"); + ret = ibv_cmd_modify_qp(qp, attr, attr_mask, + &cmd, sizeof(cmd)); + + if (!ret && (attr_mask & IBV_QP_STATE) && + attr->qp_state == IBV_QPS_RESET) { + if (qp->recv_cq) { + xsc_cq_clean(to_xcq(qp->recv_cq), xqp->rsc.rsn); + } + if (qp->send_cq != qp->recv_cq && qp->send_cq) + xsc_cq_clean(to_xcq(qp->send_cq), + to_xqp(qp)->rsc.rsn); + + xsc_init_qp_indices(xqp); + } + + if (!ret && (attr_mask & IBV_QP_STATE)) + qp->state = attr->qp_state; + + /*workaround: generate flush err cqe if qp status turns to ERR*/ + if (!ret && (attr_mask & IBV_QP_STATE)) + ret = xsc_err_state_qp(qp, attr->cur_qp_state, attr->qp_state); + + return ret; +} + +int xsc_modify_qp_rate_limit(struct ibv_qp *qp, + struct ibv_qp_rate_limit_attr *attr) +{ + struct ibv_qp_attr qp_attr = {}; + struct ib_uverbs_ex_modify_qp_resp resp = {}; + struct xsc_modify_qp cmd = {}; + struct xsc_context *xctx = to_xctx(qp->context); + int ret; + + if (attr->comp_mask) + return EINVAL; + + if ((attr->max_burst_sz || + attr->typical_pkt_sz) && + (!attr->rate_limit || + !(xctx->packet_pacing_caps.cap_flags & + XSC_IB_PP_SUPPORT_BURST))) + return EINVAL; + + cmd.burst_info.max_burst_sz = attr->max_burst_sz; + cmd.burst_info.typical_pkt_sz = attr->typical_pkt_sz; + qp_attr.rate_limit = attr->rate_limit; + + ret = ibv_cmd_modify_qp_ex(qp, &qp_attr, IBV_QP_RATE_LIMIT, + &cmd.ibv_cmd, + sizeof(cmd), + &resp, + sizeof(resp)); + + return ret; +} + +/* + * IB spec version 1.3. Table 224 Rate to xsc rate + * conversion table on best effort basis. + */ +static const uint8_t ib_to_xsc_rate_table[] = { + 0, /* Invalid to unlimited */ + 0, /* Invalid to unlimited */ + 7, /* 2.5 Gbps */ + 8, /* 10Gbps */ + 9, /* 30Gbps */ + 10, /* 5 Gbps */ + 11, /* 20 Gbps */ + 12, /* 40 Gbps */ + 13, /* 60 Gbps */ + 14, /* 80 Gbps */ + 15, /* 120 Gbps */ + 11, /* 14 Gbps to 20 Gbps */ + 13, /* 56 Gbps to 60 Gbps */ + 15, /* 112 Gbps to 120 Gbps */ + 0, /* 168 Gbps to unlimited */ + 9, /* 25 Gbps to 30 Gbps */ + 15, /* 100 Gbps to 120 Gbps */ + 0, /* 200 Gbps to unlimited */ + 0, /* 300 Gbps to unlimited */ +}; + +static uint8_t ah_attr_to_xsc_rate(enum ibv_rate ah_static_rate) +{ + if (ah_static_rate >= ARRAY_SIZE(ib_to_xsc_rate_table)) + return 0; + return ib_to_xsc_rate_table[ah_static_rate]; +} + +#define RROCE_UDP_SPORT_MIN 0xC000 +#define RROCE_UDP_SPORT_MAX 0xFFFF +struct ibv_ah *xsc_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr) +{ + struct xsc_context *ctx = to_xctx(pd->context); + struct ibv_port_attr port_attr; + struct xsc_ah *ah; + uint8_t static_rate; + uint32_t gid_type; + __be32 tmp; + uint8_t grh; + bool is_eth; + bool grh_req; + + if (attr->port_num < 1 || attr->port_num > ctx->num_ports) + return NULL; + + if (ctx->cached_link_layer[attr->port_num - 1]) { + is_eth = ctx->cached_link_layer[attr->port_num - 1] == + IBV_LINK_LAYER_ETHERNET; + grh_req = ctx->cached_port_flags[attr->port_num - 1] & + IBV_QPF_GRH_REQUIRED; + } else { + if (ibv_query_port(pd->context, attr->port_num, &port_attr)) + return NULL; + + is_eth = port_attr.link_layer == IBV_LINK_LAYER_ETHERNET; + grh_req = port_attr.flags & IBV_QPF_GRH_REQUIRED; + } + + if (unlikely((!attr->is_global) && (is_eth || grh_req))) { + errno = EINVAL; + return NULL; + } + + ah = calloc(1, sizeof *ah); + if (!ah) + return NULL; + + static_rate = ah_attr_to_xsc_rate(attr->static_rate); + if (is_eth) { + if (ibv_query_gid_type(pd->context, 
attr->port_num, + attr->grh.sgid_index, &gid_type)) + goto err; + + if (gid_type == IBV_GID_TYPE_ROCE_V2) + ah->av.rlid = htobe16(rand() % (RROCE_UDP_SPORT_MAX + 1 + - RROCE_UDP_SPORT_MIN) + + RROCE_UDP_SPORT_MIN); + /* Since RoCE packets must contain GRH, this bit is reserved + * for RoCE and shouldn't be set. + */ + grh = 0; + ah->av.stat_rate_sl = (static_rate << 4) | ((attr->sl & 0x7) << 1); + } else { + ah->av.fl_mlid = attr->src_path_bits & 0x7f; + ah->av.rlid = htobe16(attr->dlid); + grh = 1; + ah->av.stat_rate_sl = (static_rate << 4) | (attr->sl & 0xf); + } + if (attr->is_global) { + ah->av.tclass = attr->grh.traffic_class; + ah->av.hop_limit = attr->grh.hop_limit; + tmp = htobe32((grh << 30) | + ((attr->grh.sgid_index & 0xff) << 20) | + (attr->grh.flow_label & 0xfffff)); + ah->av.grh_gid_fl = tmp; + memcpy(ah->av.rgid, attr->grh.dgid.raw, 16); + } + + if (is_eth) { + if (ctx->cmds_supp_uhw & XSC_USER_CMDS_SUPP_UHW_CREATE_AH) { + struct xsc_create_ah_resp resp = {}; + + if (ibv_cmd_create_ah(pd, &ah->ibv_ah, attr, &resp.ibv_resp, sizeof(resp))) + goto err; + + ah->kern_ah = true; + memcpy(ah->av.rmac, resp.dmac, ETHERNET_LL_SIZE); + } else { + uint16_t vid; + + if (ibv_resolve_eth_l2_from_gid(pd->context, attr, + ah->av.rmac, &vid)) + goto err; + } + } + + return &ah->ibv_ah; +err: + free(ah); + return NULL; +} + +int xsc_destroy_ah(struct ibv_ah *ah) +{ + struct xsc_ah *xah = to_xah(ah); + int err; + + if (xah->kern_ah) { + err = ibv_cmd_destroy_ah(ah); + if (err) + return err; + } + + free(xah); + return 0; +} + +int xsc_attach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid) +{ + return ibv_cmd_attach_mcast(qp, gid, lid); +} + +int xsc_detach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid) +{ + return ibv_cmd_detach_mcast(qp, gid, lid); +} + +struct ibv_qp *xsc_create_qp_ex(struct ibv_context *context, + struct ibv_qp_init_attr_ex *attr) +{ + return create_qp(context, attr, NULL); +} + +struct ibv_qp *xscdv_create_qp(struct ibv_context *context, + struct ibv_qp_init_attr_ex *qp_attr, + struct xscdv_qp_init_attr *xqp_attr) +{ + return create_qp(context, qp_attr, xqp_attr); +} + +struct ibv_xrcd * +xsc_open_xrcd(struct ibv_context *context, + struct ibv_xrcd_init_attr *xrcd_init_attr) +{ + int err; + struct verbs_xrcd *xrcd; + struct ibv_open_xrcd cmd = {}; + struct ib_uverbs_open_xrcd_resp resp = {}; + + xrcd = calloc(1, sizeof(*xrcd)); + if (!xrcd) + return NULL; + + err = ibv_cmd_open_xrcd(context, xrcd, sizeof(*xrcd), xrcd_init_attr, + &cmd, sizeof(cmd), &resp, sizeof(resp)); + if (err) { + free(xrcd); + return NULL; + } + + return &xrcd->xrcd; +} + +int xsc_close_xrcd(struct ibv_xrcd *ib_xrcd) +{ + struct verbs_xrcd *xrcd = container_of(ib_xrcd, struct verbs_xrcd, xrcd); + int ret; + + ret = ibv_cmd_close_xrcd(xrcd); + if (!ret) + free(xrcd); + + return ret; +} + +int xsc_query_device_ex(struct ibv_context *context, + const struct ibv_query_device_ex_input *input, + struct ibv_device_attr_ex *attr, + size_t attr_size) +{ + struct xsc_context *xctx = to_xctx(context); + struct xsc_query_device_ex_resp resp = {}; + size_t resp_size = + (xctx->cmds_supp_uhw & XSC_USER_CMDS_SUPP_UHW_QUERY_DEVICE) ? 
+ sizeof(resp) : + sizeof(resp.ibv_resp); + struct ibv_device_attr *a; + union xsc_ib_fw_ver raw_fw_ver; + int err; + + raw_fw_ver.data = 0; + err = ibv_cmd_query_device_any(context, input, attr, attr_size, + &resp.ibv_resp, &resp_size); + if (err) + return err; + + if (attr_size >= offsetofend(struct ibv_device_attr_ex, tso_caps)) { + attr->tso_caps.max_tso = resp.tso_caps.max_tso; + attr->tso_caps.supported_qpts = resp.tso_caps.supported_qpts; + } + if (attr_size >= offsetofend(struct ibv_device_attr_ex, rss_caps)) { + attr->rss_caps.rx_hash_fields_mask = + resp.rss_caps.rx_hash_fields_mask; + attr->rss_caps.rx_hash_function = + resp.rss_caps.rx_hash_function; + } + if (attr_size >= + offsetofend(struct ibv_device_attr_ex, packet_pacing_caps)) { + attr->packet_pacing_caps.qp_rate_limit_min = + resp.packet_pacing_caps.qp_rate_limit_min; + attr->packet_pacing_caps.qp_rate_limit_max = + resp.packet_pacing_caps.qp_rate_limit_max; + attr->packet_pacing_caps.supported_qpts = + resp.packet_pacing_caps.supported_qpts; + } + + if (resp.xsc_ib_support_multi_pkt_send_wqes & XSC_IB_ALLOW_MPW) + xctx->vendor_cap_flags |= XSC_VENDOR_CAP_FLAGS_MPW_ALLOWED; + + if (resp.xsc_ib_support_multi_pkt_send_wqes & XSC_IB_SUPPORT_EMPW) + xctx->vendor_cap_flags |= XSC_VENDOR_CAP_FLAGS_ENHANCED_MPW; + + xctx->cqe_comp_caps.max_num = resp.cqe_comp_caps.max_num; + xctx->cqe_comp_caps.supported_format = resp.cqe_comp_caps.supported_format; + xctx->sw_parsing_caps.sw_parsing_offloads = + resp.sw_parsing_caps.sw_parsing_offloads; + xctx->sw_parsing_caps.supported_qpts = + resp.sw_parsing_caps.supported_qpts; + xctx->striding_rq_caps.min_single_stride_log_num_of_bytes = + resp.striding_rq_caps.min_single_stride_log_num_of_bytes; + xctx->striding_rq_caps.max_single_stride_log_num_of_bytes = + resp.striding_rq_caps.max_single_stride_log_num_of_bytes; + xctx->striding_rq_caps.min_single_wqe_log_num_of_strides = + resp.striding_rq_caps.min_single_wqe_log_num_of_strides; + xctx->striding_rq_caps.max_single_wqe_log_num_of_strides = + resp.striding_rq_caps.max_single_wqe_log_num_of_strides; + xctx->striding_rq_caps.supported_qpts = + resp.striding_rq_caps.supported_qpts; + xctx->tunnel_offloads_caps = resp.tunnel_offloads_caps; + xctx->packet_pacing_caps = resp.packet_pacing_caps; + + if (resp.flags & XSC_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_COMP) + xctx->vendor_cap_flags |= XSC_VENDOR_CAP_FLAGS_CQE_128B_COMP; + + if (resp.flags & XSC_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_PAD) + xctx->vendor_cap_flags |= XSC_VENDOR_CAP_FLAGS_CQE_128B_PAD; + + raw_fw_ver.data = resp.ibv_resp.base.fw_ver; + a = &attr->orig_attr; + xsc_set_fw_version(a, &raw_fw_ver); + + return 0; +} + +static int rwq_sig_enabled(struct ibv_context *context) +{ + char *env; + + env = getenv("XSC_RWQ_SIGNATURE"); + if (env) + return 1; + + return 0; +} + +static void xsc_free_rwq_buf(struct xsc_rwq *rwq, struct ibv_context *context) +{ + struct xsc_context *ctx = to_xctx(context); + + xsc_free_actual_buf(ctx, &rwq->buf); + free(rwq->rq.wrid); +} + +static int xsc_alloc_rwq_buf(struct ibv_context *context, + struct xsc_rwq *rwq, + int size) +{ + int err; + enum xsc_alloc_type alloc_type; + + xsc_get_alloc_type(to_xctx(context), XSC_RWQ_PREFIX, + &alloc_type, XSC_ALLOC_TYPE_ANON); + + rwq->rq.wrid = malloc(rwq->rq.wqe_cnt * sizeof(uint64_t)); + if (!rwq->rq.wrid) { + errno = ENOMEM; + return -1; + } + + err = xsc_alloc_prefered_buf(to_xctx(context), &rwq->buf, + align(rwq->buf_size, to_xdev + (context->device)->page_size), + to_xdev(context->device)->page_size, + alloc_type, 
+ XSC_RWQ_PREFIX); + + if (err) { + free(rwq->rq.wrid); + errno = ENOMEM; + return -1; + } + + return 0; +} + +static struct ibv_wq *create_wq(struct ibv_context *context, + struct ibv_wq_init_attr *attr, + struct xscdv_wq_init_attr *xwq_attr) +{ + struct xsc_create_wq cmd; + struct xsc_create_wq_resp resp; + int err; + struct xsc_rwq *rwq; + struct xsc_context *ctx = to_xctx(context); + int ret; + int32_t usr_idx = 0; + + if (attr->wq_type != IBV_WQT_RQ) + return NULL; + + memset(&cmd, 0, sizeof(cmd)); + memset(&resp, 0, sizeof(resp)); + + rwq = calloc(1, sizeof(*rwq)); + if (!rwq) + return NULL; + + rwq->wq_sig = rwq_sig_enabled(context); + if (rwq->wq_sig) + cmd.flags = XSC_WQ_FLAG_SIGNATURE; + + ret = xsc_calc_rwq_size(ctx, rwq, attr, xwq_attr); + if (ret < 0) { + errno = -ret; + goto err; + } + + rwq->buf_size = ret; + if (xsc_alloc_rwq_buf(context, rwq, ret)) + goto err; + + xsc_init_rwq_indices(rwq); + + if (xsc_spinlock_init_pd(&rwq->rq.lock, attr->pd)) + goto err_free_rwq_buf; + + rwq->db = xsc_alloc_dbrec(ctx); + if (!rwq->db) + goto err_free_rwq_buf; + + rwq->db[XSC_RCV_DBR] = 0; + rwq->db[XSC_SND_DBR] = 0; + rwq->pbuff = rwq->buf.buf + rwq->rq.offset; + rwq->recv_db = &rwq->db[XSC_RCV_DBR]; + cmd.buf_addr = (uintptr_t)rwq->buf.buf; + cmd.db_addr = (uintptr_t)rwq->db; + cmd.rq_wqe_count = rwq->rq.wqe_cnt; + cmd.rq_wqe_shift = rwq->rq.wqe_shift; + usr_idx = xsc_store_uidx(ctx, rwq); + if (usr_idx < 0) { + xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, "Couldn't find free user index\n"); + goto err_free_db_rec; + } + + cmd.user_index = usr_idx; + + if (xwq_attr) { + if (xwq_attr->comp_mask & XSCDV_WQ_INIT_ATTR_MASK_STRIDING_RQ) { + if ((xwq_attr->striding_rq_attrs.single_stride_log_num_of_bytes < + ctx->striding_rq_caps.min_single_stride_log_num_of_bytes) || + (xwq_attr->striding_rq_attrs.single_stride_log_num_of_bytes > + ctx->striding_rq_caps.max_single_stride_log_num_of_bytes)) { + errno = EINVAL; + goto err_create; + } + + if ((xwq_attr->striding_rq_attrs.single_wqe_log_num_of_strides < + ctx->striding_rq_caps.min_single_wqe_log_num_of_strides) || + (xwq_attr->striding_rq_attrs.single_wqe_log_num_of_strides > + ctx->striding_rq_caps.max_single_wqe_log_num_of_strides)) { + errno = EINVAL; + goto err_create; + } + + cmd.single_stride_log_num_of_bytes = + xwq_attr->striding_rq_attrs.single_stride_log_num_of_bytes; + cmd.single_wqe_log_num_of_strides = + xwq_attr->striding_rq_attrs.single_wqe_log_num_of_strides; + cmd.two_byte_shift_en = + xwq_attr->striding_rq_attrs.two_byte_shift_en; + cmd.comp_mask |= XSC_IB_CREATE_WQ_STRIDING_RQ; + } + } + + err = ibv_cmd_create_wq(context, attr, &rwq->wq, &cmd.ibv_cmd, + sizeof(cmd), &resp.ibv_resp, sizeof(resp)); + if (err) + goto err_create; + + rwq->rsc.type = XSC_RSC_TYPE_RWQ; + rwq->rsc.rsn = cmd.user_index; + + rwq->wq.post_recv = xsc_post_wq_recv; + return &rwq->wq; + +err_create: + xsc_clear_uidx(ctx, cmd.user_index); +err_free_db_rec: + xsc_free_db(to_xctx(context), rwq->db); +err_free_rwq_buf: + xsc_free_rwq_buf(rwq, context); +err: + free(rwq); + return NULL; +} + +struct ibv_wq *xsc_create_wq(struct ibv_context *context, + struct ibv_wq_init_attr *attr) +{ + return create_wq(context, attr, NULL); +} + +struct ibv_wq *xscdv_create_wq(struct ibv_context *context, + struct ibv_wq_init_attr *attr, + struct xscdv_wq_init_attr *xwq_attr) +{ + return create_wq(context, attr, xwq_attr); +} + +int xsc_modify_wq(struct ibv_wq *wq, struct ibv_wq_attr *attr) +{ + struct xsc_modify_wq cmd = {}; + struct xsc_rwq *rwq = to_xrwq(wq); + + if ((attr->attr_mask & 
IBV_WQ_ATTR_STATE) && + attr->wq_state == IBV_WQS_RDY) { + if ((attr->attr_mask & IBV_WQ_ATTR_CURR_STATE) && + attr->curr_wq_state != wq->state) + return -EINVAL; + + if (wq->state == IBV_WQS_RESET) { + xsc_spin_lock(&to_xcq(wq->cq)->lock); + __xsc_cq_clean(to_xcq(wq->cq), + rwq->rsc.rsn); + xsc_spin_unlock(&to_xcq(wq->cq)->lock); + xsc_init_rwq_indices(rwq); + rwq->db[XSC_RCV_DBR] = 0; + rwq->db[XSC_SND_DBR] = 0; + } + } + + return ibv_cmd_modify_wq(wq, attr, &cmd.ibv_cmd, sizeof(cmd)); +} + +int xsc_destroy_wq(struct ibv_wq *wq) +{ + struct xsc_rwq *rwq = to_xrwq(wq); + int ret; + + ret = ibv_cmd_destroy_wq(wq); + if (ret) + return ret; + + xsc_spin_lock(&to_xcq(wq->cq)->lock); + __xsc_cq_clean(to_xcq(wq->cq), rwq->rsc.rsn); + xsc_spin_unlock(&to_xcq(wq->cq)->lock); + xsc_clear_uidx(to_xctx(wq->context), rwq->rsc.rsn); + xsc_free_db(to_xctx(wq->context), rwq->db); + xsc_free_rwq_buf(rwq, wq->context); + free(rwq); + + return 0; +} + +static void free_flow_counters_descriptions(struct xsc_ib_create_flow *cmd) +{ + int i; + + for (i = 0; i < cmd->ncounters_data; i++) + free(cmd->data[i].counters_data); +} + +static int get_flow_mcounters(struct xsc_flow *mflow, + struct ibv_flow_attr *flow_attr, + struct xsc_counters **mcounters, + uint32_t *data_size) +{ + struct ibv_flow_spec *ib_spec; + uint32_t ncounters_used = 0; + int i; + + ib_spec = (struct ibv_flow_spec *)(flow_attr + 1); + for (i = 0; i < flow_attr->num_of_specs; i++, ib_spec = (void *)ib_spec + ib_spec->hdr.size) { + if (ib_spec->hdr.type != IBV_FLOW_SPEC_ACTION_COUNT) + continue; + + /* currently support only one counters data */ + if (ncounters_used > 0) + return EINVAL; + + *mcounters = to_mcounters(ib_spec->flow_count.counters); + ncounters_used++; + } + + *data_size = ncounters_used * sizeof(struct xsc_ib_flow_counters_data); + return 0; +} + +static int allocate_flow_counters_descriptions(struct xsc_counters *mcounters, + struct xsc_ib_create_flow *cmd) +{ + struct xsc_ib_flow_counters_data *mcntrs_data; + struct xsc_ib_flow_counters_desc *cntrs_data; + struct xsc_counter_node *cntr_node; + uint32_t ncounters; + int j = 0; + + mcntrs_data = cmd->data; + ncounters = mcounters->ncounters; + + /* xsc_attach_counters_point_flow was never called */ + if (!ncounters) + return EINVAL; + + /* each counter has both index and description */ + cntrs_data = calloc(ncounters, sizeof(*cntrs_data)); + if (!cntrs_data) + return ENOMEM; + + list_for_each(&mcounters->counters_list, cntr_node, entry) { + cntrs_data[j].description = cntr_node->desc; + cntrs_data[j].index = cntr_node->index; + j++; + } + + scrub_ptr_attr(cntrs_data); + mcntrs_data[cmd->ncounters_data].counters_data = cntrs_data; + mcntrs_data[cmd->ncounters_data].ncounters = ncounters; + cmd->ncounters_data++; + + return 0; +} + +struct ibv_flow *xsc_create_flow(struct ibv_qp *qp, struct ibv_flow_attr *flow_attr) +{ + struct xsc_ib_create_flow *cmd; + uint32_t required_cmd_size = 0; + struct ibv_flow *flow_id; + struct xsc_flow *mflow; + int ret; + + mflow = calloc(1, sizeof(*mflow)); + if (!mflow) { + errno = ENOMEM; + return NULL; + } + + ret = get_flow_mcounters(mflow, flow_attr, &mflow->mcounters, &required_cmd_size); + if (ret) { + errno = ret; + goto err_get_mcounters; + } + + required_cmd_size += sizeof(*cmd); + cmd = calloc(1, required_cmd_size); + if (!cmd) { + errno = ENOMEM; + goto err_get_mcounters; + } + + if (mflow->mcounters) { + pthread_mutex_lock(&mflow->mcounters->lock); + /* if the counters already bound no need to pass its description */ + if 
(!mflow->mcounters->refcount) { + ret = allocate_flow_counters_descriptions(mflow->mcounters, cmd); + if (ret) { + errno = ret; + goto err_desc_alloc; + } + } + } + + flow_id = &mflow->flow_id; + ret = ibv_cmd_create_flow(qp, flow_id, flow_attr, + cmd, required_cmd_size); + if (ret) + goto err_create_flow; + + if (mflow->mcounters) { + free_flow_counters_descriptions(cmd); + mflow->mcounters->refcount++; + pthread_mutex_unlock(&mflow->mcounters->lock); + } + + free(cmd); + + return flow_id; + +err_create_flow: + if (mflow->mcounters) { + free_flow_counters_descriptions(cmd); + pthread_mutex_unlock(&mflow->mcounters->lock); + } +err_desc_alloc: + free(cmd); +err_get_mcounters: + free(mflow); + return NULL; +} + +int xsc_destroy_flow(struct ibv_flow *flow_id) +{ + struct xsc_flow *mflow = to_mflow(flow_id); + int ret; + + ret = ibv_cmd_destroy_flow(flow_id); + if (ret) + return ret; + + if (mflow->mcounters) { + pthread_mutex_lock(&mflow->mcounters->lock); + mflow->mcounters->refcount--; + pthread_mutex_unlock(&mflow->mcounters->lock); + } + + free(mflow); + return 0; +} + +struct ibv_rwq_ind_table *xsc_create_rwq_ind_table(struct ibv_context *context, + struct ibv_rwq_ind_table_init_attr *init_attr) +{ + struct ibv_create_rwq_ind_table *cmd; + struct xsc_create_rwq_ind_table_resp resp; + struct ibv_rwq_ind_table *ind_table; + uint32_t required_tbl_size; + int num_tbl_entries; + int cmd_size; + int err; + + num_tbl_entries = 1 << init_attr->log_ind_tbl_size; + /* Data must be u64 aligned */ + required_tbl_size = (num_tbl_entries * sizeof(uint32_t)) < sizeof(uint64_t) ? + sizeof(uint64_t) : (num_tbl_entries * sizeof(uint32_t)); + + cmd_size = required_tbl_size + sizeof(*cmd); + cmd = calloc(1, cmd_size); + if (!cmd) + return NULL; + + memset(&resp, 0, sizeof(resp)); + ind_table = calloc(1, sizeof(*ind_table)); + if (!ind_table) + goto free_cmd; + + err = ibv_cmd_create_rwq_ind_table(context, init_attr, ind_table, + &resp.ibv_resp, sizeof(resp)); + if (err) + goto err; + + free(cmd); + return ind_table; + +err: + free(ind_table); +free_cmd: + free(cmd); + return NULL; +} + +int xsc_destroy_rwq_ind_table(struct ibv_rwq_ind_table *rwq_ind_table) +{ + int ret; + + ret = ibv_cmd_destroy_rwq_ind_table(rwq_ind_table); + + if (ret) + return ret; + + free(rwq_ind_table); + return 0; +} + +int xsc_modify_cq(struct ibv_cq *cq, struct ibv_modify_cq_attr *attr) +{ + struct ibv_modify_cq cmd = {}; + + return ibv_cmd_modify_cq(cq, attr, &cmd, sizeof(cmd)); +} + +static struct ibv_flow_action *_xsc_create_flow_action_esp(struct ibv_context *ctx, + struct ibv_flow_action_esp_attr *attr, + struct ibv_command_buffer *driver_attr) +{ + struct verbs_flow_action *action; + int ret; + + if (!check_comp_mask(attr->comp_mask, IBV_FLOW_ACTION_ESP_MASK_ESN)) { + errno = EOPNOTSUPP; + return NULL; + } + + action = calloc(1, sizeof(*action)); + if (!action) { + errno = ENOMEM; + return NULL; + } + + ret = ibv_cmd_create_flow_action_esp(ctx, attr, action, driver_attr); + if (ret) { + free(action); + return NULL; + } + + return &action->action; +} + +struct ibv_flow_action *xsc_create_flow_action_esp(struct ibv_context *ctx, + struct ibv_flow_action_esp_attr *attr) +{ + return _xsc_create_flow_action_esp(ctx, attr, NULL); +} + +struct ibv_flow_action *xscdv_create_flow_action_esp(struct ibv_context *ctx, + struct ibv_flow_action_esp_attr *esp, + struct xscdv_flow_action_esp *xattr) +{ + DECLARE_COMMAND_BUFFER_LINK(driver_attr, UVERBS_OBJECT_FLOW_ACTION, + UVERBS_METHOD_FLOW_ACTION_ESP_CREATE, 1, + NULL); + + if 
(!check_comp_mask(xattr->comp_mask, + XSCDV_FLOW_ACTION_ESP_MASK_FLAGS)) { + errno = EOPNOTSUPP; + return NULL; + } + + if (xattr->comp_mask & XSCDV_FLOW_ACTION_ESP_MASK_FLAGS) { + if (!check_comp_mask(xattr->action_flags, + XSC_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA)) { + errno = EOPNOTSUPP; + return NULL; + } + fill_attr_in_uint64(driver_attr, XSC_IB_ATTR_CREATE_FLOW_ACTION_FLAGS, + xattr->action_flags); + } + + return _xsc_create_flow_action_esp(ctx, esp, driver_attr); +} + +int xsc_modify_flow_action_esp(struct ibv_flow_action *action, + struct ibv_flow_action_esp_attr *attr) +{ + struct verbs_flow_action *vaction = + container_of(action, struct verbs_flow_action, action); + + if (!check_comp_mask(attr->comp_mask, IBV_FLOW_ACTION_ESP_MASK_ESN)) + return EOPNOTSUPP; + + return ibv_cmd_modify_flow_action_esp(vaction, attr, NULL); +} + +struct ibv_flow_action *xscdv_create_flow_action_modify_header(struct ibv_context *ctx, + size_t actions_sz, + uint64_t actions[], + enum xscdv_flow_table_type ft_type) +{ + DECLARE_COMMAND_BUFFER(cmd, UVERBS_OBJECT_FLOW_ACTION, + XSC_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER, + 3); + struct ib_uverbs_attr *handle = fill_attr_out_obj(cmd, + XSC_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE); + struct verbs_flow_action *action; + int ret; + + fill_attr_in(cmd, XSC_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM, + actions, actions_sz); + fill_attr_const_in(cmd, XSC_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE, + ft_type); + + action = calloc(1, sizeof(*action)); + if (!action) { + errno = ENOMEM; + return NULL; + } + + ret = execute_ioctl(ctx, cmd); + if (ret) { + free(action); + return NULL; + } + + action->action.context = ctx; + action->type = IBV_FLOW_ACTION_UNSPECIFIED; + action->handle = read_attr_obj(XSC_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE, + handle); + + return &action->action; +} + +struct ibv_flow_action * +xscdv_create_flow_action_packet_reformat(struct ibv_context *ctx, + size_t data_sz, + void *data, + enum xscdv_flow_action_packet_reformat_type reformat_type, + enum xscdv_flow_table_type ft_type) +{ + DECLARE_COMMAND_BUFFER(cmd, UVERBS_OBJECT_FLOW_ACTION, + XSC_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT, 4); + struct ib_uverbs_attr *handle = fill_attr_out_obj(cmd, + XSC_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE); + struct verbs_flow_action *action; + int ret; + + if ((!data && data_sz) || (data && !data_sz)) { + errno = EINVAL; + return NULL; + } + + if (data && data_sz) + fill_attr_in(cmd, + XSC_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF, + data, data_sz); + + fill_attr_const_in(cmd, XSC_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE, + reformat_type); + + fill_attr_const_in(cmd, XSC_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE, + ft_type); + + action = calloc(1, sizeof(*action)); + if (!action) { + errno = ENOMEM; + return NULL; + } + + ret = execute_ioctl(ctx, cmd); + if (ret) { + free(action); + return NULL; + } + + action->action.context = ctx; + action->type = IBV_FLOW_ACTION_UNSPECIFIED; + action->handle = read_attr_obj(XSC_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE, + handle); + + return &action->action; +} + +int xsc_destroy_flow_action(struct ibv_flow_action *action) +{ + struct verbs_flow_action *vaction = + container_of(action, struct verbs_flow_action, action); + int ret = ibv_cmd_destroy_flow_action(vaction); + + if (!ret) + free(action); + + return ret; +} + +static inline int xsc_access_dm(struct ibv_dm *ibdm, uint64_t dm_offset, + void *host_addr, size_t length, + uint32_t read) +{ + struct xsc_dm *dm = to_xdm(ibdm); + atomic_uint32_t *dm_ptr = + (atomic_uint32_t 
*)dm->start_va + dm_offset / 4; + uint32_t *host_ptr = host_addr; + const uint32_t *host_end = host_ptr + length / 4; + + if (dm_offset + length > dm->length) + return EFAULT; + + /* Due to HW limitation, DM access address and length must be aligned + * to 4 bytes. + */ + if ((length & 3) || (dm_offset & 3)) + return EINVAL; + + /* Copy granularity should be 4 Bytes since we enforce copy size to be + * a multiple of 4 bytes. + */ + if (read) { + while (host_ptr != host_end) { + *host_ptr = atomic_load_explicit(dm_ptr, + memory_order_relaxed); + host_ptr++; + dm_ptr++; + } + } else { + while (host_ptr != host_end) { + atomic_store_explicit(dm_ptr, *host_ptr, + memory_order_relaxed); + host_ptr++; + dm_ptr++; + } + } + + return 0; +} +static inline int xsc_memcpy_to_dm(struct ibv_dm *ibdm, uint64_t dm_offset, + const void *host_addr, size_t length) +{ + return xsc_access_dm(ibdm, dm_offset, (void *)host_addr, length, 0); +} + +static inline int xsc_memcpy_from_dm(void *host_addr, struct ibv_dm *ibdm, + uint64_t dm_offset, size_t length) +{ + return xsc_access_dm(ibdm, dm_offset, host_addr, length, 1); +} + +struct ibv_dm *xsc_alloc_dm(struct ibv_context *context, + struct ibv_alloc_dm_attr *dm_attr) +{ + DECLARE_COMMAND_BUFFER(cmdb, UVERBS_OBJECT_DM, UVERBS_METHOD_DM_ALLOC, + 2); + int page_size = to_xdev(context->device)->page_size; + struct xsc_context *xctx = to_xctx(context); + uint64_t act_size, start_offset; + struct xsc_dm *dm; + uint16_t page_idx; + off_t offset = 0; + void *va; + + if (!check_comp_mask(dm_attr->comp_mask, 0)) { + errno = EINVAL; + return NULL; + } + + if (dm_attr->length > xctx->max_dm_size) { + errno = EINVAL; + return NULL; + } + + dm = calloc(1, sizeof(*dm)); + if (!dm) { + errno = ENOMEM; + return NULL; + } + + + fill_attr_out(cmdb, XSC_IB_ATTR_ALLOC_DM_RESP_START_OFFSET, + &start_offset, sizeof(start_offset)); + fill_attr_out(cmdb, XSC_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX, + &page_idx, sizeof(page_idx)); + + if (ibv_cmd_alloc_dm(context, dm_attr, &dm->verbs_dm, cmdb)) + goto err_free_mem; + + act_size = align(dm_attr->length, page_size); + set_command(XSC_IB_MMAP_DEVICE_MEM, &offset); + set_extended_index(page_idx, &offset); + va = mmap(NULL, act_size, PROT_READ | PROT_WRITE, + MAP_SHARED, context->cmd_fd, + page_size * offset); + if (va == MAP_FAILED) + goto err_free_dm; + + dm->mmap_va = va; + dm->length = dm_attr->length; + dm->start_va = va + (start_offset & (page_size - 1)); + dm->verbs_dm.dm.memcpy_to_dm = xsc_memcpy_to_dm; + dm->verbs_dm.dm.memcpy_from_dm = xsc_memcpy_from_dm; + + return &dm->verbs_dm.dm; + +err_free_dm: + ibv_cmd_free_dm(&dm->verbs_dm); + +err_free_mem: + free(dm); + + return NULL; +} + +int xsc_free_dm(struct ibv_dm *ibdm) +{ + struct xsc_device *xdev = to_xdev(ibdm->context->device); + struct xsc_dm *dm = to_xdm(ibdm); + size_t act_size = align(dm->length, xdev->page_size); + int ret; + + ret = ibv_cmd_free_dm(&dm->verbs_dm); + + if (ret) + return ret; + + munmap(dm->mmap_va, act_size); + free(dm); + return 0; +} + +struct ibv_counters *xsc_create_counters(struct ibv_context *context, + struct ibv_counters_init_attr *init_attr) +{ + struct xsc_counters *mcntrs; + int ret; + + if (!check_comp_mask(init_attr->comp_mask, 0)) { + errno = EOPNOTSUPP; + return NULL; + } + + mcntrs = calloc(1, sizeof(*mcntrs)); + if (!mcntrs) { + errno = ENOMEM; + return NULL; + } + + pthread_mutex_init(&mcntrs->lock, NULL); + ret = ibv_cmd_create_counters(context, + init_attr, + &mcntrs->vcounters, + NULL); + if (ret) + goto err_create; + + 
list_head_init(&mcntrs->counters_list); + + return &mcntrs->vcounters.counters; + +err_create: + free(mcntrs); + return NULL; +} + +int xsc_destroy_counters(struct ibv_counters *counters) +{ + struct xsc_counters *mcntrs = to_mcounters(counters); + struct xsc_counter_node *tmp, *cntrs_node; + int ret; + + ret = ibv_cmd_destroy_counters(&mcntrs->vcounters); + if (ret) + return ret; + + list_for_each_safe(&mcntrs->counters_list, cntrs_node, tmp, entry) { + list_del(&cntrs_node->entry); + free(cntrs_node); + } + + free(mcntrs); + return 0; +} + +int xsc_attach_counters_point_flow(struct ibv_counters *counters, + struct ibv_counter_attach_attr *attr, + struct ibv_flow *flow) +{ + struct xsc_counters *mcntrs = to_mcounters(counters); + struct xsc_counter_node *cntrs_node; + int ret; + + /* The driver supports only the static binding mode as part of ibv_create_flow */ + if (flow) + return ENOTSUP; + + if (!check_comp_mask(attr->comp_mask, 0)) + return EOPNOTSUPP; + + /* Check whether the attached counter is supported */ + if (attr->counter_desc < IBV_COUNTER_PACKETS || + attr->counter_desc > IBV_COUNTER_BYTES) + return ENOTSUP; + + cntrs_node = calloc(1, sizeof(*cntrs_node)); + if (!cntrs_node) + return ENOMEM; + + pthread_mutex_lock(&mcntrs->lock); + /* The counter is bound to a flow, attach is not allowed */ + if (mcntrs->refcount) { + ret = EBUSY; + goto err_already_bound; + } + + cntrs_node->index = attr->index; + cntrs_node->desc = attr->counter_desc; + list_add(&mcntrs->counters_list, &cntrs_node->entry); + mcntrs->ncounters++; + pthread_mutex_unlock(&mcntrs->lock); + + return 0; + +err_already_bound: + pthread_mutex_unlock(&mcntrs->lock); + free(cntrs_node); + return ret; +} + +int xsc_read_counters(struct ibv_counters *counters, + uint64_t *counters_value, + uint32_t ncounters, + uint32_t flags) +{ + struct xsc_counters *mcntrs = to_mcounters(counters); + + return ibv_cmd_read_counters(&mcntrs->vcounters, + counters_value, + ncounters, + flags, + NULL); + +} + +struct xscdv_flow_matcher * +xscdv_create_flow_matcher(struct ibv_context *context, + struct xscdv_flow_matcher_attr *attr) +{ + DECLARE_COMMAND_BUFFER(cmd, XSC_IB_OBJECT_FLOW_MATCHER, + XSC_IB_METHOD_FLOW_MATCHER_CREATE, + 5); + struct xscdv_flow_matcher *flow_matcher; + struct ib_uverbs_attr *handle; + int ret; + + if (!check_comp_mask(attr->comp_mask, 0)) { + errno = EOPNOTSUPP; + return NULL; + } + + flow_matcher = calloc(1, sizeof(*flow_matcher)); + if (!flow_matcher) { + errno = ENOMEM; + return NULL; + } + + if (attr->type != IBV_FLOW_ATTR_NORMAL) { + errno = EOPNOTSUPP; + goto err; + } + + handle = fill_attr_out_obj(cmd, XSC_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE); + fill_attr_in(cmd, XSC_IB_ATTR_FLOW_MATCHER_MATCH_MASK, + attr->match_mask->match_buf, + attr->match_mask->match_sz); + fill_attr_in(cmd, XSC_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA, + &attr->match_criteria_enable, sizeof(attr->match_criteria_enable)); + fill_attr_in_enum(cmd, XSC_IB_ATTR_FLOW_MATCHER_FLOW_TYPE, + IBV_FLOW_ATTR_NORMAL, &attr->priority, + sizeof(attr->priority)); + if (attr->flags) + fill_attr_const_in(cmd, XSC_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS, + attr->flags); + + ret = execute_ioctl(context, cmd); + if (ret) + goto err; + + flow_matcher->context = context; + flow_matcher->handle = read_attr_obj(XSC_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE, handle); + + return flow_matcher; + +err: + free(flow_matcher); + return NULL; +} + +int xscdv_destroy_flow_matcher(struct xscdv_flow_matcher *flow_matcher) +{ + DECLARE_COMMAND_BUFFER(cmd, XSC_IB_OBJECT_FLOW_MATCHER, + 
XSC_IB_METHOD_FLOW_MATCHER_DESTROY, + 1); + int ret; + + fill_attr_in_obj(cmd, XSC_IB_ATTR_FLOW_MATCHER_DESTROY_HANDLE, flow_matcher->handle); + ret = execute_ioctl(flow_matcher->context, cmd); + verbs_is_destroy_err(&ret); + + if (ret) + return ret; + + free(flow_matcher); + return 0; +} + +#define CREATE_FLOW_MAX_FLOW_ACTIONS_SUPPORTED 8 +struct ibv_flow * +xscdv_create_flow(struct xscdv_flow_matcher *flow_matcher, + struct xscdv_flow_match_parameters *match_value, + size_t num_actions, + struct xscdv_flow_action_attr actions_attr[]) +{ + uint32_t flow_actions[CREATE_FLOW_MAX_FLOW_ACTIONS_SUPPORTED]; + struct verbs_flow_action *vaction; + int num_flow_actions = 0; + struct xsc_flow *mflow; + bool have_qp = false; + bool have_dest_devx = false; + bool have_flow_tag = false; + int ret; + int i; + DECLARE_COMMAND_BUFFER(cmd, UVERBS_OBJECT_FLOW, + XSC_IB_METHOD_CREATE_FLOW, + 6); + struct ib_uverbs_attr *handle; + enum xscdv_flow_action_type type; + + mflow = calloc(1, sizeof(*mflow)); + if (!mflow) { + errno = ENOMEM; + return NULL; + } + + handle = fill_attr_out_obj(cmd, XSC_IB_ATTR_CREATE_FLOW_HANDLE); + fill_attr_in(cmd, XSC_IB_ATTR_CREATE_FLOW_MATCH_VALUE, + match_value->match_buf, + match_value->match_sz); + fill_attr_in_obj(cmd, XSC_IB_ATTR_CREATE_FLOW_MATCHER, flow_matcher->handle); + + for (i = 0; i < num_actions; i++) { + type = actions_attr[i].type; + switch (type) { + case XSCDV_FLOW_ACTION_DEST_IBV_QP: + if (have_qp || have_dest_devx) { + errno = EOPNOTSUPP; + goto err; + } + fill_attr_in_obj(cmd, XSC_IB_ATTR_CREATE_FLOW_DEST_QP, + actions_attr[i].qp->handle); + have_qp = true; + break; + case XSCDV_FLOW_ACTION_IBV_FLOW_ACTION: + if (num_flow_actions == + CREATE_FLOW_MAX_FLOW_ACTIONS_SUPPORTED) { + errno = EOPNOTSUPP; + goto err; + } + vaction = container_of(actions_attr[i].action, + struct verbs_flow_action, + action); + + flow_actions[num_flow_actions] = vaction->handle; + num_flow_actions++; + break; + case XSCDV_FLOW_ACTION_DEST_DEVX: + if (have_dest_devx || have_qp) { + errno = EOPNOTSUPP; + goto err; + } + fill_attr_in_obj(cmd, XSC_IB_ATTR_CREATE_FLOW_DEST_DEVX, + actions_attr[i].obj->handle); + have_dest_devx = true; + break; + case XSCDV_FLOW_ACTION_TAG: + if (have_flow_tag) { + errno = EINVAL; + goto err; + } + fill_attr_in_uint32(cmd, + XSC_IB_ATTR_CREATE_FLOW_TAG, + actions_attr[i].tag_value); + have_flow_tag = true; + break; + default: + errno = EOPNOTSUPP; + goto err; + } + } + + if (num_flow_actions) + fill_attr_in_objs_arr(cmd, + XSC_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, + flow_actions, + num_flow_actions); + ret = execute_ioctl(flow_matcher->context, cmd); + if (ret) + goto err; + + mflow->flow_id.handle = read_attr_obj(XSC_IB_ATTR_CREATE_FLOW_HANDLE, handle); + mflow->flow_id.context = flow_matcher->context; + return &mflow->flow_id; +err: + free(mflow); + return NULL; +} + +struct xscdv_devx_umem * +xscdv_devx_umem_reg(struct ibv_context *context, void *addr, size_t size, uint32_t access) +{ + DECLARE_COMMAND_BUFFER(cmd, + XSC_IB_OBJECT_DEVX_UMEM, + XSC_IB_METHOD_DEVX_UMEM_REG, + 5); + struct ib_uverbs_attr *handle; + struct xsc_devx_umem *umem; + int ret; + + umem = calloc(1, sizeof(*umem)); + if (!umem) { + errno = ENOMEM; + return NULL; + } + + if (ibv_dontfork_range(addr, size)) + goto err; + + fill_attr_in_uint64(cmd, XSC_IB_ATTR_DEVX_UMEM_REG_ADDR, (intptr_t)addr); + fill_attr_in_uint64(cmd, XSC_IB_ATTR_DEVX_UMEM_REG_LEN, size); + fill_attr_in_uint32(cmd, XSC_IB_ATTR_DEVX_UMEM_REG_ACCESS, access); + fill_attr_out(cmd, XSC_IB_ATTR_DEVX_UMEM_REG_OUT_ID, + 
&umem->dv_devx_umem.umem_id, + sizeof(umem->dv_devx_umem.umem_id)); + handle = fill_attr_out_obj(cmd, XSC_IB_ATTR_DEVX_UMEM_REG_HANDLE); + + ret = execute_ioctl(context, cmd); + if (ret) + goto err_umem_reg_cmd; + + umem->handle = read_attr_obj(XSC_IB_ATTR_DEVX_UMEM_REG_HANDLE, handle); + umem->context = context; + umem->addr = addr; + umem->size = size; + + return &umem->dv_devx_umem; + +err_umem_reg_cmd: + ibv_dofork_range(addr, size); +err: + free(umem); + return NULL; +} + +int xscdv_devx_umem_dereg(struct xscdv_devx_umem *dv_devx_umem) +{ + DECLARE_COMMAND_BUFFER(cmd, + XSC_IB_OBJECT_DEVX_UMEM, + XSC_IB_METHOD_DEVX_UMEM_DEREG, + 1); + int ret; + struct xsc_devx_umem *umem = container_of(dv_devx_umem, struct xsc_devx_umem, + dv_devx_umem); + + fill_attr_in_obj(cmd, XSC_IB_ATTR_DEVX_UMEM_DEREG_HANDLE, umem->handle); + ret = execute_ioctl(umem->context, cmd); + if (ret) + return ret; + + ibv_dofork_range(umem->addr, umem->size); + free(umem); + return 0; +} + +struct xscdv_devx_obj * +xscdv_devx_obj_create(struct ibv_context *context, const void *in, size_t inlen, + void *out, size_t outlen) +{ + DECLARE_COMMAND_BUFFER(cmd, + XSC_IB_OBJECT_DEVX_OBJ, + XSC_IB_METHOD_DEVX_OBJ_CREATE, + 3); + struct ib_uverbs_attr *handle; + struct xscdv_devx_obj *obj; + int ret; + + obj = calloc(1, sizeof(*obj)); + if (!obj) { + errno = ENOMEM; + return NULL; + } + + handle = fill_attr_out_obj(cmd, XSC_IB_ATTR_DEVX_OBJ_CREATE_HANDLE); + fill_attr_in(cmd, XSC_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN, in, inlen); + fill_attr_out(cmd, XSC_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, out, outlen); + + ret = execute_ioctl(context, cmd); + if (ret) + goto err; + + obj->handle = read_attr_obj(XSC_IB_ATTR_DEVX_OBJ_CREATE_HANDLE, handle); + obj->context = context; + return obj; +err: + free(obj); + return NULL; +} + +int xscdv_devx_obj_query(struct xscdv_devx_obj *obj, const void *in, size_t inlen, + void *out, size_t outlen) +{ + DECLARE_COMMAND_BUFFER(cmd, + XSC_IB_OBJECT_DEVX_OBJ, + XSC_IB_METHOD_DEVX_OBJ_QUERY, + 3); + + fill_attr_in_obj(cmd, XSC_IB_ATTR_DEVX_OBJ_QUERY_HANDLE, obj->handle); + fill_attr_in(cmd, XSC_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN, in, inlen); + fill_attr_out(cmd, XSC_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT, out, outlen); + + return execute_ioctl(obj->context, cmd); +} + +int xscdv_devx_obj_modify(struct xscdv_devx_obj *obj, const void *in, size_t inlen, + void *out, size_t outlen) +{ + DECLARE_COMMAND_BUFFER(cmd, + XSC_IB_OBJECT_DEVX_OBJ, + XSC_IB_METHOD_DEVX_OBJ_MODIFY, + 3); + + fill_attr_in_obj(cmd, XSC_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE, obj->handle); + fill_attr_in(cmd, XSC_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN, in, inlen); + fill_attr_out(cmd, XSC_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT, out, outlen); + + return execute_ioctl(obj->context, cmd); +} + +int xscdv_devx_obj_destroy(struct xscdv_devx_obj *obj) +{ + DECLARE_COMMAND_BUFFER(cmd, + XSC_IB_OBJECT_DEVX_OBJ, + XSC_IB_METHOD_DEVX_OBJ_DESTROY, + 1); + int ret; + + fill_attr_in_obj(cmd, XSC_IB_ATTR_DEVX_OBJ_DESTROY_HANDLE, obj->handle); + ret = execute_ioctl(obj->context, cmd); + + if (ret) + return ret; + free(obj); + return 0; +} + +int xscdv_devx_general_cmd(struct ibv_context *context, const void *in, size_t inlen, + void *out, size_t outlen) +{ + DECLARE_COMMAND_BUFFER(cmd, + XSC_IB_OBJECT_DEVX, + XSC_IB_METHOD_DEVX_OTHER, + 2); + + fill_attr_in(cmd, XSC_IB_ATTR_DEVX_OTHER_CMD_IN, in, inlen); + fill_attr_out(cmd, XSC_IB_ATTR_DEVX_OTHER_CMD_OUT, out, outlen); + + return execute_ioctl(context, cmd); +} + +int xscdv_devx_query_eqn(struct ibv_context *context, uint32_t vector, + uint32_t 
*eqn) +{ + DECLARE_COMMAND_BUFFER(cmd, + XSC_IB_OBJECT_DEVX, + XSC_IB_METHOD_DEVX_QUERY_EQN, + 2); + + fill_attr_in_uint32(cmd, XSC_IB_ATTR_DEVX_QUERY_EQN_USER_VEC, vector); + fill_attr_out_ptr(cmd, XSC_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN, eqn); + + return execute_ioctl(context, cmd); +} diff --git a/providers/xscale/wqe.h b/providers/xscale/wqe.h new file mode 100644 index 0000000..4b7f327 --- /dev/null +++ b/providers/xscale/wqe.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd. + * All rights reserved. + */ + +#ifndef WQE_H +#define WQE_H + +#include + +struct xsc_wqe_eth_pad { + uint8_t rsvd0[16]; +}; + +struct xsc_wqe_xrc_seg { + __be32 xrc_srqn; + uint8_t rsvd[12]; +}; + +enum { + XSC_IPOIB_INLINE_MIN_HEADER_SIZE = 4, + XSC_SOURCE_QPN_INLINE_MAX_HEADER_SIZE = 18, + XSC_ETH_L2_INLINE_HEADER_SIZE = 18, + XSC_ETH_L2_MIN_HEADER_SIZE = 14, +}; + +struct xsc_wqe_umr_ctrl_seg { + uint8_t flags; + uint8_t rsvd0[3]; + __be16 klm_octowords; + __be16 translation_offset; + __be64 mkey_mask; + uint8_t rsvd1[32]; +}; + +struct xsc_wqe_umr_klm_seg { + /* up to 2GB */ + __be32 byte_count; + __be32 mkey; + __be64 address; +}; + +union xsc_wqe_umr_inline_seg { + struct xsc_wqe_umr_klm_seg klm; +}; + +struct xsc_wqe_mkey_context_seg { + uint8_t free; + uint8_t reserved1; + uint8_t access_flags; + uint8_t sf; + __be32 qpn_mkey; + __be32 reserved2; + __be32 flags_pd; + __be64 start_addr; + __be64 len; + __be32 bsf_octword_size; + __be32 reserved3[4]; + __be32 translations_octword_size; + uint8_t reserved4[3]; + uint8_t log_page_size; + __be32 reserved; + union xsc_wqe_umr_inline_seg inseg[0]; +}; + +struct xsc_rwqe_sig { + uint8_t rsvd0[4]; + uint8_t signature; + uint8_t rsvd1[11]; +}; + +#endif /* WQE_H */ diff --git a/providers/xscale/xsc-abi.h b/providers/xscale/xsc-abi.h new file mode 100644 index 0000000..7eab95c --- /dev/null +++ b/providers/xscale/xsc-abi.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd. + * All rights reserved. 
+ */ + +#ifndef XSC_ABI_H +#define XSC_ABI_H + +#include +#include +#include +#include +#include "xscdv.h" + +#define XSC_UVERBS_MIN_ABI_VERSION 1 +#define XSC_UVERBS_MAX_ABI_VERSION 1 + +DECLARE_DRV_CMD(xsc_alloc_ucontext, IB_USER_VERBS_CMD_GET_CONTEXT, + xsc_ib_alloc_ucontext_req, xsc_ib_alloc_ucontext_resp); +DECLARE_DRV_CMD(xsc_create_ah, IB_USER_VERBS_CMD_CREATE_AH, + empty, xsc_ib_create_ah_resp); +DECLARE_DRV_CMD(xsc_alloc_pd, IB_USER_VERBS_CMD_ALLOC_PD, + empty, xsc_ib_alloc_pd_resp); +DECLARE_DRV_CMD(xsc_create_cq, IB_USER_VERBS_CMD_CREATE_CQ, + xsc_ib_create_cq, xsc_ib_create_cq_resp); +DECLARE_DRV_CMD(xsc_create_cq_ex, IB_USER_VERBS_EX_CMD_CREATE_CQ, + xsc_ib_create_cq, xsc_ib_create_cq_resp); +DECLARE_DRV_CMD(xsc_create_qp_ex, IB_USER_VERBS_EX_CMD_CREATE_QP, + xsc_ib_create_qp, xsc_ib_create_qp_resp); +DECLARE_DRV_CMD(xsc_create_qp_ex_rss, IB_USER_VERBS_EX_CMD_CREATE_QP, + xsc_ib_create_qp_rss, xsc_ib_create_qp_resp); +DECLARE_DRV_CMD(xsc_create_qp, IB_USER_VERBS_CMD_CREATE_QP, + xsc_ib_create_qp, xsc_ib_create_qp_resp); +DECLARE_DRV_CMD(xsc_create_wq, IB_USER_VERBS_EX_CMD_CREATE_WQ, + xsc_ib_create_wq, xsc_ib_create_wq_resp); +DECLARE_DRV_CMD(xsc_modify_wq, IB_USER_VERBS_EX_CMD_MODIFY_WQ, + xsc_ib_modify_wq, empty); +DECLARE_DRV_CMD(xsc_create_rwq_ind_table, IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL, + empty, empty); +DECLARE_DRV_CMD(xsc_destroy_rwq_ind_table, IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL, + empty, empty); +DECLARE_DRV_CMD(xsc_resize_cq, IB_USER_VERBS_CMD_RESIZE_CQ, + xsc_ib_resize_cq, empty); +DECLARE_DRV_CMD(xsc_query_device_ex, IB_USER_VERBS_EX_CMD_QUERY_DEVICE, + empty, xsc_ib_query_device_resp); +DECLARE_DRV_CMD(xsc_modify_qp_ex, IB_USER_VERBS_EX_CMD_MODIFY_QP, + empty, xsc_ib_modify_qp_resp); + +struct xsc_modify_qp { + struct ibv_modify_qp_ex ibv_cmd; + __u32 comp_mask; + struct xsc_ib_burst_info burst_info; + __u32 reserved; +}; + +#endif /* XSC_ABI_H */ diff --git a/providers/xscale/xsc_api.h b/providers/xscale/xsc_api.h new file mode 100644 index 0000000..c533019 --- /dev/null +++ b/providers/xscale/xsc_api.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd. + * All rights reserved. 
+ */ + +#ifndef XSC_API_H +#define XSC_API_H + +#include + +#define xscdv_flow_action_flags xsc_ib_uapi_flow_action_flags +#define XSCDV_FLOW_ACTION_FLAGS_REQUIRE_METADATA XSC_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA +#define xscdv_flow_table_type xsc_ib_uapi_flow_table_type +#define XSCDV_FLOW_TABLE_TYPE_NIC_RX XSC_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX +#define XSCDV_FLOW_TABLE_TYPE_NIC_TX XSC_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX +#define xscdv_flow_action_packet_reformat_type xsc_ib_uapi_flow_action_packet_reformat_type +#define XSCDV_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2 XSC_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2 +#define XSCDV_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL XSC_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL +#define XSCDV_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2 XSC_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2 +#define XSCDV_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL XSC_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL + +enum xsc_qp_create_flags { + XSC_QP_CREATE_RAWPACKE_TSO = 1 << 0, + XSC_QP_CREATE_RAWPACKET_TSO = 1 << 0, + XSC_QP_CREATE_RAWPACKET_TX = 1 << 1, +}; + + +#endif diff --git a/providers/xscale/xsc_hsi.h b/providers/xscale/xsc_hsi.h new file mode 100644 index 0000000..53fe552 --- /dev/null +++ b/providers/xscale/xsc_hsi.h @@ -0,0 +1,252 @@ +/* + * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd. + * All rights reserved. + */ + +#ifndef __XSC_HSI_H__ +#define __XSC_HSI_H__ + +#include +#include +#include "sqm_csr_defines.h" +#include "rqm_csr_defines.h" +#include "cqm_csr_defines.h" + +#define upper_32_bits(n) ((uint32_t)(((n) >> 16) >> 16)) +#define lower_32_bits(n) ((uint32_t)(n)) + +#define DMA_LO_LE(x) __cpu_to_le32(lower_32_bits(x)) +#define DMA_HI_LE(x) __cpu_to_le32(upper_32_bits(x)) +#define DMA_REGPAIR_LE(x, val) do { \ + (x).hi = DMA_HI_LE((val)); \ + (x).lo = DMA_LO_LE((val)); \ + } while (0) + +#define WR_LE_16(x, val) x = __cpu_to_le16(val) +#define WR_LE_32(x, val) x = __cpu_to_le32(val) +#define WR_LE_64(x, val) x = __cpu_to_le64(val) +#define WR_LE_R64(x, val) DMA_REGPAIR_LE(x, val) +#define WR_BE_32(x, val) x = __cpu_to_be32(val) + +#define RD_LE_16(x) __le16_to_cpu(x) +#define RD_LE_32(x) __le32_to_cpu(x) +#define RD_BE_32(x) __be32_to_cpu(x) + +#define WR_REG(addr, val) mmio_write32_le(addr, val) +#define RD_REG(addr) mmio_read32_le(addr) + +/* message opcode */ +enum { + XSC_MSG_OPCODE_SEND = 0, + XSC_MSG_OPCODE_RDMA_WRITE = 1, + XSC_MSG_OPCODE_RDMA_READ = 2, + XSC_MSG_OPCODE_MAD = 3, + XSC_MSG_OPCODE_RDMA_ACK = 4, + XSC_MSG_OPCODE_RDMA_ACK_READ = 5, + XSC_MSG_OPCODE_RDMA_CNP = 6, + XSC_MSG_OPCODE_RAW = 7, + XSC_MSG_OPCODE_VIRTIO_NET = 8, + XSC_MSG_OPCODE_VIRTIO_BLK = 9, + XSC_MSG_OPCODE_RAW_TPE = 10, + XSC_MSG_OPCODE_INIT_QP_REQ = 11, + XSC_MSG_OPCODE_INIT_QP_RSP = 12, + XSC_MSG_OPCODE_INIT_PATH_REQ = 13, + XSC_MSG_OPCODE_INIT_PATH_RSP = 14, +}; + +enum { + XSC_REQ = 0, + XSC_RSP = 1, +}; + +enum { + XSC_WITHOUT_IMMDT = 0, + XSC_WITH_IMMDT = 1, +}; + +enum { + XSC_ERR_CODE_NAK_RETRY = 0x40, + XSC_ERR_CODE_NAK_OPCODE = 0x41, + XSC_ERR_CODE_NAK_MR = 0x42, + XSC_ERR_CODE_NAK_OPERATION = 0x43, + XSC_ERR_CODE_NAK_RNR = 0x44, + XSC_ERR_CODE_LOCAL_MR = 0x45, + XSC_ERR_CODE_LOCAL_LEN = 0x46, + XSC_ERR_CODE_LOCAL_OPCODE = 0x47, + XSC_ERR_CODE_CQ_OVER_FLOW = 0x48, + XSC_ERR_CODE_STRG_ACC_GEN_CQE = 0x4c, + XSC_ERR_CODE_CQE_ACC = 0x4d, + XSC_ERR_CODE_FLUSH = 0x4e, + XSC_ERR_CODE_MALF_WQE_HOST = 0x50, + XSC_ERR_CODE_MALF_WQE_INFO = 0x51, + XSC_ERR_CODE_MR_NON_NAK = 
0x52, + XSC_ERR_CODE_OPCODE_GEN_CQE = 0x61, + XSC_ERR_CODE_MANY_READ = 0x62, + XSC_ERR_CODE_LEN_GEN_CQE = 0x63, + XSC_ERR_CODE_MR = 0x65, + XSC_ERR_CODE_MR_GEN_CQE = 0x66, + XSC_ERR_CODE_OPERATION = 0x67, + XSC_ERR_CODE_MALF_WQE_INFO_GEN_NAK = 0x68, +}; + +/* TODO: sw cqe opcode*/ +enum { + XSC_OPCODE_RDMA_REQ_SEND = 0, + XSC_OPCODE_RDMA_REQ_SEND_IMMDT = 1, + XSC_OPCODE_RDMA_RSP_RECV = 2, + XSC_OPCODE_RDMA_RSP_RECV_IMMDT = 3, + XSC_OPCODE_RDMA_REQ_WRITE = 4, + XSC_OPCODE_RDMA_REQ_WRITE_IMMDT = 5, + XSC_OPCODE_RDMA_RSP_WRITE_IMMDT = 6, + XSC_OPCODE_RDMA_REQ_READ = 7, + XSC_OPCODE_RDMA_REQ_ERROR = 8, + XSC_OPCODE_RDMA_RSP_ERROR = 9, + XSC_OPCODE_RDMA_CQE_ERROR = 10, +}; + +enum { + XSC_BASE_WQE_SHIFT = 4, +}; + +/* + * Descriptors that are allocated by SW and accessed by HW, 32-byte aligned + */ +/* this is to keep descriptor structures packed */ +struct regpair { + __le32 lo; + __le32 hi; +}; + +struct xsc_send_wqe_ctrl_seg { + uint8_t msg_opcode; + uint8_t with_immdt:1; + uint8_t :2; + uint8_t ds_data_num:5; + __le16 wqe_id; + __le32 msg_len; + __le32 opcode_data; + uint8_t se:1; + uint8_t ce:1; + uint8_t in_line:1; + __le32 :29; +}; + + +struct xsc_wqe_data_seg { + union { + struct { + uint8_t :1; + __le32 seg_len:31; + __le32 mkey; + __le64 va; + }; + struct { + uint8_t in_line_data[16]; + }; + }; +}; + +struct xsc_cqe { + union { + uint8_t msg_opcode; + struct { + uint8_t error_code:7; + uint8_t is_error:1; + }; + }; + __le32 qp_id:15; + uint8_t :1; + uint8_t se:1; + uint8_t has_pph:1; + uint8_t type:1; + uint8_t with_immdt:1; + uint8_t csum_err:4; + __le32 imm_data; + __le32 msg_len; + __le32 vni; + __le64 ts:48; + __le16 wqe_id; + __le16 rsv[3]; + __le16 rsv1:15; + uint8_t owner:1; +}; + +/* Size of CQE */ +#define XSC_CQE_SIZE sizeof(struct xsc_cqe) + +union xsc_db_data { + struct { + __le32 sq_next_pid:16; + __le32 sqn:15; + __le32 :1; + }; + struct { + __le32 rq_next_pid:13; + __le32 rqn:15; + __le32 :4; + }; + struct { + __le32 cq_next_cid:16; + __le32 cqn:15; + __le32 solicited:1; + }; + __le32 raw_data; +}; + +#define CQM_DB_NEXT_CID_OFFSET(n) (4 * (n)) + +#define XSC_SEND_WQE_RING_DEPTH_MIN 16 +#define XSC_CQE_RING_DEPTH_MIN 2 +#define XSC_SEND_WQE_RING_DEPTH_MAX 1024 +#define XSC_RECV_WQE_RING_DEPTH_MAX 1024 +#define XSC_CQE_RING_DEPTH_MAX (1024 * 32) + +/* + * Registers that are allocated by HW and accessed by SW in 4-byte granularity + */ +/* MMT table (32 bytes) */ +struct xsc_mmt_tbl { + struct regpair pa; + struct regpair va; + __le32 size; +#define XSC_MMT_TBL_PD_MASK 0x00FFFFFF +#define XSC_MMT_TBL_KEY_MASK 0xFF000000 + __le32 key_pd; +#define XSC_MMT_TBL_ACC_MASK 0x0000000F + __le32 acc; + uint8_t padding[4]; +}; + +/* QP Context (16 bytes) */ +struct xsc_qp_context { +#define XSC_QP_CONTEXT_STATE_MASK 0x00000007 +#define XSC_QP_CONTEXT_FUNC_MASK 0x00000018 +#define XSC_QP_CONTEXT_DSTID_MASK 0x000000E0 +#define XSC_QP_CONTEXT_PD_MASK 0xFFFFFF00 + __le32 pd_dstid_func_state; +#define XSC_QP_CONTEXT_DSTQP_MASK 0x00FFFFFF +#define XSC_QP_CONTEXT_RCQIDL_MASK 0xFF000000 + __le32 rcqidl_dstqp; +#define XSC_QP_CONTEXT_RCQIDH_MASK 0x0000FFFF +#define XSC_QP_CONTEXT_SCQIDL_MASK 0xFFFF0000 + __le32 scqidl_rcqidh; +#define XSC_QP_CONTEXT_SCQIDH_MASK 0x000000FF + __le32 scqidh; +}; + +/* TODO: EPP Table and others */ + +static inline bool xsc_get_cqe_sw_own(struct xsc_cqe *cqe, int cid, int ring_sz) ALWAYS_INLINE; + +static inline void xsc_set_cqe_sw_own(struct xsc_cqe *cqe, int pid, int ring_sz) ALWAYS_INLINE; + +static inline bool xsc_get_cqe_sw_own(struct xsc_cqe *cqe, int cid, 
int ring_sz) +{ + return cqe->owner == ((cid >> ring_sz) & 1); +} + +static inline void xsc_set_cqe_sw_own(struct xsc_cqe *cqe, int pid, int ring_sz) +{ + cqe->owner = ((pid >> ring_sz) & 1); +} +#endif /* __XSC_HSI_H__ */ diff --git a/providers/xscale/xsc_hw.h b/providers/xscale/xsc_hw.h new file mode 100755 index 0000000..f2b0ce3 --- /dev/null +++ b/providers/xscale/xsc_hw.h @@ -0,0 +1,584 @@ +#ifndef _XSC_HW_H_ +#define _XSC_HW_H_ + +#include + +#include "xscale.h" + +struct xsc_andes_cqe { + union { + uint8_t msg_opcode; + struct { + uint8_t error_code:7; + uint8_t is_error:1; + }; + }; + __le32 qp_id:15; + uint8_t :1; + uint8_t se:1; + uint8_t has_pph:1; + uint8_t type:1; + uint8_t with_immdt:1; + uint8_t csum_err:4; + __le32 imm_data; + __le32 msg_len; + __le32 vni; + __le64 ts:48; + __le16 wqe_id; + __le16 rsv[3]; + __le16 rsv1:15; + uint8_t owner:1; +}; + +union xsc_andes_cq_doorbell { + struct { + uint32_t cq_next_cid:16; + uint32_t cq_id:15; + uint32_t arm:1; + }; + uint32_t val; +}; + +union xsc_andes_send_doorbell { + struct { + uint32_t next_pid:16; + uint32_t qp_id:15; + }; + uint32_t raw; +}; + +union xsc_andes_recv_doorbell { + struct { + uint32_t next_pid:13; + uint32_t qp_id:15; + }; + uint32_t raw; +}; + +struct xsc_andes_data_seg { + uint32_t :1; + uint32_t length:31; + uint32_t key; + uint64_t addr; +}; + +struct xsc_diamond_cqe { + uint8_t error_code; + __le32 qp_id:15; + uint8_t :1; + uint8_t se:1; + uint8_t has_pph:1; + uint8_t type:1; + uint8_t with_immdt:1; + uint8_t csum_err:4; + __le32 imm_data; + __le32 msg_len; + __le32 vni; + __le64 ts:48; + __le16 wqe_id; + uint8_t msg_opcode; + uint8_t rsv; + __le16 rsv1[2]; + __le16 rsv2:15; + uint8_t owner:1; +}; + +union xsc_diamond_cq_doorbell { + struct { + uint64_t cq_next_cid:23; + uint64_t cq_id:14; + uint64_t cq_sta:2; + }; + uint64_t raw; +}; + +union xsc_diamond_recv_doorbell { + struct { + uint64_t next_pid:14; + uint64_t qp_id:14; + }; + uint64_t raw; +}; + +union xsc_diamond_send_doorbell { + struct { + uint64_t next_pid:17; + uint64_t qp_id:14; + }; + uint64_t raw; +}; + +struct xsc_diamond_data_seg { + uint32_t length; + uint32_t key; + uint64_t addr; +}; + +union xsc_diamond_next_cq_doorbell { + struct { + uint64_t cq_next_cid:23; + uint64_t cq_id:10; + uint64_t cq_sta:2; + }; + uint64_t raw; +}; + +union xsc_diamond_next_send_doorbell { + struct { + uint64_t next_pid:17; + uint64_t qp_id:10; + }; + uint64_t raw; +}; + +union xsc_diamond_next_recv_doorbell { + struct { + uint64_t next_pid:14; + uint64_t qp_id:10; + }; + uint64_t raw; +}; + +enum { + XSC_CQ_STAT_FIRED, + XSC_CQ_STAT_KEEP, + XSC_CQ_STAT_ARM_NEXT, + XSC_CQ_STAT_ARM_SOLICITED, +}; + +#define XSC_HW_ALWAYS_INLINE inline __attribute__((always_inline)) + +static XSC_HW_ALWAYS_INLINE uint8_t xsc_diamond_get_cqe_msg_opcode(void *cqe) +{ + return ((struct xsc_diamond_cqe *)cqe)->msg_opcode; +} + +static XSC_HW_ALWAYS_INLINE uint8_t xsc_andes_get_cqe_msg_opcode(void *cqe) +{ + return ((struct xsc_andes_cqe *)cqe)->msg_opcode; +} + +static XSC_HW_ALWAYS_INLINE uint8_t xsc_hw_get_cqe_msg_opcode(uint16_t device_id, void *cqe) +{ + switch (device_id) { + case XSC_MS_PF_DEV_ID: + case XSC_MS_VF_DEV_ID: + return xsc_andes_get_cqe_msg_opcode(cqe); + case XSC_MC_PF_DEV_ID_DIAMOND: + case XSC_MC_PF_DEV_ID_DIAMOND_NEXT: + return xsc_diamond_get_cqe_msg_opcode(cqe); + default: + return xsc_andes_get_cqe_msg_opcode(cqe); + } +} + +static XSC_HW_ALWAYS_INLINE bool xsc_diamond_is_err_cqe(void *cqe) +{ + return !!((struct xsc_diamond_cqe *)cqe)->error_code; +} + 
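+/*
+ * Note (editorial, grounded in the switch statements below): the xsc_hw_*
+ * wrappers in this header select the per-chip ("andes" vs. "diamond" /
+ * "diamond next") implementation by PCI device ID and fall back to the
+ * andes layout for unrecognized device IDs.
+ */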
+static XSC_HW_ALWAYS_INLINE bool xsc_andes_is_err_cqe(void *cqe) +{ + return ((struct xsc_andes_cqe *)cqe)->is_error; +} + +static XSC_HW_ALWAYS_INLINE bool xsc_hw_is_err_cqe(uint16_t device_id, void *cqe) +{ + switch (device_id) { + case XSC_MS_PF_DEV_ID: + case XSC_MS_VF_DEV_ID: + return xsc_andes_is_err_cqe(cqe); + case XSC_MC_PF_DEV_ID_DIAMOND: + case XSC_MC_PF_DEV_ID_DIAMOND_NEXT: + return xsc_diamond_is_err_cqe(cqe); + default: + return xsc_andes_is_err_cqe(cqe); + } +} + +static XSC_HW_ALWAYS_INLINE uint8_t xsc_diamond_get_cqe_err_code(void *cqe) +{ + return ((struct xsc_diamond_cqe *)cqe)->error_code; +} + +static XSC_HW_ALWAYS_INLINE uint8_t xsc_andes_get_cqe_err_code(void *cqe) +{ + return ((struct xsc_andes_cqe *)cqe)->error_code; +} + +static XSC_HW_ALWAYS_INLINE uint8_t xsc_hw_get_cqe_err_code(uint16_t device_id, void *cqe) +{ + switch (device_id) { + case XSC_MS_PF_DEV_ID: + case XSC_MS_VF_DEV_ID: + return xsc_andes_get_cqe_err_code(cqe); + case XSC_MC_PF_DEV_ID_DIAMOND: + case XSC_MC_PF_DEV_ID_DIAMOND_NEXT: + return xsc_diamond_get_cqe_err_code(cqe); + default: + return xsc_andes_get_cqe_err_code(cqe); + } +} + +static inline enum ibv_wc_status xsc_andes_cqe_err_code(uint8_t error_code) +{ + switch (error_code) { + case XSC_ANDES_ERR_CODE_NAK_RETRY: + return IBV_WC_RETRY_EXC_ERR; + case XSC_ANDES_ERR_CODE_NAK_OPCODE: + return IBV_WC_REM_INV_REQ_ERR; + case XSC_ANDES_ERR_CODE_NAK_MR: + return IBV_WC_REM_ACCESS_ERR; + case XSC_ANDES_ERR_CODE_NAK_OPERATION: + return IBV_WC_REM_OP_ERR; + case XSC_ANDES_ERR_CODE_NAK_RNR: + return IBV_WC_RNR_RETRY_EXC_ERR; + case XSC_ANDES_ERR_CODE_LOCAL_MR: + return IBV_WC_LOC_PROT_ERR; + case XSC_ANDES_ERR_CODE_LOCAL_LEN: + return IBV_WC_LOC_LEN_ERR; + case XSC_ANDES_ERR_CODE_LEN_GEN_CQE: + return IBV_WC_LOC_LEN_ERR; + case XSC_ANDES_ERR_CODE_OPERATION: + return IBV_WC_LOC_ACCESS_ERR; + case XSC_ANDES_ERR_CODE_FLUSH: + return IBV_WC_WR_FLUSH_ERR; + case XSC_ANDES_ERR_CODE_MALF_WQE_HOST: + case XSC_ANDES_ERR_CODE_STRG_ACC_GEN_CQE: + case XSC_ANDES_ERR_CODE_STRG_ACC: + return IBV_WC_FATAL_ERR; + case XSC_ANDES_ERR_CODE_MR_GEN_CQE: + return IBV_WC_LOC_PROT_ERR; + case XSC_ANDES_ERR_CODE_LOCAL_OPERATION_WQE: + return IBV_WC_LOC_QP_OP_ERR; + case XSC_ANDES_ERR_CODE_OPCODE_GEN_CQE: + case XSC_ANDES_ERR_CODE_LOCAL_OPCODE: + default: + return IBV_WC_GENERAL_ERR; + } +} + +static inline enum ibv_wc_status xsc_diamond_cqe_err_code(uint8_t error_code) +{ + switch (error_code) { + case XSC_DIAMOND_ERR_CODE_NAK_SEQ_ERR: + case XSC_DIAMOND_ERR_CODE_RTO_REQ: + return IBV_WC_RETRY_EXC_ERR; + case XSC_DIAMOND_ERR_CODE_NAK_INV_REQ: + return IBV_WC_REM_INV_REQ_ERR; + case XSC_DIAMOND_ERR_CODE_NAK_MR: + return IBV_WC_REM_ACCESS_ERR; + case XSC_DIAMOND_ERR_CODE_NAK_REMOTE_OPER_ERR: + return IBV_WC_REM_OP_ERR; + case XSC_DIAMOND_ERR_CODE_LOCAL_MR_REQ: + case XSC_DIAMOND_ERR_CODE_REMOTE_MR: + case XSC_DIAMOND_ERR_CODE_REMOTE_MR_GEN_CQE: + case XSC_DIAMOND_ERR_CODE_LOCAL_MR_RSP: + return IBV_WC_LOC_PROT_ERR; + case XSC_DIAMOND_ERR_CODE_LEN: + case XSC_DIAMOND_ERR_CODE_LEN_GEN_CQE: + return IBV_WC_LOC_LEN_ERR; + case XSC_DIAMOND_ERR_CODE_FLUSH: + return IBV_WC_WR_FLUSH_ERR; + case XSC_DIAMOND_ERR_CODE_RCV_WQE_DMA: + case XSC_DIAMOND_ERR_CODE_DATA_DMA_RD_REQ: + case XSC_DIAMOND_ERR_CODE_DATA_DMA_WR_RSP_GEN_CQE: + case XSC_DIAMOND_ERR_CODE_DATA_DMA_WR_RSP: + return IBV_WC_FATAL_ERR; + case XSC_DIAMOND_ERR_CODE_SND_WQE_FORMAT: + return IBV_WC_LOC_QP_OP_ERR; + default: + return IBV_WC_GENERAL_ERR; + } +} + +static XSC_HW_ALWAYS_INLINE enum ibv_wc_status 
xsc_hw_cqe_err_status(uint16_t device_id, + void *cqe) +{ + switch (device_id) { + case XSC_MS_PF_DEV_ID: + case XSC_MS_VF_DEV_ID: + return xsc_andes_cqe_err_code(xsc_andes_get_cqe_err_code(cqe)); + case XSC_MC_PF_DEV_ID_DIAMOND: + case XSC_MC_PF_DEV_ID_DIAMOND_NEXT: + return xsc_diamond_cqe_err_code(xsc_diamond_get_cqe_err_code(cqe)); + default: + return xsc_andes_cqe_err_code(xsc_andes_get_cqe_err_code(cqe)); + } +} + +static XSC_HW_ALWAYS_INLINE void xsc_diamond_set_data_seg(void *data_seg, + uint64_t addr, uint32_t key, + uint32_t length) +{ + struct xsc_diamond_data_seg *seg = data_seg; + + seg->length = length; + seg->key = key; + seg->addr = addr; +} + +static XSC_HW_ALWAYS_INLINE void xsc_andes_set_data_seg(void *data_seg, + uint64_t addr, uint32_t key, + uint32_t length) +{ + struct xsc_andes_data_seg *seg = data_seg; + + seg->length = length; + seg->key = key; + seg->addr = addr; +} + +static XSC_HW_ALWAYS_INLINE void xsc_hw_set_data_seg(uint16_t device_id, void *data_seg, + uint64_t addr, uint32_t key, uint32_t length) +{ + switch (device_id) { + case XSC_MS_PF_DEV_ID: + case XSC_MS_VF_DEV_ID: + xsc_andes_set_data_seg(data_seg, addr, key, length); + break; + case XSC_MC_PF_DEV_ID_DIAMOND: + case XSC_MC_PF_DEV_ID_DIAMOND_NEXT: + xsc_diamond_set_data_seg(data_seg, addr, key, length); + break; + default: + xsc_andes_set_data_seg(data_seg, addr, key, length); + } +} + +static XSC_HW_ALWAYS_INLINE void xsc_diamond_set_cq_ci(void *db_addr, + uint32_t cqn, uint32_t next_cid) +{ + union xsc_diamond_cq_doorbell db; + + db.cq_id = cqn; + db.cq_next_cid = next_cid; + db.cq_sta = XSC_CQ_STAT_FIRED; + udma_to_device_barrier(); + mmio_write64_le(db_addr, db.raw); +} + +static XSC_HW_ALWAYS_INLINE void xsc_diamond_next_set_cq_ci(void *db_addr, + uint32_t cqn, uint32_t next_cid) +{ + union xsc_diamond_next_cq_doorbell db; + + db.cq_id = cqn; + db.cq_next_cid = next_cid; + db.cq_sta = XSC_CQ_STAT_FIRED; + udma_to_device_barrier(); + mmio_write64_le(db_addr, db.raw); +} + +static XSC_HW_ALWAYS_INLINE void xsc_andes_set_cq_ci(void *db_addr, + uint32_t cqn, uint32_t next_cid) +{ + union xsc_andes_cq_doorbell db; + + db.cq_id = cqn; + db.cq_next_cid = next_cid; + db.arm = XSC_CQ_STAT_FIRED; + udma_to_device_barrier(); + mmio_write32_le(db_addr, db.val); +} + + +static XSC_HW_ALWAYS_INLINE void xsc_hw_set_cq_ci(uint16_t device_id, void *db_addr, + uint32_t cqn, uint32_t next_cid) +{ + switch (device_id) { + case XSC_MS_PF_DEV_ID: + case XSC_MS_VF_DEV_ID: + xsc_andes_set_cq_ci(db_addr, cqn, next_cid); + break; + case XSC_MC_PF_DEV_ID_DIAMOND: + xsc_diamond_set_cq_ci(db_addr, cqn, next_cid); + break; + case XSC_MC_PF_DEV_ID_DIAMOND_NEXT: + xsc_diamond_next_set_cq_ci(db_addr, cqn, next_cid); + break; + default: + xsc_andes_set_cq_ci(db_addr, cqn, next_cid); + } +} + +static XSC_HW_ALWAYS_INLINE void xsc_diamond_update_cq_db(void *db_addr, + uint32_t cqn, uint32_t next_cid, + uint8_t solicited) +{ + union xsc_diamond_cq_doorbell db; + + db.cq_id = cqn; + db.cq_next_cid = next_cid; + db.cq_sta = solicited ? XSC_CQ_STAT_ARM_SOLICITED : XSC_CQ_STAT_ARM_NEXT; + udma_to_device_barrier(); + mmio_wc_start(); + mmio_write64_le(db_addr, db.raw); + mmio_flush_writes(); +} + +static XSC_HW_ALWAYS_INLINE void xsc_diamond_next_update_cq_db(void *db_addr, + uint32_t cqn, uint32_t next_cid, + uint8_t solicited) +{ + union xsc_diamond_next_cq_doorbell db; + + db.cq_id = cqn; + db.cq_next_cid = next_cid; + db.cq_sta = solicited ? 
XSC_CQ_STAT_ARM_SOLICITED : XSC_CQ_STAT_ARM_NEXT; + udma_to_device_barrier(); + mmio_wc_start(); + mmio_write64_le(db_addr, db.raw); + mmio_flush_writes(); +} + +static XSC_HW_ALWAYS_INLINE void xsc_andes_update_cq_db(void *db_addr, + uint32_t cqn, uint32_t next_cid, + uint8_t solicited) +{ + union xsc_andes_cq_doorbell db; + + db.cq_id = cqn; + db.cq_next_cid = next_cid; + db.arm = solicited; + udma_to_device_barrier(); + mmio_wc_start(); + mmio_write32_le(db_addr, db.val); + mmio_flush_writes(); +} + +static XSC_HW_ALWAYS_INLINE void xsc_hw_update_cq_db(uint16_t device_id, void *db_addr, + uint32_t cqn, uint32_t next_cid, + uint8_t solicited) +{ + switch (device_id) { + case XSC_MS_PF_DEV_ID: + case XSC_MS_VF_DEV_ID: + xsc_andes_update_cq_db(db_addr, cqn, next_cid, solicited); + break; + case XSC_MC_PF_DEV_ID_DIAMOND: + xsc_diamond_update_cq_db(db_addr, cqn, next_cid, solicited); + break; + case XSC_MC_PF_DEV_ID_DIAMOND_NEXT: + xsc_diamond_next_update_cq_db(db_addr, cqn, next_cid, solicited); + break; + default: + xsc_andes_update_cq_db(db_addr, cqn, next_cid, solicited); + } +} + +static XSC_HW_ALWAYS_INLINE void xsc_diamond_ring_rx_doorbell(void *db_addr, + uint32_t rqn, uint32_t next_pid) +{ + union xsc_diamond_recv_doorbell db; + + db.qp_id = rqn; + db.next_pid = next_pid; + + udma_to_device_barrier(); + mmio_write64_le(db_addr, db.raw); +} + + +static XSC_HW_ALWAYS_INLINE void xsc_diamond_next_ring_rx_doorbell(void *db_addr, + uint32_t rqn, uint32_t next_pid) +{ + union xsc_diamond_next_recv_doorbell db; + + db.qp_id = rqn; + db.next_pid = next_pid; + + udma_to_device_barrier(); + mmio_write64_le(db_addr, db.raw); +} + +static XSC_HW_ALWAYS_INLINE void xsc_andes_ring_rx_doorbell(void *db_addr, + uint32_t rqn, uint32_t next_pid) +{ + union xsc_andes_recv_doorbell db; + + db.qp_id = rqn; + db.next_pid = next_pid; + + udma_to_device_barrier(); + mmio_write32_le(db_addr, db.raw); +} + +static XSC_HW_ALWAYS_INLINE void xsc_hw_ring_rx_doorbell(uint16_t device_id, + void *db_addr, + uint32_t rqn, uint32_t next_pid) +{ + switch (device_id) { + case XSC_MS_PF_DEV_ID: + case XSC_MS_VF_DEV_ID: + xsc_andes_ring_rx_doorbell(db_addr, rqn, next_pid); + break; + case XSC_MC_PF_DEV_ID_DIAMOND: + xsc_diamond_ring_rx_doorbell(db_addr, rqn, next_pid); + break; + case XSC_MC_PF_DEV_ID_DIAMOND_NEXT: + xsc_diamond_next_ring_rx_doorbell(db_addr, rqn, next_pid); + break; + default: + xsc_andes_ring_rx_doorbell(db_addr, rqn, next_pid); + } +} + +static XSC_HW_ALWAYS_INLINE void xsc_diamond_ring_tx_doorbell(void *db_addr, + uint32_t rqn, uint32_t next_pid) +{ + union xsc_diamond_send_doorbell db; + + db.qp_id = rqn; + db.next_pid = next_pid; + + udma_to_device_barrier(); + mmio_write64_le(db_addr, db.raw); +} + + +static XSC_HW_ALWAYS_INLINE void xsc_diamond_next_ring_tx_doorbell(void *db_addr, + uint32_t rqn, uint32_t next_pid) +{ + union xsc_diamond_next_send_doorbell db; + + db.qp_id = rqn; + db.next_pid = next_pid; + + udma_to_device_barrier(); + mmio_write64_le(db_addr, db.raw); +} + +static XSC_HW_ALWAYS_INLINE void xsc_andes_ring_tx_doorbell(void *db_addr, + uint32_t rqn, uint32_t next_pid) +{ + union xsc_andes_send_doorbell db; + + db.qp_id = rqn; + db.next_pid = next_pid; + + udma_to_device_barrier(); + mmio_write32_le(db_addr, db.raw); +} + +static XSC_HW_ALWAYS_INLINE void xsc_hw_ring_tx_doorbell(uint16_t device_id, + void *db_addr, + uint32_t sqn, uint32_t next_pid) +{ + switch (device_id) { + case XSC_MS_PF_DEV_ID: + case XSC_MS_VF_DEV_ID: + xsc_andes_ring_tx_doorbell(db_addr, sqn, next_pid); + 
break; + case XSC_MC_PF_DEV_ID_DIAMOND: + xsc_diamond_ring_tx_doorbell(db_addr, sqn, next_pid); + break; + case XSC_MC_PF_DEV_ID_DIAMOND_NEXT: + xsc_diamond_next_ring_tx_doorbell(db_addr, sqn, next_pid); + break; + default: + xsc_andes_ring_tx_doorbell(db_addr, sqn, next_pid); + } +} + +#endif /* _XSC_HW_H_ */ diff --git a/providers/xscale/xscale.c b/providers/xscale/xscale.c new file mode 100644 index 0000000..e6792b9 --- /dev/null +++ b/providers/xscale/xscale.c @@ -0,0 +1,948 @@ +/* + * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd. + * All rights reserved. + */ + +#define _GNU_SOURCE +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "xscale.h" +#include "xsc-abi.h" +#include "wqe.h" +#include "xsc_hsi.h" + +#ifndef PCI_VENDOR_ID_MELLANOX +#define PCI_VENDOR_ID_MELLANOX 0x15b3 +#endif + +#ifndef CPU_OR +#define CPU_OR(x, y, z) do {} while (0) +#endif + +#ifndef CPU_EQUAL +#define CPU_EQUAL(x, y) 1 +#endif + +#define HCA(v, d) VERBS_PCI_MATCH(PCI_VENDOR_ID_##v, d, NULL) +static const struct verbs_match_ent hca_table[] = { + VERBS_MODALIAS_MATCH("*xscale*", NULL), + {} +}; + +uint32_t xsc_debug_mask = 0; +int xsc_freeze_on_error_cqe; +static void xsc_free_context(struct ibv_context *ibctx); + +static const struct verbs_context_ops xsc_ctx_common_ops = { + .query_port = xsc_query_port, + .alloc_pd = xsc_alloc_pd, + .dealloc_pd = xsc_free_pd, + .reg_mr = xsc_reg_mr, + .rereg_mr = xsc_rereg_mr, + .dereg_mr = xsc_dereg_mr, + .alloc_mw = NULL, + .dealloc_mw = NULL, + .bind_mw = NULL, + .create_cq = xsc_create_cq, + .poll_cq = xsc_poll_cq, + .req_notify_cq = xsc_arm_cq, + .cq_event = xsc_cq_event, + .resize_cq = xsc_resize_cq, + .destroy_cq = xsc_destroy_cq, + .create_srq = NULL, + .modify_srq = NULL, + .query_srq = NULL, + .destroy_srq = NULL, + .post_srq_recv = NULL, + .create_qp = xsc_create_qp, + .query_qp = xsc_query_qp, + .modify_qp = xsc_modify_qp, + .destroy_qp = xsc_destroy_qp, + .post_send = xsc_post_send, + .post_recv = xsc_post_recv, + .create_ah = xsc_create_ah, + .destroy_ah = xsc_destroy_ah, + .attach_mcast = xsc_attach_mcast, + .detach_mcast = xsc_detach_mcast, + + .alloc_dm = xsc_alloc_dm, + .alloc_parent_domain = xsc_alloc_parent_domain, + .alloc_td = NULL, + .attach_counters_point_flow = xsc_attach_counters_point_flow, + .close_xrcd = xsc_close_xrcd, + .create_counters = xsc_create_counters, + .create_cq_ex = xsc_create_cq_ex, + .create_flow = xsc_create_flow, + .create_flow_action_esp = xsc_create_flow_action_esp, + .create_qp_ex = xsc_create_qp_ex, + .create_rwq_ind_table = xsc_create_rwq_ind_table, + .create_srq_ex = NULL, + .create_wq = xsc_create_wq, + .dealloc_td = NULL, + .destroy_counters = xsc_destroy_counters, + .destroy_flow = xsc_destroy_flow, + .destroy_flow_action = xsc_destroy_flow_action, + .destroy_rwq_ind_table = xsc_destroy_rwq_ind_table, + .destroy_wq = xsc_destroy_wq, + .free_dm = xsc_free_dm, + .get_srq_num = NULL, + .modify_cq = xsc_modify_cq, + .modify_flow_action_esp = xsc_modify_flow_action_esp, + .modify_qp_rate_limit = xsc_modify_qp_rate_limit, + .modify_wq = xsc_modify_wq, + .open_xrcd = xsc_open_xrcd, + .post_srq_ops = NULL, + .query_device_ex = xsc_query_device_ex, + .query_rt_values = xsc_query_rt_values, + .read_counters = xsc_read_counters, + .reg_dm_mr = xsc_reg_dm_mr, + .alloc_null_mr = xsc_alloc_null_mr, + .free_context = xsc_free_context, +}; + +static int read_number_from_line(const char *line, int *value) +{ + const char *ptr; + + ptr = 
strchr(line, ':'); + if (!ptr) + return 1; + + ++ptr; + + *value = atoi(ptr); + return 0; +} +/** + * The function looks for the first free user-index in all the + * user-index tables. If all are used, returns -1, otherwise + * a valid user-index. + * In case the reference count of the table is zero, it means the + * table is not in use and wasn't allocated yet, therefore the + * xsc_store_uidx allocates the table, and increment the reference + * count on the table. + */ +static int32_t get_free_uidx(struct xsc_context *ctx) +{ + int32_t tind; + int32_t i; + + for (tind = 0; tind < XSC_UIDX_TABLE_SIZE; tind++) { + if (ctx->uidx_table[tind].refcnt < XSC_UIDX_TABLE_MASK) + break; + } + + if (tind == XSC_UIDX_TABLE_SIZE) + return -1; + + if (!ctx->uidx_table[tind].refcnt) + return tind << XSC_UIDX_TABLE_SHIFT; + + for (i = 0; i < XSC_UIDX_TABLE_MASK + 1; i++) { + if (!ctx->uidx_table[tind].table[i]) + break; + } + + return (tind << XSC_UIDX_TABLE_SHIFT) | i; +} + +int32_t xsc_store_uidx(struct xsc_context *ctx, void *rsc) +{ + int32_t tind; + int32_t ret = -1; + int32_t uidx; + + pthread_mutex_lock(&ctx->uidx_table_mutex); + uidx = get_free_uidx(ctx); + if (uidx < 0) + goto out; + + tind = uidx >> XSC_UIDX_TABLE_SHIFT; + + if (!ctx->uidx_table[tind].refcnt) { + ctx->uidx_table[tind].table = calloc(XSC_UIDX_TABLE_MASK + 1, + sizeof(struct xsc_resource *)); + if (!ctx->uidx_table[tind].table) + goto out; + } + + ++ctx->uidx_table[tind].refcnt; + ctx->uidx_table[tind].table[uidx & XSC_UIDX_TABLE_MASK] = rsc; + ret = uidx; + +out: + pthread_mutex_unlock(&ctx->uidx_table_mutex); + return ret; +} + +void xsc_clear_uidx(struct xsc_context *ctx, uint32_t uidx) +{ + int tind = uidx >> XSC_UIDX_TABLE_SHIFT; + + pthread_mutex_lock(&ctx->uidx_table_mutex); + + if (!--ctx->uidx_table[tind].refcnt) + free(ctx->uidx_table[tind].table); + else + ctx->uidx_table[tind].table[uidx & XSC_UIDX_TABLE_MASK] = NULL; + + pthread_mutex_unlock(&ctx->uidx_table_mutex); +} + +static int xsc_is_sandy_bridge(int *num_cores) +{ + char line[128]; + FILE *fd; + int rc = 0; + int cur_cpu_family = -1; + int cur_cpu_model = -1; + + fd = fopen("/proc/cpuinfo", "r"); + if (!fd) + return 0; + + *num_cores = 0; + + while (fgets(line, 128, fd)) { + int value; + + /* if this is information on new processor */ + if (!strncmp(line, "processor", 9)) { + ++*num_cores; + + cur_cpu_family = -1; + cur_cpu_model = -1; + } else if (!strncmp(line, "cpu family", 10)) { + if ((cur_cpu_family < 0) && (!read_number_from_line(line, &value))) + cur_cpu_family = value; + } else if (!strncmp(line, "model", 5)) { + if ((cur_cpu_model < 0) && (!read_number_from_line(line, &value))) + cur_cpu_model = value; + } + + /* if this is a Sandy Bridge CPU */ + if ((cur_cpu_family == 6) && + (cur_cpu_model == 0x2A || (cur_cpu_model == 0x2D) )) + rc = 1; + } + + fclose(fd); + return rc; +} + +/* +man cpuset + + This format displays each 32-bit word in hexadecimal (using ASCII characters "0" - "9" and "a" - "f"); words + are filled with leading zeros, if required. For masks longer than one word, a comma separator is used between + words. Words are displayed in big-endian order, which has the most significant bit first. The hex digits + within a word are also in big-endian order. + + The number of 32-bit words displayed is the minimum number needed to display all bits of the bitmask, based on + the size of the bitmask. 
+ + Examples of the Mask Format: + + 00000001 # just bit 0 set + 40000000,00000000,00000000 # just bit 94 set + 000000ff,00000000 # bits 32-39 set + 00000000,000E3862 # 1,5,6,11-13,17-19 set + + A mask with bits 0, 1, 2, 4, 8, 16, 32, and 64 set displays as: + + 00000001,00000001,00010117 + + The first "1" is for bit 64, the second for bit 32, the third for bit 16, the fourth for bit 8, the fifth for + bit 4, and the "7" is for bits 2, 1, and 0. +*/ +static void xsc_local_cpu_set(struct ibv_device *ibdev, cpu_set_t *cpu_set) +{ + char *p, buf[1024] = {}; + char *env_value; + uint32_t word; + int i, k; + + env_value = getenv("XSC_LOCAL_CPUS"); + if (env_value) + strncpy(buf, env_value, sizeof(buf) - 1); + else { + char fname[MAXPATHLEN]; + FILE *fp; + + snprintf(fname, MAXPATHLEN, "/sys/class/infiniband/%s/device/local_cpus", + ibv_get_device_name(ibdev)); + + fp = fopen(fname, "r"); + if (!fp) { + fprintf(stderr, PFX "Warning: can not get local cpu set: failed to open %s\n", fname); + return; + } + if (!fgets(buf, sizeof(buf), fp)) { + fprintf(stderr, PFX "Warning: can not get local cpu set: failed to read cpu mask\n"); + fclose(fp); + return; + } + fclose(fp); + } + + p = strrchr(buf, ','); + if (!p) + p = buf; + + i = 0; + do { + if (*p == ',') { + *p = 0; + p ++; + } + + word = strtoul(p, NULL, 16); + + for (k = 0; word; ++k, word >>= 1) + if (word & 1) + CPU_SET(k+i, cpu_set); + + if (p == buf) + break; + + p = strrchr(buf, ','); + if (!p) + p = buf; + + i += 32; + } while (i < CPU_SETSIZE); +} + +static int xsc_enable_sandy_bridge_fix(struct ibv_device *ibdev) +{ + cpu_set_t my_cpus, dev_local_cpus, result_set; + int stall_enable; + int ret; + int num_cores; + + if (!xsc_is_sandy_bridge(&num_cores)) + return 0; + + /* by default enable stall on sandy bridge arch */ + stall_enable = 1; + + /* + * check if app is bound to cpu set that is inside + * of device local cpu set. Disable stalling if true + */ + + /* use static cpu set - up to CPU_SETSIZE (1024) cpus/node */ + CPU_ZERO(&my_cpus); + CPU_ZERO(&dev_local_cpus); + CPU_ZERO(&result_set); + ret = sched_getaffinity(0, sizeof(my_cpus), &my_cpus); + if (ret == -1) { + if (errno == EINVAL) + fprintf(stderr, PFX "Warning: my cpu set is too small\n"); + else + fprintf(stderr, PFX "Warning: failed to get my cpu set\n"); + goto out; + } + + /* get device local cpu set */ + xsc_local_cpu_set(ibdev, &dev_local_cpus); + + /* check if my cpu set is in dev cpu */ + CPU_OR(&result_set, &my_cpus, &dev_local_cpus); + stall_enable = CPU_EQUAL(&result_set, &dev_local_cpus) ? 0 : 1; + +out: + return stall_enable; +} + +static void xsc_read_env(struct ibv_device *ibdev, struct xsc_context *ctx) +{ + char *env_value; + + env_value = getenv("XSC_STALL_CQ_POLL"); + if (env_value) + /* check if cq stall is enforced by user */ + ctx->stall_enable = (strcmp(env_value, "0")) ? 
1 : 0; + else + /* autodetect if we need to do cq polling */ + ctx->stall_enable = xsc_enable_sandy_bridge_fix(ibdev); + + env_value = getenv("XSC_STALL_NUM_LOOP"); + if (env_value) + xsc_stall_num_loop = atoi(env_value); + + env_value = getenv("XSC_STALL_CQ_POLL_MIN"); + if (env_value) + xsc_stall_cq_poll_min = atoi(env_value); + + env_value = getenv("XSC_STALL_CQ_POLL_MAX"); + if (env_value) + xsc_stall_cq_poll_max = atoi(env_value); + + env_value = getenv("XSC_STALL_CQ_INC_STEP"); + if (env_value) + xsc_stall_cq_inc_step = atoi(env_value); + + env_value = getenv("XSC_STALL_CQ_DEC_STEP"); + if (env_value) + xsc_stall_cq_dec_step = atoi(env_value); + + ctx->stall_adaptive_enable = 0; + ctx->stall_cycles = 0; + + if (xsc_stall_num_loop < 0) { + ctx->stall_adaptive_enable = 1; + ctx->stall_cycles = xsc_stall_cq_poll_min; + } + +} + +static void open_debug_file(struct xsc_context *ctx) +{ + char *env; + + env = getenv("XSC_DEBUG_FILE"); + if (!env) { + ctx->dbg_fp = stderr; + return; + } + + ctx->dbg_fp = fopen(env, "aw+"); + if (!ctx->dbg_fp) { + fprintf(stderr, "Failed opening debug file %s, using stderr\n", env); + ctx->dbg_fp = stderr; + return; + } +} + +static void close_debug_file(struct xsc_context *ctx) +{ + if (ctx->dbg_fp && ctx->dbg_fp != stderr) + fclose(ctx->dbg_fp); +} + +static void set_debug_mask(void) +{ + char *env; + + env = getenv("XSC_DEBUG_MASK"); + if (env) + xsc_debug_mask = strtol(env, NULL, 0); +} + +static void set_freeze_on_error(void) +{ + char *env; + + env = getenv("XSC_FREEZE_ON_ERROR_CQE"); + if (env) + xsc_freeze_on_error_cqe = strtol(env, NULL, 0); +} + +static int single_threaded_app(void) +{ + + char *env; + + env = getenv("XSC_SINGLE_THREADED"); + if (env) + return strcmp(env, "1") ? 0 : 1; + + return 0; +} + +static int xsc_cmd_get_context(struct xsc_context *context, + struct xsc_alloc_ucontext *req, + size_t req_len, + struct xsc_alloc_ucontext_resp *resp, + size_t resp_len) +{ + struct verbs_context *verbs_ctx = &context->ibv_ctx; + + return ibv_cmd_get_context(verbs_ctx, &req->ibv_cmd, + req_len, &resp->ibv_resp, resp_len); +} + +int xscdv_query_device(struct ibv_context *ctx_in, + struct xscdv_context *attrs_out) +{ + struct xsc_context *xctx = to_xctx(ctx_in); + uint64_t comp_mask_out = 0; + + attrs_out->version = 0; + attrs_out->flags = 0; + + if (xctx->cqe_version == XSC_CQE_VERSION_V1) + attrs_out->flags |= XSCDV_CONTEXT_FLAGS_CQE_V1; + + if (xctx->vendor_cap_flags & XSC_VENDOR_CAP_FLAGS_MPW_ALLOWED) + attrs_out->flags |= XSCDV_CONTEXT_FLAGS_MPW_ALLOWED; + + if (xctx->vendor_cap_flags & XSC_VENDOR_CAP_FLAGS_CQE_128B_COMP) + attrs_out->flags |= XSCDV_CONTEXT_FLAGS_CQE_128B_COMP; + + if (xctx->vendor_cap_flags & XSC_VENDOR_CAP_FLAGS_CQE_128B_PAD) + attrs_out->flags |= XSCDV_CONTEXT_FLAGS_CQE_128B_PAD; + + if (attrs_out->comp_mask & XSCDV_CONTEXT_MASK_CQE_COMPRESION) { + attrs_out->cqe_comp_caps = xctx->cqe_comp_caps; + comp_mask_out |= XSCDV_CONTEXT_MASK_CQE_COMPRESION; + } + + if (xctx->vendor_cap_flags & XSC_VENDOR_CAP_FLAGS_ENHANCED_MPW) + attrs_out->flags |= XSCDV_CONTEXT_FLAGS_ENHANCED_MPW; + + if (attrs_out->comp_mask & XSCDV_CONTEXT_MASK_SWP) { + attrs_out->sw_parsing_caps = xctx->sw_parsing_caps; + comp_mask_out |= XSCDV_CONTEXT_MASK_SWP; + } + + if (attrs_out->comp_mask & XSCDV_CONTEXT_MASK_STRIDING_RQ) { + attrs_out->striding_rq_caps = xctx->striding_rq_caps; + comp_mask_out |= XSCDV_CONTEXT_MASK_STRIDING_RQ; + } + + if (attrs_out->comp_mask & XSCDV_CONTEXT_MASK_TUNNEL_OFFLOADS) { + attrs_out->tunnel_offloads_caps = 
xctx->tunnel_offloads_caps; + comp_mask_out |= XSCDV_CONTEXT_MASK_TUNNEL_OFFLOADS; + } + + if (attrs_out->comp_mask & XSCDV_CONTEXT_MASK_CLOCK_INFO_UPDATE) { + if (xctx->clock_info_page) { + attrs_out->max_clock_info_update_nsec = + xctx->clock_info_page->overflow_period; + comp_mask_out |= XSCDV_CONTEXT_MASK_CLOCK_INFO_UPDATE; + } + } + + if (attrs_out->comp_mask & XSCDV_CONTEXT_MASK_FLOW_ACTION_FLAGS) { + attrs_out->flow_action_flags = xctx->flow_action_flags; + comp_mask_out |= XSCDV_CONTEXT_MASK_FLOW_ACTION_FLAGS; + } + + attrs_out->comp_mask = comp_mask_out; + + return 0; +} + +static int xscdv_get_qp(struct ibv_qp *qp_in, + struct xscdv_qp *qp_out) +{ + struct xsc_qp *xqp = to_xqp(qp_in); + uint64_t mask_out = 0; + + qp_out->dbrec = xqp->db; + qp_out->sq.db = xqp->sq.db; + qp_out->rq.db = xqp->rq.db; + + if (xqp->sq_buf_size) + /* IBV_QPT_RAW_PACKET */ + qp_out->sq.buf = (void *)((uintptr_t)xqp->sq_buf.buf); + else + qp_out->sq.buf = (void *)((uintptr_t)xqp->buf.buf + xqp->sq.offset); + qp_out->sq.wqe_cnt = xqp->sq.wqe_cnt; + qp_out->sq.stride = 1 << xqp->sq.wqe_shift; + + qp_out->rq.buf = (void *)((uintptr_t)xqp->buf.buf + xqp->rq.offset); + qp_out->rq.wqe_cnt = xqp->rq.wqe_cnt; + qp_out->rq.stride = 1 << xqp->rq.wqe_shift; + + if (qp_out->comp_mask & XSCDV_QP_MASK_RAW_QP_HANDLES) { + qp_out->tirn = xqp->tirn; + qp_out->tisn = xqp->tisn; + qp_out->rqn = xqp->rqn; + qp_out->sqn = xqp->sqn; + mask_out |= XSCDV_QP_MASK_RAW_QP_HANDLES; + } + + qp_out->comp_mask = mask_out; + + return 0; +} + +static int xscdv_get_cq(struct ibv_cq *cq_in, + struct xscdv_cq *cq_out) +{ + struct xsc_cq *xcq = to_xcq(cq_in); + + cq_out->comp_mask = 0; + cq_out->cqn = xcq->cqn; + cq_out->cqe_cnt = xcq->verbs_cq.cq_ex.cqe; + cq_out->cqe_size = xcq->cqe_sz; + cq_out->buf = xcq->active_buf->buf; + cq_out->dbrec = xcq->dbrec; + cq_out->db = xcq->db; + xcq->flags |= XSC_CQ_FLAGS_DV_OWNED; + + return 0; +} + +static int xscdv_get_rwq(struct ibv_wq *wq_in, + struct xscdv_rwq *rwq_out) +{ + struct xsc_rwq *xrwq = to_xrwq(wq_in); + + rwq_out->comp_mask = 0; + rwq_out->buf = xrwq->pbuff; + rwq_out->dbrec = xrwq->recv_db; + rwq_out->wqe_cnt = xrwq->rq.wqe_cnt; + rwq_out->stride = 1 << xrwq->rq.wqe_shift; + rwq_out->db = xrwq->rq.db; + + return 0; +} + +static int xscdv_get_dm(struct ibv_dm *dm_in, + struct xscdv_dm *dm_out) +{ + struct xsc_dm *xdm = to_xdm(dm_in); + + dm_out->comp_mask = 0; + dm_out->buf = xdm->start_va; + dm_out->length = xdm->length; + + return 0; +} + +static int xscdv_get_av(struct ibv_ah *ah_in, + struct xscdv_ah *ah_out) +{ + struct xsc_ah *xah = to_xah(ah_in); + + ah_out->comp_mask = 0; + ah_out->av = &xah->av; + + return 0; +} + +static int xscdv_get_pd(struct ibv_pd *pd_in, + struct xscdv_pd *pd_out) +{ + struct xsc_pd *xpd = to_xpd(pd_in); + + pd_out->comp_mask = 0; + pd_out->pdn = xpd->pdn; + + return 0; +} + + int xscdv_init_obj(struct xscdv_obj *obj, uint64_t obj_type) +{ + int ret = 0; + + if (obj_type & XSCDV_OBJ_QP) + ret = xscdv_get_qp(obj->qp.in, obj->qp.out); + if (!ret && (obj_type & XSCDV_OBJ_CQ)) + ret = xscdv_get_cq(obj->cq.in, obj->cq.out); + if (!ret && (obj_type & XSCDV_OBJ_RWQ)) + ret = xscdv_get_rwq(obj->rwq.in, obj->rwq.out); + if (!ret && (obj_type & XSCDV_OBJ_DM)) + ret = xscdv_get_dm(obj->dm.in, obj->dm.out); + if (!ret && (obj_type & XSCDV_OBJ_AH)) + ret = xscdv_get_av(obj->ah.in, obj->ah.out); + if (!ret && (obj_type & XSCDV_OBJ_PD)) + ret = xscdv_get_pd(obj->pd.in, obj->pd.out); + + return ret; +} + +int xscdv_set_context_attr(struct ibv_context *ibv_ctx, + enum 
xscdv_set_ctx_attr_type type, void *attr) +{ + struct xsc_context *ctx = to_xctx(ibv_ctx); + + switch (type) { + case XSCDV_CTX_ATTR_BUF_ALLOCATORS: + ctx->extern_alloc = *((struct xscdv_ctx_allocators *)attr); + break; + default: + return ENOTSUP; + } + + return 0; +} + +int xscdv_get_clock_info(struct ibv_context *ctx_in, + struct xscdv_clock_info *clock_info) +{ + struct xsc_context *ctx = to_xctx(ctx_in); + const struct xsc_ib_clock_info *ci = ctx->clock_info_page; + uint32_t retry, tmp_sig; + atomic_uint32_t *sig; + + if (!ci) + return EINVAL; + + sig = (atomic_uint32_t *)&ci->sign; + + do { + retry = 10; +repeat: + tmp_sig = atomic_load(sig); + if (unlikely(tmp_sig & + XSC_IB_CLOCK_INFO_KERNEL_UPDATING)) { + if (--retry) + goto repeat; + return EBUSY; + } + clock_info->nsec = ci->nsec; + clock_info->last_cycles = ci->cycles; + clock_info->frac = ci->frac; + clock_info->mult = ci->mult; + clock_info->shift = ci->shift; + clock_info->mask = ci->mask; + } while (unlikely(tmp_sig != atomic_load(sig))); + + return 0; +} + +struct ibv_context * +xscdv_open_device(struct ibv_device *device, struct xscdv_context_attr *attr) +{ + return verbs_open_device(device, attr); +} + +static int xsc_mmap(struct xsc_device *xdev, struct xsc_context *context, + int cmd_fd, int size) +{ + uint64_t page_mask; + + page_mask = (~(xdev->page_size - 1)); + xsc_dbg(context->dbg_fp, XSC_DBG_CTX, "page size:%d\n", size); + context->sqm_reg_va = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, + cmd_fd, context->qpm_tx_db & page_mask); + if (context->sqm_reg_va == MAP_FAILED) { + return -1; + } + xsc_dbg(context->dbg_fp, XSC_DBG_CTX, "qpm reg va:%p\n", context->sqm_reg_va); + + context->rqm_reg_va = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, + cmd_fd, context->qpm_rx_db & page_mask); + if (context->rqm_reg_va == MAP_FAILED) { + goto free_sqm; + } + xsc_dbg(context->dbg_fp, XSC_DBG_CTX, "qpm reg va:%p\n", context->rqm_reg_va); + + context->cqm_reg_va = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, + cmd_fd, context->cqm_next_cid_reg & page_mask); + if (context->cqm_reg_va == MAP_FAILED) { + goto free_rqm; + } + xsc_dbg(context->dbg_fp, XSC_DBG_CTX, "cqm ci va:%p\n", context->cqm_reg_va); + context->db_mmap_size = size; + + context->cqm_armdb_va = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, + cmd_fd, context->cqm_armdb & page_mask); + if (context->cqm_armdb_va == MAP_FAILED) { + goto free_cqm; + } + xsc_dbg(context->dbg_fp, XSC_DBG_CTX, "cqm armdb va:%p\n", context->cqm_armdb_va); + + return 0; + +free_cqm: + munmap(context->cqm_reg_va, size); +free_rqm: + munmap(context->rqm_reg_va, size); +free_sqm: + munmap(context->sqm_reg_va, size); + + return -1; + +} +static void xsc_munmap(struct xsc_context *context) +{ + if (context->sqm_reg_va) + munmap(context->sqm_reg_va, context->db_mmap_size); + + if (context->rqm_reg_va) + munmap(context->rqm_reg_va, context->db_mmap_size); + + if (context->cqm_reg_va) + munmap(context->cqm_reg_va, context->db_mmap_size); + + if (context->cqm_armdb_va) + munmap(context->cqm_armdb_va, context->db_mmap_size); + +} +static struct verbs_context *xsc_alloc_context(struct ibv_device *ibdev, + int cmd_fd, + void *private_data) +{ + struct xsc_context *context; + struct xsc_alloc_ucontext req; + struct xsc_alloc_ucontext_resp resp; + int i; + int page_size; + int j; + struct xsc_device *xdev = to_xdev(ibdev); + struct verbs_context *v_ctx; + struct ibv_port_attr port_attr; + struct ibv_device_attr_ex device_attr; + struct xscdv_context_attr *ctx_attr = 
private_data; + + if (ctx_attr && ctx_attr->comp_mask) { + errno = EINVAL; + return NULL; + } + + context = verbs_init_and_alloc_context(ibdev, cmd_fd, context, ibv_ctx, + RDMA_DRIVER_XSC); + if (!context) + return NULL; + + v_ctx = &context->ibv_ctx; + page_size = xdev->page_size; + xsc_single_threaded = single_threaded_app(); + + open_debug_file(context); + set_debug_mask(); + set_freeze_on_error(); + if (gethostname(context->hostname, sizeof(context->hostname))) + strcpy(context->hostname, "host_unknown"); + + memset(&req, 0, sizeof(req)); + memset(&resp, 0, sizeof(resp)); + + if (xsc_cmd_get_context(context, &req, sizeof(req), &resp, + sizeof(resp))) + goto err_free; + + context->max_num_qps = resp.qp_tab_size; + context->cache_line_size = resp.cache_line_size; + context->max_sq_desc_sz = resp.max_sq_desc_sz; + context->max_rq_desc_sz = resp.max_rq_desc_sz; + context->max_send_wqebb = resp.max_send_wqebb; + context->num_ports = resp.num_ports; + context->max_recv_wr = resp.max_recv_wr; + context->qpm_tx_db = resp.qpm_tx_db; + context->qpm_rx_db = resp.qpm_rx_db; + context->cqm_next_cid_reg = resp.cqm_next_cid_reg; + context->cqm_armdb = resp.cqm_armdb; + context->send_ds_num = resp.send_ds_num; + context->send_ds_shift = xsc_ilog2(resp.send_ds_num); + context->recv_ds_num = resp.recv_ds_num; + context->recv_ds_shift = xsc_ilog2(resp.recv_ds_num); + + xsc_dbg(context->dbg_fp, XSC_DBG_CTX, + "max_num_qps:%u, max_sq_desc_sz:%u max_rq_desc_sz:%u " \ + "max_send_wqebb:%u, num_ports:%u, max_recv_wr:%u\n", + context->max_num_qps, context->max_sq_desc_sz, + context->max_rq_desc_sz, context->max_send_wqebb, + context->num_ports, context->max_recv_wr); + + xsc_dbg(context->dbg_fp, XSC_DBG_CTX, "send_ds_num:%u shift:%u recv_ds_num:%u shift:%u\n", + context->send_ds_num, context->send_ds_shift, + context->recv_ds_num, context->recv_ds_shift); + context->dump_fill_mkey = XSC_INVALID_LKEY; + context->dump_fill_mkey_be = htobe32(XSC_INVALID_LKEY); + context->eth_min_inline_size = XSC_ETH_L2_INLINE_HEADER_SIZE; + context->cmds_supp_uhw = resp.cmds_supp_uhw; + + pthread_mutex_init(&context->qp_table_mutex, NULL); + pthread_mutex_init(&context->uidx_table_mutex, NULL); + for (i = 0; i < XSC_QP_TABLE_SIZE; ++i) + context->qp_table[i].refcnt = 0; + + for (i = 0; i < XSC_QP_TABLE_SIZE; ++i) + context->uidx_table[i].refcnt = 0; + + context->db_list = NULL; + context->page_size = page_size; + if (xsc_mmap(xdev, context, cmd_fd, page_size)) + goto err_free; + + pthread_mutex_init(&context->db_list_mutex, NULL); + + context->hca_core_clock = NULL; + context->clock_info_page = NULL; + + xsc_read_env(ibdev, context); + + xsc_spinlock_init(&context->hugetlb_lock, !xsc_single_threaded); + list_head_init(&context->hugetlb_list); + + verbs_set_ops(v_ctx, &xsc_ctx_common_ops); + + memset(&device_attr, 0, sizeof(device_attr)); + if (!xsc_query_device_ex(&v_ctx->context, NULL, &device_attr, + sizeof(struct ibv_device_attr_ex))) { + context->cached_device_cap_flags = + device_attr.orig_attr.device_cap_flags; + context->atomic_cap = device_attr.orig_attr.atomic_cap; + context->cached_tso_caps = device_attr.tso_caps; + context->max_dm_size = device_attr.max_dm_size; + } + + for (j = 0; j < min(XSC_MAX_PORTS_NUM, context->num_ports); ++j) { + memset(&port_attr, 0, sizeof(port_attr)); + if (!xsc_query_port(&v_ctx->context, j + 1, &port_attr)) { + context->cached_link_layer[j] = port_attr.link_layer; + context->cached_port_flags[j] = port_attr.flags; + } + } + + return v_ctx; + +err_free: + 
verbs_uninit_context(&context->ibv_ctx); + close_debug_file(context); + free(context); + return NULL; +} + +static void xsc_free_context(struct ibv_context *ibctx) +{ + struct xsc_context *context = to_xctx(ibctx); + + xsc_dbg(context->dbg_fp, XSC_DBG_CTX, "\n"); + xsc_munmap(context); + + verbs_uninit_context(&context->ibv_ctx); + close_debug_file(context); + free(context); +} + +static void xsc_uninit_device(struct verbs_device *verbs_device) +{ + struct xsc_device *xdev = to_xdev(&verbs_device->device); + + free(xdev); +} + +static struct verbs_device *xsc_device_alloc(struct verbs_sysfs_dev *sysfs_dev) +{ + struct xsc_device *xdev; + + xdev = calloc(1, sizeof *xdev); + if (!xdev) + return NULL; + + xdev->page_size = sysconf(_SC_PAGESIZE); + xdev->driver_abi_ver = sysfs_dev->abi_ver; + + return &xdev->verbs_dev; +} + +static const struct verbs_device_ops xsc_dev_ops = { + .name = "xscale", + .match_min_abi_version = XSC_UVERBS_MIN_ABI_VERSION, + .match_max_abi_version = XSC_UVERBS_MAX_ABI_VERSION, + .match_table = hca_table, + .alloc_device = xsc_device_alloc, + .uninit_device = xsc_uninit_device, + .alloc_context = xsc_alloc_context, +}; +PROVIDER_DRIVER(xscale, xsc_dev_ops); diff --git a/providers/xscale/xscale.h b/providers/xscale/xscale.h new file mode 100644 index 0000000..0aee472 --- /dev/null +++ b/providers/xscale/xscale.h @@ -0,0 +1,834 @@ +/* + * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd. + * All rights reserved. + */ + +#ifndef XSCALE_H +#define XSCALE_H + +#include +#include +#include +#include +#include + +#include +#include +#include "xsc-abi.h" +#include +#include "bitmap.h" +#include +#include "xscdv.h" + +#include + +#define PFX "xsc: " +#define offsetofend(_type, _member) \ + (offsetof(_type, _member) + sizeof(((_type *)0)->_member)) + +typedef _Atomic(uint32_t) atomic_uint32_t; + +enum { + XSC_IB_MMAP_CMD_SHIFT = 8, + XSC_IB_MMAP_CMD_MASK = 0xff, +}; + +enum { + XSC_CQE_VERSION_V0 = 0, + XSC_CQE_VERSION_V1 = 1, +}; + +enum { + XSC_ADAPTER_PAGE_SIZE = 4096, +}; + +enum { + XSC_QP_FLAG_RAWPACKET_TSO = 1 << 9, + XSC_QP_FLAG_RAWPACKET_TX = 1 << 10, +}; + + +#define XSC_CQ_PREFIX "XSC_CQ" +#define XSC_QP_PREFIX "XSC_QP" +#define XSC_MR_PREFIX "XSC_MR" +#define XSC_RWQ_PREFIX "XSC_RWQ" +#define XSC_MAX_LOG2_CONTIG_BLOCK_SIZE 23 +#define XSC_MIN_LOG2_CONTIG_BLOCK_SIZE 12 + +enum { + XSC_DBG_QP = 1 << 0, + XSC_DBG_CQ = 1 << 1, + XSC_DBG_QP_SEND = 1 << 2, + XSC_DBG_QP_SEND_ERR = 1 << 3, + XSC_DBG_CQ_CQE = 1 << 4, + XSC_DBG_CONTIG = 1 << 5, + XSC_DBG_DR = 1 << 6, + XSC_DBG_CTX = 1 << 7, + XSC_DBG_PD = 1 << 8, + XSC_DBG_MR = 1 << 9, +}; + +extern uint32_t xsc_debug_mask; +extern int xsc_freeze_on_error_cqe; + +#define XSC_DEBUG +#ifdef XSC_DEBUG +#define xsc_dbg(fp, mask, fmt, args...) \ +do { \ + if (xsc_debug_mask & mask) { \ + char host[256]; \ + char timestr[32]; \ + struct tm now_tm; \ + time_t now_time; \ + time(&now_time); \ + localtime_r(&now_time, &now_tm); \ + strftime(timestr, sizeof(timestr), "%Y-%m-%d %X", &now_tm); \ + gethostname(host, 256); \ + fprintf(fp, "[%s %s %s %d] " fmt, timestr, host, __func__, __LINE__, ##args); \ + } \ +} while (0) +#else +static inline void xsc_dbg(FILE *fp, uint32_t mask, const char *fmt, ...) +{ +} +#endif + +#define xsc_err(fmt, args...) 
\ +do { \ + char host[256]; \ + char timestr[32]; \ + struct tm now_tm; \ + time_t now_time; \ + time(&now_time); \ + localtime_r(&now_time, &now_tm); \ + strftime(timestr, sizeof(timestr), "%Y-%m-%d %X", &now_tm); \ + gethostname(host, 256); \ + printf("[%s %s %s %d] " fmt, timestr, host, __func__, __LINE__, ##args); \ +} while (0) + +enum { + XSC_QP_TABLE_SHIFT = 12, + XSC_QP_TABLE_MASK = (1 << XSC_QP_TABLE_SHIFT) - 1, + XSC_QP_TABLE_SIZE = 1 << (24 - XSC_QP_TABLE_SHIFT), +}; + +enum { + XSC_UIDX_TABLE_SHIFT = 12, + XSC_UIDX_TABLE_MASK = (1 << XSC_UIDX_TABLE_SHIFT) - 1, + XSC_UIDX_TABLE_SIZE = 1 << (24 - XSC_UIDX_TABLE_SHIFT), +}; + +enum { + XSC_MAX_PORTS_NUM = 2, +}; + +enum xsc_alloc_type { + XSC_ALLOC_TYPE_ANON, + XSC_ALLOC_TYPE_HUGE, + XSC_ALLOC_TYPE_CONTIG, + XSC_ALLOC_TYPE_PREFER_HUGE, + XSC_ALLOC_TYPE_PREFER_CONTIG, + XSC_ALLOC_TYPE_EXTERNAL, + XSC_ALLOC_TYPE_ALL +}; + +enum xsc_rsc_type { + XSC_RSC_TYPE_QP, + XSC_RSC_TYPE_XSRQ, + XSC_RSC_TYPE_SRQ, + XSC_RSC_TYPE_RWQ, + XSC_RSC_TYPE_INVAL, +}; + +enum xsc_vendor_cap_flags { + XSC_VENDOR_CAP_FLAGS_MPW = 1 << 0, /* Obsoleted */ + XSC_VENDOR_CAP_FLAGS_MPW_ALLOWED = 1 << 1, + XSC_VENDOR_CAP_FLAGS_ENHANCED_MPW = 1 << 2, + XSC_VENDOR_CAP_FLAGS_CQE_128B_COMP = 1 << 3, + XSC_VENDOR_CAP_FLAGS_CQE_128B_PAD = 1 << 4, +}; + +enum { + XSC_FLOW_TAG_MASK = 0x00ffffff, +}; + +struct xsc_resource { + enum xsc_rsc_type type; + uint32_t rsn; +}; + +struct xsc_device { + struct verbs_device verbs_dev; + int page_size; + int driver_abi_ver; +}; + +struct xsc_db_page; + +struct xsc_spinlock { + pthread_spinlock_t lock; + int in_use; + int need_lock; +}; + +/* PAGE_SHIFT determines the page size */ + +#define PAGE_SHIFT 12 +#define PAGE_SIZE (1UL << PAGE_SHIFT) +#define PAGE_MASK (~(PAGE_SIZE-1)) + +struct xsc_context { + struct verbs_context ibv_ctx; + int max_num_qps; + struct { + struct xsc_qp **table; + int refcnt; + } qp_table[XSC_QP_TABLE_SIZE]; + pthread_mutex_t qp_table_mutex; + + struct { + struct xsc_resource **table; + int refcnt; + } uidx_table[XSC_UIDX_TABLE_SIZE]; + pthread_mutex_t uidx_table_mutex; + + struct xsc_db_page *db_list; + pthread_mutex_t db_list_mutex; + int cache_line_size; + int max_sq_desc_sz; + int max_rq_desc_sz; + int max_send_wqebb; + int max_recv_wr; + int num_ports; + int stall_enable; + int stall_adaptive_enable; + int stall_cycles; + char hostname[40]; + struct xsc_spinlock hugetlb_lock; + struct list_head hugetlb_list; + int cqe_version; + uint8_t cached_link_layer[XSC_MAX_PORTS_NUM]; + uint8_t cached_port_flags[XSC_MAX_PORTS_NUM]; + unsigned int cached_device_cap_flags; + enum ibv_atomic_cap atomic_cap; + struct { + uint64_t offset; + uint64_t mask; + } core_clock; + void *hca_core_clock; + const struct xsc_ib_clock_info *clock_info_page; + struct ibv_tso_caps cached_tso_caps; + int cmds_supp_uhw; + uint64_t vendor_cap_flags; /* Use enum xsc_vendor_cap_flags */ + struct xscdv_cqe_comp_caps cqe_comp_caps; + struct xscdv_ctx_allocators extern_alloc; + struct xscdv_sw_parsing_caps sw_parsing_caps; + struct xscdv_striding_rq_caps striding_rq_caps; + uint32_t tunnel_offloads_caps; + struct xsc_packet_pacing_caps packet_pacing_caps; + uint16_t flow_action_flags; + uint64_t max_dm_size; + uint32_t eth_min_inline_size; + uint32_t dump_fill_mkey; + __be32 dump_fill_mkey_be; + void *sqm_reg_va; + void *rqm_reg_va; + void *cqm_reg_va; + void *cqm_armdb_va; + int db_mmap_size; + uint32_t page_size; + uint64_t qpm_tx_db; + uint64_t qpm_rx_db; + uint64_t cqm_next_cid_reg; + uint64_t cqm_armdb; + uint32_t send_ds_num; + uint32_t 
recv_ds_num; + uint32_t send_ds_shift; + uint32_t recv_ds_shift; + FILE *dbg_fp; +}; + +struct xsc_bitmap { + uint32_t last; + uint32_t top; + uint32_t max; + uint32_t avail; + uint32_t mask; + unsigned long *table; +}; + +struct xsc_hugetlb_mem { + int shmid; + void *shmaddr; + struct xsc_bitmap bitmap; + struct list_node entry; +}; + +struct xsc_buf { + void *buf; + size_t length; + int base; + struct xsc_hugetlb_mem *hmem; + enum xsc_alloc_type type; +}; + +struct xsc_pd { + struct ibv_pd ibv_pd; + uint32_t pdn; + atomic_int refcount; + struct xsc_pd *xprotection_domain; +}; + +struct xsc_parent_domain { + struct xsc_pd xpd; +}; + +enum { + XSC_CQ_FLAGS_RX_CSUM_VALID = 1 << 0, + XSC_CQ_FLAGS_EMPTY_DURING_POLL = 1 << 1, + XSC_CQ_FLAGS_FOUND_CQES = 1 << 2, + XSC_CQ_FLAGS_EXTENDED = 1 << 3, + XSC_CQ_FLAGS_SINGLE_THREADED = 1 << 4, + XSC_CQ_FLAGS_DV_OWNED = 1 << 5, + XSC_CQ_FLAGS_TM_SYNC_REQ = 1 << 6, +}; + +struct xsc_err_state_qp_node { + struct list_node entry; + uint32_t qp_id; + bool is_sq; +}; + +struct xsc_cq { + /* ibv_cq should always be subset of ibv_cq_ex */ + struct verbs_cq verbs_cq; + struct xsc_buf buf_a; + struct xsc_buf buf_b; + struct xsc_buf *active_buf; + struct xsc_buf *resize_buf; + int resize_cqes; + int active_cqes; + struct xsc_spinlock lock; + uint32_t cqn; + uint32_t cons_index; + __le32 *dbrec; + __le32 *db; + __le32 *armdb; + uint32_t cqe_cnt; + int log2_cq_ring_sz; + int arm_sn; + int cqe_sz; + int resize_cqe_sz; + int stall_next_poll; + int stall_enable; + uint64_t stall_last_count; + int stall_adaptive_enable; + int stall_cycles; + struct xsc_resource *cur_rsc; + struct xsc_cqe64 *cqe64; + uint32_t flags; + int umr_opcode; + struct xscdv_clock_info last_clock_info; + bool disable_flush_error_cqe; + struct list_head err_state_qp_list; +}; + +struct wr_list { + uint16_t opcode; + uint16_t next; +}; + +struct xsc_wq { + uint64_t *wrid; + unsigned *wqe_head; + struct xsc_spinlock lock; + unsigned wqe_cnt; + unsigned max_post; + unsigned head; + unsigned tail; + unsigned cur_post; + int max_gs; + int wqe_shift; + int offset; + void *qend; + uint32_t *wr_data; + __le32 *db; + unsigned ds_cnt; + unsigned seg_cnt; + unsigned *wr_opcode; + unsigned *need_flush; + unsigned flush_wqe_cnt; +}; + +struct xsc_dm { + struct verbs_dm verbs_dm; + size_t length; + void *mmap_va; + void *start_va; +}; + +struct xsc_mr { + struct verbs_mr vmr; + struct xsc_buf buf; + uint32_t alloc_flags; +}; + +enum xsc_qp_flags { + XSC_QP_FLAGS_USE_UNDERLAY = 0x01, +}; + +struct xsc_qp { + struct xsc_resource rsc; /* This struct must be first */ + struct verbs_qp verbs_qp; + struct ibv_qp *ibv_qp; + struct xsc_buf buf; + void *sq_start; + void *rq_start; + int max_inline_data; + int buf_size; + /* For Raw Packet QP, use different buffers for the SQ and RQ */ + struct xsc_buf sq_buf; + int sq_buf_size; + + uint8_t fm_cache; + uint8_t sq_signal_bits; + struct xsc_wq sq; + + __le32 *db; + struct xsc_wq rq; + int wq_sig; + uint32_t qp_cap_cache; + int atomics_enabled; + uint32_t max_tso; + uint16_t max_tso_header; + int rss_qp; + uint32_t flags; /* Use enum xsc_qp_flags */ + enum xscdv_dc_type dc_type; + uint32_t tirn; + uint32_t tisn; + uint32_t rqn; + uint32_t sqn; +}; + +struct xsc_ah { + struct ibv_ah ibv_ah; + struct xsc_wqe_av av; + bool kern_ah; +}; + +struct xsc_rwq { + struct xsc_resource rsc; + struct ibv_wq wq; + struct xsc_buf buf; + int buf_size; + struct xsc_wq rq; + __le32 *db; + void *pbuff; + __le32 *recv_db; + int wq_sig; +}; + +struct xsc_counter_node { + uint32_t index; + 
struct list_node entry; + enum ibv_counter_description desc; +}; + +struct xsc_counters { + struct verbs_counters vcounters; + struct list_head counters_list; + pthread_mutex_t lock; + uint32_t ncounters; + /* number of bounded objects */ + int refcount; +}; + +struct xsc_flow { + struct ibv_flow flow_id; + struct xsc_counters *mcounters; +}; + +struct xscdv_flow_matcher { + struct ibv_context *context; + uint32_t handle; +}; + +struct xscdv_devx_obj { + struct ibv_context *context; + uint32_t handle; +}; + +struct xsc_devx_umem { + struct xscdv_devx_umem dv_devx_umem; + struct ibv_context *context; + uint32_t handle; + void *addr; + size_t size; +}; + +union xsc_ib_fw_ver { + uint64_t data; + struct { + uint8_t ver_major; + uint8_t ver_minor; + uint16_t ver_patch; + uint32_t ver_tweak; + } s; +}; + +static inline int xsc_ilog2(int n) +{ + int t; + + if (n <= 0) + return -1; + + t = 0; + while ((1 << t) < n) + ++t; + + return t; +} + +extern int xsc_stall_num_loop; +extern int xsc_stall_cq_poll_min; +extern int xsc_stall_cq_poll_max; +extern int xsc_stall_cq_inc_step; +extern int xsc_stall_cq_dec_step; +extern int xsc_single_threaded; + +static inline unsigned DIV_ROUND_UP(unsigned n, unsigned d) +{ + return (n + d - 1u) / d; +} + +static inline unsigned long align(unsigned long val, unsigned long align) +{ + return (val + align - 1) & ~(align - 1); +} + +static inline struct xsc_device *to_xdev(struct ibv_device *ibdev) +{ + return container_of(ibdev, struct xsc_device, verbs_dev.device); +} + +static inline struct xsc_context *to_xctx(struct ibv_context *ibctx) +{ + return container_of(ibctx, struct xsc_context, ibv_ctx.context); +} + +/* to_xpd always returns the real xsc_pd object ie the protection domain. */ +static inline struct xsc_pd *to_xpd(struct ibv_pd *ibpd) +{ + struct xsc_pd *xpd = container_of(ibpd, struct xsc_pd, ibv_pd); + + if (xpd->xprotection_domain) + return xpd->xprotection_domain; + + return xpd; +} + +static inline struct xsc_parent_domain *to_xparent_domain(struct ibv_pd *ibpd) +{ + struct xsc_parent_domain *xparent_domain = + ibpd ? container_of(ibpd, struct xsc_parent_domain, xpd.ibv_pd) : NULL; + + if (xparent_domain && xparent_domain->xpd.xprotection_domain) + return xparent_domain; + + /* Otherwise ibpd isn't a parent_domain */ + return NULL; +} + +static inline struct xsc_cq *to_xcq(struct ibv_cq *ibcq) +{ + return container_of((struct ibv_cq_ex *)ibcq, struct xsc_cq, verbs_cq.cq_ex); +} + +static inline struct xsc_qp *to_xqp(struct ibv_qp *ibqp) +{ + struct verbs_qp *vqp = (struct verbs_qp *)ibqp; + + return container_of(vqp, struct xsc_qp, verbs_qp); +} + +static inline struct xsc_rwq *to_xrwq(struct ibv_wq *ibwq) +{ + return container_of(ibwq, struct xsc_rwq, wq); +} + +static inline struct xsc_dm *to_xdm(struct ibv_dm *ibdm) +{ + return container_of(ibdm, struct xsc_dm, verbs_dm.dm); +} + +static inline struct xsc_mr *to_xmr(struct ibv_mr *ibmr) +{ + return container_of(ibmr, struct xsc_mr, vmr.ibv_mr); +} + +static inline struct xsc_ah *to_xah(struct ibv_ah *ibah) +{ + return container_of(ibah, struct xsc_ah, ibv_ah); +} + +static inline int max_int(int a, int b) +{ + return a > b ? 
a : b; +} + +static inline struct xsc_qp *rsc_to_xqp(struct xsc_resource *rsc) +{ + return (struct xsc_qp *)rsc; +} + +static inline struct xsc_rwq *rsc_to_mrwq(struct xsc_resource *rsc) +{ + return (struct xsc_rwq *)rsc; +} + +static inline struct xsc_counters *to_mcounters(struct ibv_counters *ibcounters) +{ + return container_of(ibcounters, struct xsc_counters, vcounters.counters); +} + +static inline struct xsc_flow *to_mflow(struct ibv_flow *flow_id) +{ + return container_of(flow_id, struct xsc_flow, flow_id); +} + +int xsc_alloc_buf(struct xsc_buf *buf, size_t size, int page_size); +void xsc_free_buf(struct xsc_buf *buf); +int xsc_alloc_buf_contig(struct xsc_context *xctx, struct xsc_buf *buf, + size_t size, int page_size, const char *component); +void xsc_free_buf_contig(struct xsc_context *xctx, struct xsc_buf *buf); +int xsc_alloc_prefered_buf(struct xsc_context *xctx, + struct xsc_buf *buf, + size_t size, int page_size, + enum xsc_alloc_type alloc_type, + const char *component); +int xsc_free_actual_buf(struct xsc_context *ctx, struct xsc_buf *buf); +void xsc_get_alloc_type(struct xsc_context *context, + const char *component, + enum xsc_alloc_type *alloc_type, + enum xsc_alloc_type default_alloc_type); +int xsc_use_huge(const char *key); +bool xsc_is_extern_alloc(struct xsc_context *context); +int xsc_alloc_buf_extern(struct xsc_context *ctx, struct xsc_buf *buf, + size_t size); +void xsc_free_buf_extern(struct xsc_context *ctx, struct xsc_buf *buf); + +__le32 *xsc_alloc_dbrec(struct xsc_context *context); +void xsc_free_db(struct xsc_context *context, __le32 *db); + +int xsc_query_device(struct ibv_context *context, + struct ibv_device_attr *attr); +int xsc_query_device_ex(struct ibv_context *context, + const struct ibv_query_device_ex_input *input, + struct ibv_device_attr_ex *attr, + size_t attr_size); +int xsc_query_rt_values(struct ibv_context *context, + struct ibv_values_ex *values); +struct ibv_qp *xsc_create_qp_ex(struct ibv_context *context, + struct ibv_qp_init_attr_ex *attr); +int xsc_query_port(struct ibv_context *context, uint8_t port, + struct ibv_port_attr *attr); + +struct ibv_pd *xsc_alloc_pd(struct ibv_context *context); +int xsc_free_pd(struct ibv_pd *pd); + +struct ibv_mr *xsc_alloc_null_mr(struct ibv_pd *pd); +struct ibv_mr *xsc_reg_mr(struct ibv_pd *pd, void *addr, + size_t length, uint64_t hca_va, int access); +int xsc_rereg_mr(struct verbs_mr *mr, int flags, struct ibv_pd *pd, void *addr, + size_t length, int access); +int xsc_dereg_mr(struct verbs_mr *mr); +struct ibv_cq *xsc_create_cq(struct ibv_context *context, int cqe, + struct ibv_comp_channel *channel, + int comp_vector); +struct ibv_cq_ex *xsc_create_cq_ex(struct ibv_context *context, + struct ibv_cq_init_attr_ex *cq_attr); +int xsc_cq_fill_pfns(struct xsc_cq *cq, + const struct ibv_cq_init_attr_ex *cq_attr, + struct xsc_context *xctx); +int xsc_alloc_cq_buf(struct xsc_context *xctx, struct xsc_cq *cq, + struct xsc_buf *buf, int nent, int cqe_sz); +int xsc_free_cq_buf(struct xsc_context *ctx, struct xsc_buf *buf); +int xsc_resize_cq(struct ibv_cq *cq, int cqe); +int xsc_modify_cq(struct ibv_cq *cq, struct ibv_modify_cq_attr *attr); +int xsc_destroy_cq(struct ibv_cq *cq); +int xsc_poll_cq(struct ibv_cq *cq, int ne, struct ibv_wc *wc); +int xsc_arm_cq(struct ibv_cq *cq, int solicited); +void xsc_cq_event(struct ibv_cq *cq); +void __xsc_cq_clean(struct xsc_cq *cq, uint32_t qpn); +void xsc_cq_clean(struct xsc_cq *cq, uint32_t qpn); + +struct ibv_qp *xsc_create_qp(struct ibv_pd *pd, struct 
ibv_qp_init_attr *attr); +int xsc_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, + int attr_mask, + struct ibv_qp_init_attr *init_attr); +int xsc_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, + int attr_mask); +int xsc_modify_qp_rate_limit(struct ibv_qp *qp, + struct ibv_qp_rate_limit_attr *attr); +int xsc_destroy_qp(struct ibv_qp *qp); +void xsc_init_qp_indices(struct xsc_qp *qp); +void xsc_init_rwq_indices(struct xsc_rwq *rwq); +int xsc_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr, + struct ibv_send_wr **bad_wr); +int xsc_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr, + struct ibv_recv_wr **bad_wr); +int xsc_post_wq_recv(struct ibv_wq *ibwq, struct ibv_recv_wr *wr, + struct ibv_recv_wr **bad_wr); +struct xsc_qp *xsc_find_qp(struct xsc_context *ctx, uint32_t qpn); +int xsc_store_qp(struct xsc_context *ctx, uint32_t qpn, struct xsc_qp *qp); +void xsc_clear_qp(struct xsc_context *ctx, uint32_t qpn); +int xsc_err_state_qp(struct ibv_qp *qp, enum ibv_qp_state cur_state, + enum ibv_qp_state state); +int32_t xsc_store_uidx(struct xsc_context *ctx, void *rsc); +void xsc_clear_uidx(struct xsc_context *ctx, uint32_t uidx); +struct ibv_ah *xsc_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr); +int xsc_destroy_ah(struct ibv_ah *ah); +int xsc_attach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid); +int xsc_detach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid); +int xsc_round_up_power_of_two(long long sz); +void *xsc_get_send_wqe(struct xsc_qp *qp, int n); +struct ibv_xrcd *xsc_open_xrcd(struct ibv_context *context, + struct ibv_xrcd_init_attr *xrcd_init_attr); +int xsc_close_xrcd(struct ibv_xrcd *ib_xrcd); +struct ibv_wq *xsc_create_wq(struct ibv_context *context, + struct ibv_wq_init_attr *attr); +int xsc_modify_wq(struct ibv_wq *wq, struct ibv_wq_attr *attr); +int xsc_destroy_wq(struct ibv_wq *wq); +struct ibv_rwq_ind_table *xsc_create_rwq_ind_table(struct ibv_context *context, + struct ibv_rwq_ind_table_init_attr *init_attr); +int xsc_destroy_rwq_ind_table(struct ibv_rwq_ind_table *rwq_ind_table); +struct ibv_flow *xsc_create_flow(struct ibv_qp *qp, struct ibv_flow_attr *flow_attr); +int xsc_destroy_flow(struct ibv_flow *flow_id); +struct ibv_flow_action *xsc_create_flow_action_esp(struct ibv_context *ctx, + struct ibv_flow_action_esp_attr *attr); +int xsc_destroy_flow_action(struct ibv_flow_action *action); +int xsc_modify_flow_action_esp(struct ibv_flow_action *action, + struct ibv_flow_action_esp_attr *attr); + +struct ibv_dm *xsc_alloc_dm(struct ibv_context *context, + struct ibv_alloc_dm_attr *dm_attr); +int xsc_free_dm(struct ibv_dm *ibdm); +struct ibv_mr *xsc_reg_dm_mr(struct ibv_pd *pd, struct ibv_dm *ibdm, + uint64_t dm_offset, size_t length, + unsigned int acc); + +struct ibv_pd *xsc_alloc_parent_domain(struct ibv_context *context, + struct ibv_parent_domain_init_attr *attr); + + +struct ibv_counters *xsc_create_counters(struct ibv_context *context, + struct ibv_counters_init_attr *init_attr); +int xsc_destroy_counters(struct ibv_counters *counters); +int xsc_attach_counters_point_flow(struct ibv_counters *counters, + struct ibv_counter_attach_attr *attr, + struct ibv_flow *flow); +int xsc_read_counters(struct ibv_counters *counters, + uint64_t *counters_value, + uint32_t ncounters, + uint32_t flags); + +static inline void *xsc_find_uidx(struct xsc_context *ctx, uint32_t uidx) +{ + int tind = uidx >> XSC_UIDX_TABLE_SHIFT; + + if (likely(ctx->uidx_table[tind].refcnt)) + return ctx->uidx_table[tind].table[uidx & 
XSC_UIDX_TABLE_MASK]; + + return NULL; +} + +static inline int xsc_spin_lock(struct xsc_spinlock *lock) +{ + if (lock->need_lock) + return pthread_spin_lock(&lock->lock); + + if (unlikely(lock->in_use)) { + fprintf(stderr, "*** ERROR: multithreading violation ***\n" + "You are running a multithreaded application but\n" + "you set XSC_SINGLE_THREADED=1. Please unset it.\n"); + abort(); + } else { + lock->in_use = 1; + /* + * This fence is not at all correct, but it increases the + * chance that in_use is detected by another thread without + * much runtime cost. */ + atomic_thread_fence(memory_order_acq_rel); + } + + return 0; +} + +static inline int xsc_spin_unlock(struct xsc_spinlock *lock) +{ + if (lock->need_lock) + return pthread_spin_unlock(&lock->lock); + + lock->in_use = 0; + + return 0; +} + +static inline int xsc_spinlock_init(struct xsc_spinlock *lock, int need_lock) +{ + lock->in_use = 0; + lock->need_lock = need_lock; + return pthread_spin_init(&lock->lock, PTHREAD_PROCESS_PRIVATE); +} + +static inline int xsc_spinlock_init_pd(struct xsc_spinlock *lock, struct ibv_pd *pd) +{ + int thread_safe = xsc_single_threaded; + + return xsc_spinlock_init(lock, !thread_safe); +} + +static inline int xsc_spinlock_destroy(struct xsc_spinlock *lock) +{ + return pthread_spin_destroy(&lock->lock); +} + +static inline void set_command(int command, off_t *offset) +{ + *offset |= (command << XSC_IB_MMAP_CMD_SHIFT); +} + +static inline void set_arg(int arg, off_t *offset) +{ + *offset |= arg; +} + +static inline void set_order(int order, off_t *offset) +{ + set_arg(order, offset); +} + +static inline void set_index(int index, off_t *offset) +{ + set_arg(index, offset); +} + +static inline void set_extended_index(int index, off_t *offset) +{ + *offset |= (index & 0xff) | ((index >> 8) << 16); +} + +static inline uint8_t calc_sig(void *wqe, int size) +{ + int i; + uint8_t *p = wqe; + uint8_t res = 0; + + for (i = 0; i < size; ++i) + res ^= p[i]; + + return ~res; +} + +#endif /* XSCALE_H */ diff --git a/providers/xscale/xscdv.h b/providers/xscale/xscdv.h new file mode 100644 index 0000000..98d2daf --- /dev/null +++ b/providers/xscale/xscdv.h @@ -0,0 +1,876 @@ +/* + * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd. + * All rights reserved. 
+ */ + +#ifndef _XSCDV_H_ +#define _XSCDV_H_ + +#include +#include /* For the __be64 type */ +#include +#include +#if defined(__SSE3__) +#include +#include +#include +#endif /* defined(__SSE3__) */ + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* Always inline the functions */ +#ifdef __GNUC__ +#define XSCDV_ALWAYS_INLINE inline __attribute__((always_inline)) +#else +#define XSCDV_ALWAYS_INLINE inline +#endif + +enum { + XSC_RCV_DBR = 0, + XSC_SND_DBR = 1, +}; + +enum xscdv_context_comp_mask { + XSCDV_CONTEXT_MASK_CQE_COMPRESION = 1 << 0, + XSCDV_CONTEXT_MASK_SWP = 1 << 1, + XSCDV_CONTEXT_MASK_STRIDING_RQ = 1 << 2, + XSCDV_CONTEXT_MASK_TUNNEL_OFFLOADS = 1 << 3, + XSCDV_CONTEXT_MASK_DYN_BFREGS = 1 << 4, + XSCDV_CONTEXT_MASK_CLOCK_INFO_UPDATE = 1 << 5, + XSCDV_CONTEXT_MASK_FLOW_ACTION_FLAGS = 1 << 6, +}; + +struct xscdv_cqe_comp_caps { + uint32_t max_num; + uint32_t supported_format; /* enum xscdv_cqe_comp_res_format */ +}; + +struct xscdv_sw_parsing_caps { + uint32_t sw_parsing_offloads; /* Use enum xscdv_sw_parsing_offloads */ + uint32_t supported_qpts; +}; + +struct xscdv_striding_rq_caps { + uint32_t min_single_stride_log_num_of_bytes; + uint32_t max_single_stride_log_num_of_bytes; + uint32_t min_single_wqe_log_num_of_strides; + uint32_t max_single_wqe_log_num_of_strides; + uint32_t supported_qpts; +}; + +/* + * Direct verbs device-specific attributes + */ +struct xscdv_context { + uint8_t version; + uint64_t flags; + uint64_t comp_mask; + struct xscdv_cqe_comp_caps cqe_comp_caps; + struct xscdv_sw_parsing_caps sw_parsing_caps; + struct xscdv_striding_rq_caps striding_rq_caps; + uint32_t tunnel_offloads_caps; + uint64_t max_clock_info_update_nsec; + uint32_t flow_action_flags; +}; + +enum xscsdv_context_flags { + /* + * This flag indicates if CQE version 0 or 1 is needed. 
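+ * As with the other flags in this enum, the value is expected to be
+ * reported back in the "flags" field of struct xscdv_context filled in
+ * by xscdv_query_device(); a minimal sketch ("dv_attrs" and "cqe_version"
+ * are illustrative locals, not part of this API):
+ *
+ *	struct xscdv_context dv_attrs = { .comp_mask = 0 };
+ *	int cqe_version = 0;
+ *
+ *	if (!xscdv_query_device(ctx, &dv_attrs) &&
+ *	    (dv_attrs.flags & XSCDV_CONTEXT_FLAGS_CQE_V1))
+ *		cqe_version = 1;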
+ */ + XSCDV_CONTEXT_FLAGS_CQE_V1 = (1 << 0), + XSCDV_CONTEXT_FLAGS_OBSOLETE = (1 << 1), /* Obsoleted, don't use */ + XSCDV_CONTEXT_FLAGS_MPW_ALLOWED = (1 << 2), + XSCDV_CONTEXT_FLAGS_ENHANCED_MPW = (1 << 3), + XSCDV_CONTEXT_FLAGS_CQE_128B_COMP = (1 << 4), /* Support CQE 128B compression */ + XSCDV_CONTEXT_FLAGS_CQE_128B_PAD = (1 << 5), /* Support CQE 128B padding */ +}; + +enum xscdv_cq_init_attr_mask { + XSCDV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE = 1 << 0, + XSCDV_CQ_INIT_ATTR_MASK_FLAGS = 1 << 1, + XSCDV_CQ_INIT_ATTR_MASK_CQE_SIZE = 1 << 2, +}; + +struct xscdv_cq_init_attr { + uint64_t comp_mask; /* Use enum xscdv_cq_init_attr_mask */ + uint8_t cqe_comp_res_format; /* Use enum xscdv_cqe_comp_res_format */ + uint32_t flags; + uint16_t cqe_size; /* when XSCDV_CQ_INIT_ATTR_MASK_CQE_SIZE set */ +}; + +struct ibv_cq_ex *xscdv_create_cq(struct ibv_context *context, + struct ibv_cq_init_attr_ex *cq_attr, + struct xscdv_cq_init_attr *xcq_attr); + +enum xscdv_qp_create_flags { + XSCDV_QP_CREATE_TUNNEL_OFFLOADS = 1 << 0, + XSCDV_QP_CREATE_TIR_ALLOW_SELF_LOOPBACK_UC = 1 << 1, + XSCDV_QP_CREATE_TIR_ALLOW_SELF_LOOPBACK_MC = 1 << 2, + XSCDV_QP_CREATE_DISABLE_SCATTER_TO_CQE = 1 << 3, + XSCDV_QP_CREATE_ALLOW_SCATTER_TO_CQE = 1 << 4, +}; + +enum xscdv_qp_init_attr_mask { + XSCDV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS = 1 << 0, + XSCDV_QP_INIT_ATTR_MASK_DC = 1 << 1, +}; + +enum xscdv_dc_type { + XSCDV_DCTYPE_DCT = 1, + XSCDV_DCTYPE_DCI, +}; + +struct xscdv_dc_init_attr { + enum xscdv_dc_type dc_type; + uint64_t dct_access_key; +}; + +struct xscdv_qp_init_attr { + uint64_t comp_mask; /* Use enum xscdv_qp_init_attr_mask */ + uint32_t create_flags; /* Use enum xsc_qp_create_flags */ + struct xscdv_dc_init_attr dc_init_attr; +}; + +struct ibv_qp *xscdv_create_qp(struct ibv_context *context, + struct ibv_qp_init_attr_ex *qp_attr, + struct xscdv_qp_init_attr *xqp_attr); + +enum xscdv_flow_action_esp_mask { + XSCDV_FLOW_ACTION_ESP_MASK_FLAGS = 1 << 0, +}; + +struct xscdv_flow_action_esp { + uint64_t comp_mask; /* Use enum xscdv_flow_action_esp_mask */ + uint32_t action_flags; /* Use enum xscdv_flow_action_flags */ +}; + +struct xscdv_flow_match_parameters { + size_t match_sz; + uint64_t match_buf[]; /* Device spec format */ +}; + +struct xscdv_flow_matcher_attr { + enum ibv_flow_attr_type type; + uint32_t flags; /* From enum ibv_flow_flags */ + uint16_t priority; + uint8_t match_criteria_enable; /* Device spec format */ + struct xscdv_flow_match_parameters *match_mask; + uint64_t comp_mask; +}; + +struct xscdv_flow_matcher; + +struct xscdv_flow_matcher * +xscdv_create_flow_matcher(struct ibv_context *context, + struct xscdv_flow_matcher_attr *matcher_attr); + +int xscdv_destroy_flow_matcher(struct xscdv_flow_matcher *matcher); + +enum xscdv_flow_action_type { + XSCDV_FLOW_ACTION_DEST_IBV_QP, + XSCDV_FLOW_ACTION_DROP, + XSCDV_FLOW_ACTION_IBV_COUNTER, + XSCDV_FLOW_ACTION_IBV_FLOW_ACTION, + XSCDV_FLOW_ACTION_TAG, + XSCDV_FLOW_ACTION_DEST_DEVX, +}; + +struct xscdv_flow_action_attr { + enum xscdv_flow_action_type type; + union { + struct ibv_qp *qp; + struct ibv_counters *counter; + struct ibv_flow_action *action; + uint32_t tag_value; + struct xscdv_devx_obj *obj; + }; +}; + +struct ibv_flow * +xscdv_create_flow(struct xscdv_flow_matcher *matcher, + struct xscdv_flow_match_parameters *match_value, + size_t num_actions, + struct xscdv_flow_action_attr actions_attr[]); + +struct ibv_flow_action *xscdv_create_flow_action_esp(struct ibv_context *ctx, + struct ibv_flow_action_esp_attr *esp, + struct xscdv_flow_action_esp 
*xattr); + +/* + * xscdv_create_flow_action_modify_header - Create a flow action which mutates + * a packet. The flow action can be attached to steering rules via + * ibv_create_flow(). + * + * @ctx: RDMA device context to create the action on. + * @actions_sz: The size of *actions* buffer in bytes. + * @actions: A buffer which contains modify actions provided in device spec + * format. + * @ft_type: Defines the flow table type to which the modify + * header action will be attached. + * + * Return a valid ibv_flow_action if successful, NULL otherwise. + */ +struct ibv_flow_action * +xscdv_create_flow_action_modify_header(struct ibv_context *ctx, + size_t actions_sz, + uint64_t actions[], + enum xscdv_flow_table_type ft_type); + +/* + * xscdv_create_flow_action_packet_reformat - Create flow action which can + * encap/decap packets. + */ +struct ibv_flow_action * +xscdv_create_flow_action_packet_reformat(struct ibv_context *ctx, + size_t data_sz, + void *data, + enum xscdv_flow_action_packet_reformat_type reformat_type, + enum xscdv_flow_table_type ft_type); +/* + * Most device capabilities are exported by ibv_query_device(...), + * but there is HW device-specific information which is important + * for data-path, but isn't provided. + * + * Return 0 on success. + */ +int xscdv_query_device(struct ibv_context *ctx_in, + struct xscdv_context *attrs_out); + +enum xscdv_qp_comp_mask { + XSCDV_QP_MASK_UAR_MMAP_OFFSET = 1 << 0, + XSCDV_QP_MASK_RAW_QP_HANDLES = 1 << 1, +}; + +struct xscdv_qp { + __le32 *dbrec; + struct { + void *buf; + uint32_t wqe_cnt; + uint32_t stride; + __le32 *db; + } sq; + struct { + void *buf; + uint32_t wqe_cnt; + uint32_t stride; + __le32 *db; + } rq; + uint64_t comp_mask; + uint32_t tirn; + uint32_t tisn; + uint32_t rqn; + uint32_t sqn; +}; + +struct xscdv_cq { + void *buf; + __le32 *dbrec; + __le32 *db; + uint32_t cqe_cnt; + uint32_t cqe_size; + uint32_t cqn; + uint64_t comp_mask; +}; + +struct xscdv_rwq { + void *buf; + __le32 *dbrec; + uint32_t wqe_cnt; + uint32_t stride; + uint64_t comp_mask; + __le32 *db; +}; + +struct xscdv_dm { + void *buf; + uint64_t length; + uint64_t comp_mask; +}; + +struct xsc_wqe_av; + +struct xscdv_ah { + struct xsc_wqe_av *av; + uint64_t comp_mask; +}; + +struct xscdv_pd { + uint32_t pdn; + uint64_t comp_mask; +}; + +struct xscdv_obj { + struct { + struct ibv_qp *in; + struct xscdv_qp *out; + } qp; + struct { + struct ibv_cq *in; + struct xscdv_cq *out; + } cq; + struct { + struct ibv_wq *in; + struct xscdv_rwq *out; + } rwq; + struct { + struct ibv_dm *in; + struct xscdv_dm *out; + } dm; + struct { + struct ibv_ah *in; + struct xscdv_ah *out; + } ah; + struct { + struct ibv_pd *in; + struct xscdv_pd *out; + } pd; +}; + +enum xscdv_obj_type { + XSCDV_OBJ_QP = 1 << 0, + XSCDV_OBJ_CQ = 1 << 1, + XSCDV_OBJ_SRQ = 1 << 2, + XSCDV_OBJ_RWQ = 1 << 3, + XSCDV_OBJ_DM = 1 << 4, + XSCDV_OBJ_AH = 1 << 5, + XSCDV_OBJ_PD = 1 << 6, +}; + +enum xscdv_wq_init_attr_mask { + XSCDV_WQ_INIT_ATTR_MASK_STRIDING_RQ = 1 << 0, +}; + +struct xscdv_striding_rq_init_attr { + uint32_t single_stride_log_num_of_bytes; + uint32_t single_wqe_log_num_of_strides; + uint8_t two_byte_shift_en; +}; + +struct xscdv_wq_init_attr { + uint64_t comp_mask; /* Use enum xscdv_wq_init_attr_mask */ + struct xscdv_striding_rq_init_attr striding_rq_attrs; +}; + +/* + * This function creates a work queue object with extra properties + * defined by xscdv_wq_init_attr struct. + * + * For each bit in the comp_mask, a field in xscdv_wq_init_attr + * should follow. 
+ * + * XSCDV_WQ_INIT_ATTR_MASK_STRIDING_RQ: Create a work queue with + * striding RQ capabilities. + * - single_stride_log_num_of_bytes represents the size of each stride in the + * WQE and its value should be between min_single_stride_log_num_of_bytes + * and max_single_stride_log_num_of_bytes that are reported in + * xscdv_query_device. + * - single_wqe_log_num_of_strides represents the number of strides in each WQE. + * Its value should be between min_single_wqe_log_num_of_strides and + * max_single_wqe_log_num_of_strides that are reported in xscdv_query_device. + * - two_byte_shift_en: When enabled, hardware pads 2 bytes of zeroes + * before writing the message to memory (e.g. for IP alignment) + */ +struct ibv_wq *xscdv_create_wq(struct ibv_context *context, + struct ibv_wq_init_attr *wq_init_attr, + struct xscdv_wq_init_attr *xwq_attr); +/* + * This function will initialize xscdv_xxx structs based on supplied type. + * The information for initialization is taken from ibv_xx structs supplied + * as part of input. + * + * Request information of CQ marks its owned by DV for all consumer index + * related actions. + * + * The initialization type can be combination of several types together. + * + * Return: 0 in case of success. + */ +int xscdv_init_obj(struct xscdv_obj *obj, uint64_t obj_type); + +enum { + XSC_OPCODE_NOP = 0x00, + XSC_OPCODE_SEND_INVAL = 0x01, + XSC_OPCODE_RDMA_WRITE = 0x08, + XSC_OPCODE_RDMA_WRITE_IMM = 0x09, + XSC_OPCODE_SEND = 0x0a, + XSC_OPCODE_SEND_IMM = 0x0b, + XSC_OPCODE_TSO = 0x0e, + XSC_OPCODE_RDMA_READ = 0x10, + XSC_OPCODE_ATOMIC_CS = 0x11, + XSC_OPCODE_ATOMIC_FA = 0x12, + XSC_OPCODE_ATOMIC_MASKED_CS = 0x14, + XSC_OPCODE_ATOMIC_MASKED_FA = 0x15, + XSC_OPCODE_FMR = 0x19, + XSC_OPCODE_LOCAL_INVAL = 0x1b, + XSC_OPCODE_CONFIG_CMD = 0x1f, + XSC_OPCODE_UMR = 0x25, + XSC_OPCODE_TAG_MATCHING = 0x28 +}; + +enum { + XSC_CQE_L2_OK = 1 << 0, + XSC_CQE_L3_OK = 1 << 1, + XSC_CQE_L4_OK = 1 << 2, +}; + +enum { + XSC_CQE_L3_HDR_TYPE_NONE = 0x0, + XSC_CQE_L3_HDR_TYPE_IPV6 = 0x1, + XSC_CQE_L3_HDR_TYPE_IPV4 = 0x2, +}; + +enum { + XSC_CQE_OWNER_MASK = 1, + XSC_CQE_REQ = 0, + XSC_CQE_RESP_WR_IMM = 1, + XSC_CQE_RESP_SEND = 2, + XSC_CQE_RESP_SEND_IMM = 3, + XSC_CQE_RESP_SEND_INV = 4, + XSC_CQE_RESIZE_CQ = 5, + XSC_CQE_NO_PACKET = 6, + XSC_CQE_REQ_ERR = 13, + XSC_CQE_RESP_ERR = 14, + XSC_CQE_INVALID = 15, +}; + +struct xsc_err_cqe { + uint8_t rsvd0[32]; + uint32_t srqn; + uint8_t rsvd1[18]; + uint8_t vendor_err_synd; + uint8_t syndrome; + uint32_t s_wqe_opcode_qpn; + uint16_t wqe_counter; + uint8_t signature; + uint8_t op_own; +}; + +struct xsc_tm_cqe { + __be32 success; + __be16 hw_phase_cnt; + uint8_t rsvd0[12]; +}; + +struct xsc_cqe64 { + union { + struct { + uint8_t rsvd0[2]; + __be16 wqe_id; + uint8_t rsvd4[13]; + uint8_t ml_path; + uint8_t rsvd20[4]; + __be16 slid; + __be32 flags_rqpn; + uint8_t hds_ip_ext; + uint8_t l4_hdr_type_etc; + __be16 vlan_info; + }; + struct xsc_tm_cqe tm_cqe; + /* TMH is scattered to CQE upon match */ + struct ibv_tmh tmh; + }; + __be32 srqn_uidx; + __be32 imm_inval_pkey; + uint8_t app; + uint8_t app_op; + __be16 app_info; + __be32 byte_cnt; + __be64 timestamp; + __be32 sop_drop_qpn; + __be16 wqe_counter; + uint8_t signature; + uint8_t op_own; +}; + +enum xscdv_cqe_comp_res_format { + XSCDV_CQE_RES_FORMAT_HASH = 1 << 0, + XSCDV_CQE_RES_FORMAT_CSUM = 1 << 1, + XSCDV_CQE_RES_FORMAT_CSUM_STRIDX = 1 << 2, +}; + +enum xscdv_sw_parsing_offloads { + XSCDV_SW_PARSING = 1 << 0, + XSCDV_SW_PARSING_CSUM = 1 << 1, + XSCDV_SW_PARSING_LSO = 1 << 2, +}; + +static 
XSCDV_ALWAYS_INLINE +uint8_t xscdv_get_cqe_owner(struct xsc_cqe64 *cqe) +{ + return cqe->op_own & 0x1; +} + +static XSCDV_ALWAYS_INLINE +void xscdv_set_cqe_owner(struct xsc_cqe64 *cqe, uint8_t val) +{ + cqe->op_own = (val & 0x1) | (cqe->op_own & ~0x1); +} + +/* Solicited event */ +static XSCDV_ALWAYS_INLINE +uint8_t xscdv_get_cqe_se(struct xsc_cqe64 *cqe) +{ + return (cqe->op_own >> 1) & 0x1; +} + +static XSCDV_ALWAYS_INLINE +uint8_t xscdv_get_cqe_format(struct xsc_cqe64 *cqe) +{ + return (cqe->op_own >> 2) & 0x3; +} + +static XSCDV_ALWAYS_INLINE +uint8_t xscdv_get_cqe_opcode(struct xsc_cqe64 *cqe) +{ + return cqe->op_own >> 4; +} + +/* + * WQE related part + */ +enum { + XSC_INVALID_LKEY = 0x100, +}; + +enum { + XSC_SEND_WQE_BB = 64, + XSC_SEND_WQE_SHIFT = 6, +}; + +struct xsc_wqe_srq_next_seg { + uint8_t rsvd0[2]; + __be16 next_wqe_index; + uint8_t signature; + uint8_t rsvd1[11]; +}; + +struct xsc_wqe_ctrl_seg { + __be32 opmod_idx_opcode; + __be32 qpn_ds; + uint8_t signature; + uint8_t rsvd[2]; + uint8_t fm_ce_se; + __be32 imm; +}; + +struct xsc_wqe_av { + union { + struct { + __be32 qkey; + __be32 reserved; + } qkey; + __be64 dc_key; + } key; + __be32 dqp_dct; + uint8_t stat_rate_sl; + uint8_t fl_mlid; + __be16 rlid; + uint8_t reserved0[4]; + uint8_t rmac[6]; + uint8_t tclass; + uint8_t hop_limit; + __be32 grh_gid_fl; + uint8_t rgid[16]; +}; + +struct xsc_wqe_datagram_seg { + struct xsc_wqe_av av; +}; + +struct xsc_wqe_raddr_seg { + __be64 raddr; + __be32 rkey; + __be32 reserved; +}; + +struct xsc_wqe_atomic_seg { + __be64 swap_add; + __be64 compare; +}; + +struct xsc_wqe_inl_data_seg { + uint32_t byte_count; +}; + +struct xsc_wqe_eth_seg { + __be32 rsvd0; + uint8_t cs_flags; + uint8_t rsvd1; + __be16 mss; + __be32 rsvd2; + __be16 inline_hdr_sz; + uint8_t inline_hdr_start[2]; + uint8_t inline_hdr[16]; +}; + +/* + * Control segment - contains some control information for the current WQE. + * + * Output: + * seg - control segment to be filled + * Input: + * pi - WQEBB number of the first block of this WQE. + * This number should wrap at 0xffff, regardless of + * size of the WQ. + * opcode - Opcode of this WQE. Encodes the type of operation + * to be executed on the QP. + * opmod - Opcode modifier. + * qp_num - QP/SQ number this WQE is posted to. + * fm_ce_se - FM (fence mode), CE (completion and event mode) + * and SE (solicited event). + * ds - WQE size in octowords (16-byte units). DS accounts for all + * the segments in the WQE as summarized in WQE construction. + * signature - WQE signature. + * imm - Immediate data/Invalidation key/UMR mkey. + */ +static XSCDV_ALWAYS_INLINE +void xscdv_set_ctrl_seg(struct xsc_wqe_ctrl_seg *seg, uint16_t pi, + uint8_t opcode, uint8_t opmod, uint32_t qp_num, + uint8_t fm_ce_se, uint8_t ds, + uint8_t signature, uint32_t imm) +{ + seg->opmod_idx_opcode = htobe32(((uint32_t)opmod << 24) | ((uint32_t)pi << 8) | opcode); + seg->qpn_ds = htobe32((qp_num << 8) | ds); + seg->fm_ce_se = fm_ce_se; + seg->signature = signature; + /* + * The caller should prepare "imm" in advance based on WR opcode. + * For IBV_WR_SEND_WITH_IMM and IBV_WR_RDMA_WRITE_WITH_IMM, + * the "imm" should be assigned as is. + * For the IBV_WR_SEND_WITH_INV, it should be htobe32(imm). + */ + seg->imm = imm; +} + +/* x86 optimized version of xscdv_set_ctrl_seg() + * + * This is useful when doing calculations on large data sets + * for parallel calculations. + * + * It doesn't suit for serialized algorithms. 
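+ *
+ * Both variants take the same argument list and are intended to produce
+ * the same segment layout, so a call such as the following sketch could
+ * use either one ("ctrl", "idx" and "qpn" are illustrative locals and the
+ * ds/signature/imm values are arbitrary):
+ *
+ *	xscdv_x86_set_ctrl_seg(ctrl, idx & 0xffff, XSC_OPCODE_SEND, 0,
+ *			       qpn, 0, 4, 0, 0);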
+ */ +#if defined(__SSE3__) +static XSCDV_ALWAYS_INLINE +void xscdv_x86_set_ctrl_seg(struct xsc_wqe_ctrl_seg *seg, uint16_t pi, + uint8_t opcode, uint8_t opmod, uint32_t qp_num, + uint8_t fm_ce_se, uint8_t ds, + uint8_t signature, uint32_t imm) +{ + __m128i val = _mm_set_epi32(imm, qp_num, (ds << 16) | pi, + (signature << 24) | (opcode << 16) | (opmod << 8) | fm_ce_se); + __m128i mask = _mm_set_epi8(15, 14, 13, 12, /* immediate */ + 0, /* signal/fence_mode */ +#if CHAR_MIN + -128, -128, /* reserved */ +#else + 0x80, 0x80, /* reserved */ +#endif + 3, /* signature */ + 6, /* data size */ + 8, 9, 10, /* QP num */ + 2, /* opcode */ + 4, 5, /* sw_pi in BE */ + 1 /* opmod */ + ); + *(__m128i *) seg = _mm_shuffle_epi8(val, mask); +} +#endif /* defined(__SSE3__) */ + +/* + * Datagram Segment - contains address information required in order + * to form a datagram message. + * + * Output: + * seg - datagram segment to be filled. + * Input: + * key - Q_key/access key. + * dqp_dct - Destination QP number for UD and DCT for DC. + * ext - Address vector extension. + * stat_rate_sl - Maximum static rate control, SL/ethernet priority. + * fl_mlid - Force loopback and source LID for IB. + * rlid - Remote LID + * rmac - Remote MAC + * tclass - GRH tclass/IPv6 tclass/IPv4 ToS + * hop_limit - GRH hop limit/IPv6 hop limit/IPv4 TTL + * grh_gid_fi - GRH, source GID address and IPv6 flow label. + * rgid - Remote GID/IP address. + */ +static XSCDV_ALWAYS_INLINE +void xscdv_set_dgram_seg(struct xsc_wqe_datagram_seg *seg, + uint64_t key, uint32_t dqp_dct, + uint8_t ext, uint8_t stat_rate_sl, + uint8_t fl_mlid, uint16_t rlid, + uint8_t *rmac, uint8_t tclass, + uint8_t hop_limit, uint32_t grh_gid_fi, + uint8_t *rgid) +{ + + /* Always put 64 bits, in q_key, the reserved part will be 0 */ + seg->av.key.dc_key = htobe64(key); + seg->av.dqp_dct = htobe32(((uint32_t)ext << 31) | dqp_dct); + seg->av.stat_rate_sl = stat_rate_sl; + seg->av.fl_mlid = fl_mlid; + seg->av.rlid = htobe16(rlid); + memcpy(seg->av.rmac, rmac, 6); + seg->av.tclass = tclass; + seg->av.hop_limit = hop_limit; + seg->av.grh_gid_fl = htobe32(grh_gid_fi); + memcpy(seg->av.rgid, rgid, 16); +} + +/* + * Eth Segment - contains packet headers and information for stateless L2, L3, L4 offloading. + * + * Output: + * seg - Eth segment to be filled. + * Input: + * cs_flags - l3cs/l3cs_inner/l4cs/l4cs_inner. + * mss - Maximum segment size. For TSO WQEs, the number of bytes + * in the TCP payload to be transmitted in each packet. Must + * be 0 on non TSO WQEs. + * inline_hdr_sz - Length of the inlined packet headers. + * inline_hdr_start - Inlined packet header. + */ +static XSCDV_ALWAYS_INLINE +void xscdv_set_eth_seg(struct xsc_wqe_eth_seg *seg, uint8_t cs_flags, + uint16_t mss, uint16_t inline_hdr_sz, + uint8_t *inline_hdr_start) +{ + seg->cs_flags = cs_flags; + seg->mss = htobe16(mss); + seg->inline_hdr_sz = htobe16(inline_hdr_sz); + memcpy(seg->inline_hdr_start, inline_hdr_start, inline_hdr_sz); +} + +enum xscdv_set_ctx_attr_type { + XSCDV_CTX_ATTR_BUF_ALLOCATORS = 1, +}; + +enum { + XSC_MMAP_GET_REGULAR_PAGES_CMD = 0, + XSC_MMAP_GET_NC_PAGES_CMD = 3, +}; + +struct xscdv_ctx_allocators { + void *(*alloc)(size_t size, void *priv_data); + void (*free)(void *ptr, void *priv_data); + void *data; +}; + +/* + * Generic context attributes set API + * + * Returns 0 on success, or the value of errno on failure + * (which indicates the failure reason). 
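+ *
+ * For XSCDV_CTX_ATTR_BUF_ALLOCATORS, "attr" points to a struct
+ * xscdv_ctx_allocators whose callbacks the provider copies and may use
+ * for buffer allocation; a minimal sketch (my_alloc(), my_free() and
+ * app_state are illustrative application-side names):
+ *
+ *	struct xscdv_ctx_allocators allocators = {
+ *		.alloc = my_alloc,
+ *		.free = my_free,
+ *		.data = app_state,
+ *	};
+ *
+ *	xscdv_set_context_attr(ctx, XSCDV_CTX_ATTR_BUF_ALLOCATORS,
+ *			       &allocators);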
+ */ +int xscdv_set_context_attr(struct ibv_context *context, + enum xscdv_set_ctx_attr_type type, void *attr); + +struct xscdv_clock_info { + uint64_t nsec; + uint64_t last_cycles; + uint64_t frac; + uint32_t mult; + uint32_t shift; + uint64_t mask; +}; + +/* + * Get xsc core clock info + * + * Output: + * clock_info - clock info to be filled + * Input: + * context - device context + * + * Return: 0 on success, or the value of errno on failure + */ +int xscdv_get_clock_info(struct ibv_context *context, + struct xscdv_clock_info *clock_info); + +/* + * Translate device timestamp to nano-sec + * + * Input: + * clock_info - clock info to be filled + * device_timestamp - timestamp to translate + * + * Return: nano-sec + */ +static inline uint64_t xscdv_ts_to_ns(struct xscdv_clock_info *clock_info, + uint64_t device_timestamp) +{ + uint64_t delta, nsec; + + /* + * device_timestamp & cycles are the free running 'mask' bit counters + * from the hardware hca_core_clock clock. + */ + delta = (device_timestamp - clock_info->last_cycles) & clock_info->mask; + nsec = clock_info->nsec; + + /* + * Guess if the device_timestamp is more recent than + * clock_info->last_cycles, if not (too far in the future) treat + * it as old time stamp. This will break every max_clock_info_update_nsec. + */ + + if (delta > clock_info->mask / 2) { + delta = (clock_info->last_cycles - device_timestamp) & + clock_info->mask; + nsec -= ((delta * clock_info->mult) - clock_info->frac) >> + clock_info->shift; + } else { + nsec += ((delta * clock_info->mult) + clock_info->frac) >> + clock_info->shift; + } + + return nsec; +} + +enum xscdv_context_attr_flags { + XSCDV_CONTEXT_FLAGS_DEVX = 1 << 0, +}; + +struct xscdv_context_attr { + uint32_t flags; /* Use enum xscdv_context_attr_flags */ + uint64_t comp_mask; +}; + +struct ibv_context * +xscdv_open_device(struct ibv_device *device, struct xscdv_context_attr *attr); + +struct xscdv_devx_obj; + +struct xscdv_devx_obj * +xscdv_devx_obj_create(struct ibv_context *context, const void *in, size_t inlen, + void *out, size_t outlen); +int xscdv_devx_obj_query(struct xscdv_devx_obj *obj, const void *in, size_t inlen, + void *out, size_t outlen); +int xscdv_devx_obj_modify(struct xscdv_devx_obj *obj, const void *in, size_t inlen, + void *out, size_t outlen); +int xscdv_devx_obj_destroy(struct xscdv_devx_obj *obj); +int xscdv_devx_general_cmd(struct ibv_context *context, const void *in, size_t inlen, + void *out, size_t outlen); + +struct xscdv_devx_umem { + uint32_t umem_id; +}; + +struct xscdv_devx_umem * +xscdv_devx_umem_reg(struct ibv_context *ctx, void *addr, size_t size, uint32_t access); +int xscdv_devx_umem_dereg(struct xscdv_devx_umem *umem); +int xscdv_devx_query_eqn(struct ibv_context *context, uint32_t vector, + uint32_t *eqn); + +#ifdef __cplusplus +} +#endif + +#endif /* _XSCDV_H_ */ diff --git a/redhat/rdma-core.spec b/redhat/rdma-core.spec index c347195..a7aa5bc 100644 --- a/redhat/rdma-core.spec +++ b/redhat/rdma-core.spec @@ -176,6 +176,8 @@ Provides: libocrdma = %{version}-%{release} Obsoletes: libocrdma < %{version}-%{release} Provides: librxe = %{version}-%{release} Obsoletes: librxe < %{version}-%{release} +Provides: libxscale = %{version}-%{release} +Obsoletes: libxscale < %{version}-%{release} %description -n libibverbs libibverbs is a library that allows userspace processes to use RDMA @@ -202,6 +204,7 @@ Device-specific plug-in ibverbs userspace drivers are included: - librxe: A software implementation of the RoCE protocol - libsiw: A software implementation of 
the iWarp protocol - libvmw_pvrdma: VMware paravirtual RDMA device +- libxscale: Yunsilicon RDMA device %package -n libibverbs-utils Summary: Examples for the libibverbs library @@ -583,6 +586,7 @@ fi %{_libdir}/libmana.so.* %{_libdir}/libmlx5.so.* %{_libdir}/libmlx4.so.* +%{_libdir}/libxscale.so.* %config(noreplace) %{_sysconfdir}/libibverbs.d/*.driver %doc %{_docdir}/%{name}/libibverbs.md -- 2.43.0