rdma-core/0044-libxscale-Add-Yunsilicon-User-Space-RDMA-Driver.patch
Xin Tian e30985bd56 libxscale: Add Yunsilicon User Space RDMA Driver
Introduce xscale provider for Yunsilicon devices.

Signed-off-by: Xin Tian <tianx@yunsilicon.com>
2025-03-04 10:11:56 +08:00

From cd10cbeac856514302fbad8f1d17ec770f44faa9 Mon Sep 17 00:00:00 2001
From: Xin Tian <tianx@yunsilicon.com>
Date: Wed, 26 Feb 2025 10:29:59 +0800
Subject: [PATCH] libxscale: Add Yunsilicon User Space RDMA Driver

Introduce xscale provider for Yunsilicon devices.
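
The provider plugs into the standard libibverbs device model, so basic use
needs no xscale-specific API. A minimal sketch of discovering and opening a
device through plain libibverbs follows; it is illustrative only, and the
device names and attribute values it prints depend on the installed
xsc_ib.ko and on the hardware present:

#include <stdio.h>
#include <infiniband/verbs.h>

int main(void)
{
	int num, i;
	struct ibv_device **list = ibv_get_device_list(&num);
	struct ibv_context *ctx;
	struct ibv_device_attr attr;

	if (!list || !num) {
		fprintf(stderr, "no RDMA devices found\n");
		return 1;
	}
	for (i = 0; i < num; i++)
		printf("device %d: %s\n", i, ibv_get_device_name(list[i]));

	/* libibverbs picks the matching provider (libxscale) automatically */
	ctx = ibv_open_device(list[0]);
	if (ctx && !ibv_query_device(ctx, &attr))
		printf("max_qp=%d max_cqe=%d\n", attr.max_qp, attr.max_cqe);
	if (ctx)
		ibv_close_device(ctx);
	ibv_free_device_list(list);
	return 0;
}
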
Signed-off-by: Xin Tian <tianx@yunsilicon.com>
---
CMakeLists.txt | 1 +
MAINTAINERS | 6 +
README.md | 1 +
debian/control | 1 +
debian/copyright | 4 +
debian/ibverbs-providers.install | 1 +
debian/libibverbs-dev.install | 2 +
kernel-headers/CMakeLists.txt | 4 +
kernel-headers/rdma/ib_user_ioctl_verbs.h | 1 +
kernel-headers/rdma/xsc-abi.h | 333 +++
kernel-headers/rdma/xsc_user_ioctl_cmds.h | 163 ++
kernel-headers/rdma/xsc_user_ioctl_verbs.h | 27 +
libibverbs/verbs.h | 1 +
providers/xscale/CMakeLists.txt | 18 +
providers/xscale/bitmap.h | 84 +
providers/xscale/buf.c | 594 +++++
providers/xscale/cq.c | 1410 ++++++++++
providers/xscale/cqm_csr_defines.h | 180 ++
providers/xscale/dbrec.c | 131 +
providers/xscale/libxsc.map | 59 +
providers/xscale/qp.c | 678 +++++
providers/xscale/rqm_csr_defines.h | 200 ++
providers/xscale/sqm_csr_defines.h | 204 ++
providers/xscale/verbs.c | 2816 ++++++++++++++++++++
providers/xscale/wqe.h | 72 +
providers/xscale/xsc-abi.h | 56 +
providers/xscale/xsc_api.h | 29 +
providers/xscale/xsc_hsi.h | 252 ++
providers/xscale/xsc_hw.h | 584 ++++
providers/xscale/xscale.c | 948 +++++++
providers/xscale/xscale.h | 834 ++++++
providers/xscale/xscdv.h | 876 ++++++
redhat/rdma-core.spec | 4 +
33 files changed, 10574 insertions(+)
create mode 100644 kernel-headers/rdma/xsc-abi.h
create mode 100644 kernel-headers/rdma/xsc_user_ioctl_cmds.h
create mode 100644 kernel-headers/rdma/xsc_user_ioctl_verbs.h
create mode 100644 providers/xscale/CMakeLists.txt
create mode 100644 providers/xscale/bitmap.h
create mode 100644 providers/xscale/buf.c
create mode 100644 providers/xscale/cq.c
create mode 100644 providers/xscale/cqm_csr_defines.h
create mode 100644 providers/xscale/dbrec.c
create mode 100644 providers/xscale/libxsc.map
create mode 100644 providers/xscale/qp.c
create mode 100644 providers/xscale/rqm_csr_defines.h
create mode 100644 providers/xscale/sqm_csr_defines.h
create mode 100644 providers/xscale/verbs.c
create mode 100644 providers/xscale/wqe.h
create mode 100644 providers/xscale/xsc-abi.h
create mode 100644 providers/xscale/xsc_api.h
create mode 100644 providers/xscale/xsc_hsi.h
create mode 100755 providers/xscale/xsc_hw.h
create mode 100644 providers/xscale/xscale.c
create mode 100644 providers/xscale/xscale.h
create mode 100644 providers/xscale/xscdv.h
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 98985e7..c803f73 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -748,6 +748,7 @@ add_subdirectory(providers/mthca)
add_subdirectory(providers/ocrdma)
add_subdirectory(providers/qedr)
add_subdirectory(providers/vmw_pvrdma)
+add_subdirectory(providers/xscale)
endif()
add_subdirectory(providers/hfi1verbs)
diff --git a/MAINTAINERS b/MAINTAINERS
index 4b24117..d3b66ad 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -185,6 +185,12 @@ L: pv-drivers@vmware.com
S: Supported
F: providers/vmw_pvrdma/
+XSCALE USERSPACE PROVIDER (for xsc_ib.ko)
+M: Honggang Wei <weihg@yunsilicon.com>
+M: Xin Tian <tianx@yunsilicon.com>
+S: Supported
+F: providers/xscale/
+
PYVERBS
M: Edward Srouji <edwards@mellanox.com>
S: Supported
diff --git a/README.md b/README.md
index 928bdc4..8f7a9a5 100644
--- a/README.md
+++ b/README.md
@@ -31,6 +31,7 @@ is included:
- rdma_rxe.ko
- siw.ko
- vmw_pvrdma.ko
+ - xsc_ib.ko
Additional service daemons are provided for:
- srp_daemon (ib_srp.ko)
diff --git a/debian/control b/debian/control
index 2a55372..5296ea7 100644
--- a/debian/control
+++ b/debian/control
@@ -99,6 +99,7 @@ Description: User space provider drivers for libibverbs
- rxe: A software implementation of the RoCE protocol
- siw: A software implementation of the iWarp protocol
- vmw_pvrdma: VMware paravirtual RDMA device
+ - xscale: Yunsilicon RDMA device
Package: ibverbs-utils
Architecture: linux-any
diff --git a/debian/copyright b/debian/copyright
index 36ac71e..0a623e3 100644
--- a/debian/copyright
+++ b/debian/copyright
@@ -228,6 +228,10 @@ Files: providers/vmw_pvrdma/*
Copyright: 2012-2016 VMware, Inc.
License: BSD-2-clause or GPL-2
+Files: providers/xscale/*
+Copyright: 2021-2025, Yunsilicon Technology Co., Ltd.
+License: GPL-2
+
Files: rdma-ndd/*
Copyright: 2004-2016, Intel Corporation.
License: BSD-MIT or GPL-2
diff --git a/debian/ibverbs-providers.install b/debian/ibverbs-providers.install
index fea15e0..d20bd42 100644
--- a/debian/ibverbs-providers.install
+++ b/debian/ibverbs-providers.install
@@ -5,3 +5,4 @@ usr/lib/*/libhns.so.*
usr/lib/*/libmana.so.*
usr/lib/*/libmlx4.so.*
usr/lib/*/libmlx5.so.*
+usr/lib/*/libxscale.so.*
diff --git a/debian/libibverbs-dev.install b/debian/libibverbs-dev.install
index ef5b9a4..8cd09fa 100644
--- a/debian/libibverbs-dev.install
+++ b/debian/libibverbs-dev.install
@@ -26,6 +26,8 @@ usr/lib/*/libmlx4.a
usr/lib/*/libmlx4.so
usr/lib/*/libmlx5.a
usr/lib/*/libmlx5.so
+usr/lib/*/libxscale.a
+usr/lib/*/libxscale.so
usr/lib/*/pkgconfig/libefa.pc
usr/lib/*/pkgconfig/libhns.pc
usr/lib/*/pkgconfig/libibverbs.pc
diff --git a/kernel-headers/CMakeLists.txt b/kernel-headers/CMakeLists.txt
index 82c191c..9acb1fc 100644
--- a/kernel-headers/CMakeLists.txt
+++ b/kernel-headers/CMakeLists.txt
@@ -26,6 +26,9 @@ publish_internal_headers(rdma
rdma/rvt-abi.h
rdma/siw-abi.h
rdma/vmw_pvrdma-abi.h
+ rdma/xsc-abi.h
+ rdma/xsc_user_ioctl_cmds.h
+ rdma/xsc_user_ioctl_verbs.h
)
publish_internal_headers(rdma/hfi
@@ -80,6 +83,7 @@ rdma_kernel_provider_abi(
rdma/rdma_user_rxe.h
rdma/siw-abi.h
rdma/vmw_pvrdma-abi.h
+ rdma/xsc-abi.h
)
publish_headers(infiniband
diff --git a/kernel-headers/rdma/ib_user_ioctl_verbs.h b/kernel-headers/rdma/ib_user_ioctl_verbs.h
index fe15bc7..cfac178 100644
--- a/kernel-headers/rdma/ib_user_ioctl_verbs.h
+++ b/kernel-headers/rdma/ib_user_ioctl_verbs.h
@@ -255,6 +255,7 @@ enum rdma_driver_id {
RDMA_DRIVER_SIW,
RDMA_DRIVER_ERDMA,
RDMA_DRIVER_MANA,
+ RDMA_DRIVER_XSC = 1,
};
enum ib_uverbs_gid_type {
diff --git a/kernel-headers/rdma/xsc-abi.h b/kernel-headers/rdma/xsc-abi.h
new file mode 100644
index 0000000..4af6408
--- /dev/null
+++ b/kernel-headers/rdma/xsc-abi.h
@@ -0,0 +1,333 @@
+/*
+ * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd.
+ * All rights reserved.
+ */
+
+#ifndef XSC_ABI_USER_H
+#define XSC_ABI_USER_H
+
+#include <linux/types.h>
+#include <linux/if_ether.h> /* For ETH_ALEN. */
+#include <rdma/ib_user_ioctl_verbs.h>
+
+enum {
+ XSC_WQ_FLAG_SIGNATURE = 1 << 0,
+};
+
+/* Make sure that all structs defined in this file remain laid out so
+ * that they pack the same way on 32-bit and 64-bit architectures (to
+ * avoid incompatibility between 32-bit userspace and 64-bit kernels).
+ * In particular do not use pointer types -- pass pointers in __u64
+ * instead.
+ */
+
+struct xsc_ib_alloc_ucontext_req {
+ __u32 rsvd0;
+ __u32 rsvd1;
+};
+
+enum xsc_user_cmds_supp_uhw {
+ XSC_USER_CMDS_SUPP_UHW_QUERY_DEVICE = 1 << 0,
+ XSC_USER_CMDS_SUPP_UHW_CREATE_AH = 1 << 1,
+};
+
+struct xsc_ib_alloc_ucontext_resp {
+ __u32 qp_tab_size;
+ __u32 cache_line_size;
+ __u16 max_sq_desc_sz;
+ __u16 max_rq_desc_sz;
+ __u32 max_send_wqebb;
+ __u32 max_recv_wr;
+ __u16 num_ports;
+ __u16 device_id;
+ __u64 qpm_tx_db;
+ __u64 qpm_rx_db;
+ __u64 cqm_next_cid_reg;
+ __u64 cqm_armdb;
+ __u32 send_ds_num;
+ __u32 recv_ds_num;
+ __u32 cmds_supp_uhw;
+};
+
+struct xsc_ib_alloc_pd_resp {
+ __u32 pdn;
+};
+
+struct xsc_ib_tso_caps {
+ __u32 max_tso; /* Maximum tso payload size in bytes */
+
+ /* Corresponding bit will be set if qp type from
+ * 'enum ib_qp_type' is supported, e.g.
+ * supported_qpts |= 1 << IB_QPT_UD
+ */
+ __u32 supported_qpts;
+};
+
+struct xsc_ib_rss_caps {
+ __aligned_u64 rx_hash_fields_mask; /* enum xsc_rx_hash_fields */
+ __u8 rx_hash_function; /* enum xsc_rx_hash_function_flags */
+ __u8 reserved[7];
+};
+
+enum xsc_ib_cqe_comp_res_format {
+ XSC_IB_CQE_RES_FORMAT_HASH = 1 << 0,
+ XSC_IB_CQE_RES_FORMAT_CSUM = 1 << 1,
+ XSC_IB_CQE_RES_FORMAT_CSUM_STRIDX = 1 << 2,
+};
+
+struct xsc_ib_cqe_comp_caps {
+ __u32 max_num;
+ __u32 supported_format; /* enum xsc_ib_cqe_comp_res_format */
+};
+
+enum xsc_ib_packet_pacing_cap_flags {
+ XSC_IB_PP_SUPPORT_BURST = 1 << 0,
+};
+
+struct xsc_packet_pacing_caps {
+ __u32 qp_rate_limit_min;
+ __u32 qp_rate_limit_max; /* In kpbs */
+
+ /* Corresponding bit will be set if qp type from
+ * 'enum ib_qp_type' is supported, e.g.
+ * supported_qpts |= 1 << IB_QPT_RAW_PACKET
+ */
+ __u32 supported_qpts;
+ __u8 cap_flags; /* enum xsc_ib_packet_pacing_cap_flags */
+ __u8 reserved[3];
+};
+
+enum xsc_ib_mpw_caps {
+ MPW_RESERVED = 1 << 0,
+ XSC_IB_ALLOW_MPW = 1 << 1,
+ XSC_IB_SUPPORT_EMPW = 1 << 2,
+};
+
+enum xsc_ib_sw_parsing_offloads {
+ XSC_IB_SW_PARSING = 1 << 0,
+ XSC_IB_SW_PARSING_CSUM = 1 << 1,
+ XSC_IB_SW_PARSING_LSO = 1 << 2,
+};
+
+struct xsc_ib_sw_parsing_caps {
+ __u32 sw_parsing_offloads; /* enum xsc_ib_sw_parsing_offloads */
+
+ /* Corresponding bit will be set if qp type from
+ * 'enum ib_qp_type' is supported, e.g.
+ * supported_qpts |= 1 << IB_QPT_RAW_PACKET
+ */
+ __u32 supported_qpts;
+};
+
+struct xsc_ib_striding_rq_caps {
+ __u32 min_single_stride_log_num_of_bytes;
+ __u32 max_single_stride_log_num_of_bytes;
+ __u32 min_single_wqe_log_num_of_strides;
+ __u32 max_single_wqe_log_num_of_strides;
+
+ /* Corresponding bit will be set if qp type from
+ * 'enum ib_qp_type' is supported, e.g.
+ * supported_qpts |= 1 << IB_QPT_RAW_PACKET
+ */
+ __u32 supported_qpts;
+ __u32 reserved;
+};
+
+enum xsc_ib_query_dev_resp_flags {
+ /* Support 128B CQE compression */
+ XSC_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_COMP = 1 << 0,
+ XSC_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_PAD = 1 << 1,
+};
+
+enum xsc_ib_tunnel_offloads {
+ XSC_IB_TUNNELED_OFFLOADS_VXLAN = 1 << 0,
+ XSC_IB_TUNNELED_OFFLOADS_GRE = 1 << 1,
+ XSC_IB_TUNNELED_OFFLOADS_GENEVE = 1 << 2,
+ XSC_IB_TUNNELED_OFFLOADS_MPLS_GRE = 1 << 3,
+ XSC_IB_TUNNELED_OFFLOADS_MPLS_UDP = 1 << 4,
+};
+
+struct xsc_ib_query_device_resp {
+ __u32 comp_mask;
+ __u32 response_length;
+ struct xsc_ib_tso_caps tso_caps;
+ struct xsc_ib_rss_caps rss_caps;
+ struct xsc_ib_cqe_comp_caps cqe_comp_caps;
+ struct xsc_packet_pacing_caps packet_pacing_caps;
+ __u32 xsc_ib_support_multi_pkt_send_wqes;
+ __u32 flags; /* Use enum xsc_ib_query_dev_resp_flags */
+ struct xsc_ib_sw_parsing_caps sw_parsing_caps;
+ struct xsc_ib_striding_rq_caps striding_rq_caps;
+ __u32 tunnel_offloads_caps; /* enum xsc_ib_tunnel_offloads */
+ __u32 reserved;
+};
+
+struct xsc_ib_create_cq {
+ __aligned_u64 buf_addr;
+ __aligned_u64 db_addr;
+ __u32 cqe_size;
+};
+
+struct xsc_ib_create_cq_resp {
+ __u32 cqn;
+ __u32 reserved;
+};
+
+struct xsc_ib_resize_cq {
+ __aligned_u64 buf_addr;
+ __u16 cqe_size;
+ __u16 reserved0;
+ __u32 reserved1;
+};
+
+struct xsc_ib_create_qp {
+ __aligned_u64 buf_addr;
+ __aligned_u64 db_addr;
+ __u32 sq_wqe_count;
+ __u32 rq_wqe_count;
+ __u32 rq_wqe_shift;
+ __u32 flags;
+};
+
+/* RX Hash function flags */
+enum xsc_rx_hash_function_flags {
+ XSC_RX_HASH_FUNC_TOEPLITZ = 1 << 0,
+};
+
+/*
+ * RX Hash flags. These flags select which fields of an incoming packet
+ * participate in the RX Hash calculation. Each flag represents a packet
+ * field; when a flag is set, the field it represents is included in the
+ * hash computation.
+ * Note: *IPV4 and *IPV6 flags can't be enabled together on the same QP,
+ * and *TCP and *UDP flags can't be enabled together on the same QP.
+ */
+enum xsc_rx_hash_fields {
+ XSC_RX_HASH_SRC_IPV4 = 1 << 0,
+ XSC_RX_HASH_DST_IPV4 = 1 << 1,
+ XSC_RX_HASH_SRC_IPV6 = 1 << 2,
+ XSC_RX_HASH_DST_IPV6 = 1 << 3,
+ XSC_RX_HASH_SRC_PORT_TCP = 1 << 4,
+ XSC_RX_HASH_DST_PORT_TCP = 1 << 5,
+ XSC_RX_HASH_SRC_PORT_UDP = 1 << 6,
+ XSC_RX_HASH_DST_PORT_UDP = 1 << 7,
+ XSC_RX_HASH_IPSEC_SPI = 1 << 8,
+ /* Save bits for future fields */
+ XSC_RX_HASH_INNER = (1UL << 31),
+};
+
+struct xsc_ib_create_qp_rss {
+ __aligned_u64 rx_hash_fields_mask; /* enum xscd_rx_hash_fields */
+ __u8 rx_hash_function; /* enum xsc_rx_hash_function_flags */
+ __u8 rx_key_len; /* valid only for Toeplitz */
+ __u8 reserved[6];
+ __u8 rx_hash_key[128]; /* valid only for Toeplitz */
+ __u32 comp_mask;
+ __u32 flags;
+};
+
+struct xsc_ib_create_qp_resp {
+ __u32 bfreg_index;
+ __u32 resv;
+};
+
+enum xsc_ib_create_wq_mask {
+ XSC_IB_CREATE_WQ_STRIDING_RQ = (1 << 0),
+};
+
+struct xsc_ib_create_wq {
+ __aligned_u64 buf_addr;
+ __aligned_u64 db_addr;
+ __u32 rq_wqe_count;
+ __u32 rq_wqe_shift;
+ __u32 user_index;
+ __u32 flags;
+ __u32 comp_mask;
+ __u32 single_stride_log_num_of_bytes;
+ __u32 single_wqe_log_num_of_strides;
+ __u32 two_byte_shift_en;
+};
+
+struct xsc_ib_create_ah_resp {
+ __u32 response_length;
+ __u8 dmac[ETH_ALEN];
+ __u8 reserved[6];
+};
+
+struct xsc_ib_burst_info {
+ __u32 max_burst_sz;
+ __u16 typical_pkt_sz;
+ __u16 reserved;
+};
+
+struct xsc_ib_modify_qp {
+ __u32 comp_mask;
+ struct xsc_ib_burst_info burst_info;
+ __u32 reserved;
+};
+
+struct xsc_ib_modify_qp_resp {
+ __u32 response_length;
+ __u32 dctn;
+};
+
+struct xsc_ib_create_wq_resp {
+ __u32 response_length;
+ __u32 reserved;
+};
+
+struct xsc_ib_modify_wq {
+ __u32 comp_mask;
+ __u32 reserved;
+};
+
+struct xsc_ib_clock_info {
+ __u32 sign;
+ __u32 resv;
+ __aligned_u64 nsec;
+ __aligned_u64 cycles;
+ __aligned_u64 frac;
+ __u32 mult;
+ __u32 shift;
+ __aligned_u64 mask;
+ __aligned_u64 overflow_period;
+};
+
+enum xsc_ib_mmap_cmd {
+ XSC_IB_MMAP_REGULAR_PAGE = 0,
+ XSC_IB_MMAP_GET_CONTIGUOUS_PAGES = 1,
+ XSC_IB_MMAP_WC_PAGE = 2,
+ XSC_IB_MMAP_NC_PAGE = 3,
+ XSC_IB_MMAP_CORE_CLOCK = 5,
+ XSC_IB_MMAP_ALLOC_WC = 6,
+ XSC_IB_MMAP_CLOCK_INFO = 7,
+ XSC_IB_MMAP_DEVICE_MEM = 8,
+};
+
+enum {
+ XSC_IB_CLOCK_INFO_KERNEL_UPDATING = 1,
+};
+
+struct xsc_ib_flow_counters_desc {
+ __u32 description;
+ __u32 index;
+};
+
+struct xsc_ib_flow_counters_data {
+ RDMA_UAPI_PTR(struct xsc_ib_flow_counters_desc *, counters_data);
+ __u32 ncounters;
+ __u32 reserved;
+};
+
+struct xsc_ib_create_flow {
+ __u32 ncounters_data;
+ __u32 reserved;
+ /*
+ * What follows is counters data sized by ncounters_data; each
+ * entry in data[] should match the corresponding counter object
+ * that was pointed to by a counters spec at flow creation time.
+ */
+ struct xsc_ib_flow_counters_data data[];
+};
+
+#endif /* XSC_ABI_USER_H */
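
A note on the layout rule stated at the top of this header: userspace passes
buffer addresses through the __u64 fields rather than pointer types, so that
32-bit and 64-bit processes produce the same struct layout. A minimal sketch
of filling struct xsc_ib_create_cq that way is below; it is illustrative
only: the include path assumes the in-tree kernel-headers layout, and the
cqe_size and buffer sizes are placeholders rather than values mandated by
the device:

#include <stdint.h>
#include <stdlib.h>
#include <rdma/xsc-abi.h>	/* the header added above */

int main(void)
{
	void *cq_buf = aligned_alloc(4096, 4096);
	void *db_rec = aligned_alloc(4096, 4096);
	struct xsc_ib_create_cq cmd = {
		/* pointers travel as __u64, per the ABI comment above */
		.buf_addr = (uintptr_t)cq_buf,
		.db_addr = (uintptr_t)db_rec,
		.cqe_size = 64,	/* placeholder value for illustration */
	};

	(void)cmd;
	free(db_rec);
	free(cq_buf);
	return 0;
}
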
diff --git a/kernel-headers/rdma/xsc_user_ioctl_cmds.h b/kernel-headers/rdma/xsc_user_ioctl_cmds.h
new file mode 100644
index 0000000..590a061
--- /dev/null
+++ b/kernel-headers/rdma/xsc_user_ioctl_cmds.h
@@ -0,0 +1,163 @@
+/*
+ * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd.
+ * All rights reserved.
+ */
+
+#ifndef XSC_USER_IOCTL_CMDS_H
+#define XSC_USER_IOCTL_CMDS_H
+
+#include <linux/types.h>
+#include <rdma/ib_user_ioctl_cmds.h>
+
+enum xsc_ib_create_flow_action_attrs {
+ /* This attribute belongs to the driver namespace */
+ XSC_IB_ATTR_CREATE_FLOW_ACTION_FLAGS = (1U << UVERBS_ID_NS_SHIFT),
+};
+
+enum xsc_ib_alloc_dm_attrs {
+ XSC_IB_ATTR_ALLOC_DM_RESP_START_OFFSET = (1U << UVERBS_ID_NS_SHIFT),
+ XSC_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX,
+};
+
+enum xsc_ib_devx_methods {
+ XSC_IB_METHOD_DEVX_OTHER = (1U << UVERBS_ID_NS_SHIFT),
+ XSC_IB_METHOD_DEVX_QUERY_UAR,
+ XSC_IB_METHOD_DEVX_QUERY_EQN,
+};
+
+enum xsc_ib_devx_other_attrs {
+ XSC_IB_ATTR_DEVX_OTHER_CMD_IN = (1U << UVERBS_ID_NS_SHIFT),
+ XSC_IB_ATTR_DEVX_OTHER_CMD_OUT,
+};
+
+enum xsc_ib_devx_obj_create_attrs {
+ XSC_IB_ATTR_DEVX_OBJ_CREATE_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+ XSC_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN,
+ XSC_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT,
+};
+
+enum xsc_ib_devx_query_uar_attrs {
+ XSC_IB_ATTR_DEVX_QUERY_UAR_USER_IDX = (1U << UVERBS_ID_NS_SHIFT),
+ XSC_IB_ATTR_DEVX_QUERY_UAR_DEV_IDX,
+};
+
+enum xsc_ib_devx_obj_destroy_attrs {
+ XSC_IB_ATTR_DEVX_OBJ_DESTROY_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+};
+
+enum xsc_ib_devx_obj_modify_attrs {
+ XSC_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+ XSC_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN,
+ XSC_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT,
+};
+
+enum xsc_ib_devx_obj_query_attrs {
+ XSC_IB_ATTR_DEVX_OBJ_QUERY_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+ XSC_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN,
+ XSC_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT,
+};
+
+enum xsc_ib_devx_query_eqn_attrs {
+ XSC_IB_ATTR_DEVX_QUERY_EQN_USER_VEC = (1U << UVERBS_ID_NS_SHIFT),
+ XSC_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN,
+};
+
+enum xsc_ib_devx_obj_methods {
+ XSC_IB_METHOD_DEVX_OBJ_CREATE = (1U << UVERBS_ID_NS_SHIFT),
+ XSC_IB_METHOD_DEVX_OBJ_DESTROY,
+ XSC_IB_METHOD_DEVX_OBJ_MODIFY,
+ XSC_IB_METHOD_DEVX_OBJ_QUERY,
+};
+
+enum xsc_ib_devx_umem_reg_attrs {
+ XSC_IB_ATTR_DEVX_UMEM_REG_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+ XSC_IB_ATTR_DEVX_UMEM_REG_ADDR,
+ XSC_IB_ATTR_DEVX_UMEM_REG_LEN,
+ XSC_IB_ATTR_DEVX_UMEM_REG_ACCESS,
+ XSC_IB_ATTR_DEVX_UMEM_REG_OUT_ID,
+};
+
+enum xsc_ib_devx_umem_dereg_attrs {
+ XSC_IB_ATTR_DEVX_UMEM_DEREG_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+};
+
+enum xsc_ib_devx_umem_methods {
+ XSC_IB_METHOD_DEVX_UMEM_REG = (1U << UVERBS_ID_NS_SHIFT),
+ XSC_IB_METHOD_DEVX_UMEM_DEREG,
+};
+
+enum xsc_ib_objects {
+ XSC_IB_OBJECT_DEVX = (1U << UVERBS_ID_NS_SHIFT),
+ XSC_IB_OBJECT_DEVX_OBJ,
+ XSC_IB_OBJECT_DEVX_UMEM,
+ XSC_IB_OBJECT_FLOW_MATCHER,
+};
+
+enum xsc_ib_flow_matcher_create_attrs {
+ XSC_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+ XSC_IB_ATTR_FLOW_MATCHER_MATCH_MASK,
+ XSC_IB_ATTR_FLOW_MATCHER_FLOW_TYPE,
+ XSC_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA,
+ XSC_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS,
+};
+
+enum xsc_ib_flow_matcher_destroy_attrs {
+ XSC_IB_ATTR_FLOW_MATCHER_DESTROY_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+};
+
+enum xsc_ib_flow_matcher_methods {
+ XSC_IB_METHOD_FLOW_MATCHER_CREATE = (1U << UVERBS_ID_NS_SHIFT),
+ XSC_IB_METHOD_FLOW_MATCHER_DESTROY,
+};
+
+#define XSC_IB_DW_MATCH_PARAM 0x80
+
+struct xsc_ib_match_params {
+ __u32 match_params[XSC_IB_DW_MATCH_PARAM];
+};
+
+enum xsc_ib_flow_type {
+ XSC_IB_FLOW_TYPE_NORMAL,
+ XSC_IB_FLOW_TYPE_SNIFFER,
+ XSC_IB_FLOW_TYPE_ALL_DEFAULT,
+ XSC_IB_FLOW_TYPE_MC_DEFAULT,
+};
+
+enum xsc_ib_create_flow_attrs {
+ XSC_IB_ATTR_CREATE_FLOW_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+ XSC_IB_ATTR_CREATE_FLOW_MATCH_VALUE,
+ XSC_IB_ATTR_CREATE_FLOW_DEST_QP,
+ XSC_IB_ATTR_CREATE_FLOW_DEST_DEVX,
+ XSC_IB_ATTR_CREATE_FLOW_MATCHER,
+ XSC_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS,
+ XSC_IB_ATTR_CREATE_FLOW_TAG,
+};
+
+enum xsc_ib_destoy_flow_attrs {
+ XSC_IB_ATTR_DESTROY_FLOW_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+};
+
+enum xsc_ib_flow_methods {
+ XSC_IB_METHOD_CREATE_FLOW = (1U << UVERBS_ID_NS_SHIFT),
+ XSC_IB_METHOD_DESTROY_FLOW,
+};
+
+enum xsc_ib_flow_action_methods {
+ XSC_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER = (1U << UVERBS_ID_NS_SHIFT),
+ XSC_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT,
+};
+
+enum xsc_ib_create_flow_action_create_modify_header_attrs {
+ XSC_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+ XSC_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM,
+ XSC_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE,
+};
+
+enum xsc_ib_create_flow_action_create_packet_reformat_attrs {
+ XSC_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+ XSC_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE,
+ XSC_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE,
+ XSC_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF,
+};
+
+#endif
diff --git a/kernel-headers/rdma/xsc_user_ioctl_verbs.h b/kernel-headers/rdma/xsc_user_ioctl_verbs.h
new file mode 100644
index 0000000..614f2ee
--- /dev/null
+++ b/kernel-headers/rdma/xsc_user_ioctl_verbs.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd.
+ * All rights reserved.
+ */
+
+#ifndef XSC_USER_IOCTL_VERBS_H
+#define XSC_USER_IOCTL_VERBS_H
+
+#include <linux/types.h>
+
+enum xsc_ib_uapi_flow_action_flags {
+ XSC_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA = 1 << 0,
+};
+
+enum xsc_ib_uapi_flow_table_type {
+ XSC_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX = 0x0,
+ XSC_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX = 0x1,
+};
+
+enum xsc_ib_uapi_flow_action_packet_reformat_type {
+ XSC_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2 = 0x0,
+ XSC_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL = 0x1,
+ XSC_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2 = 0x2,
+ XSC_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL = 0x3,
+};
+
+#endif
diff --git a/libibverbs/verbs.h b/libibverbs/verbs.h
index 78129fd..d6a053e 100644
--- a/libibverbs/verbs.h
+++ b/libibverbs/verbs.h
@@ -2275,6 +2275,7 @@ extern const struct verbs_device_ops verbs_provider_qedr;
extern const struct verbs_device_ops verbs_provider_rxe;
extern const struct verbs_device_ops verbs_provider_siw;
extern const struct verbs_device_ops verbs_provider_vmw_pvrdma;
+extern const struct verbs_device_ops verbs_provider_xscale;
extern const struct verbs_device_ops verbs_provider_all;
extern const struct verbs_device_ops verbs_provider_none;
void ibv_static_providers(void *unused, ...);
diff --git a/providers/xscale/CMakeLists.txt b/providers/xscale/CMakeLists.txt
new file mode 100644
index 0000000..1188db1
--- /dev/null
+++ b/providers/xscale/CMakeLists.txt
@@ -0,0 +1,18 @@
+rdma_shared_provider(xscale libxsc.map
+ 1 1.24.${PACKAGE_VERSION}
+ buf.c
+ cq.c
+ dbrec.c
+ xscale.c
+ qp.c
+ verbs.c
+)
+
+publish_headers(infiniband
+ ../../kernel-headers/rdma/xsc_user_ioctl_verbs.h
+ ../../kernel-headers/rdma/xsc_user_ioctl_cmds.h
+ xsc_api.h
+ xscdv.h
+)
+
+rdma_pkg_config("xscale" "libibverbs" "${CMAKE_THREAD_LIBS_INIT}")
diff --git a/providers/xscale/bitmap.h b/providers/xscale/bitmap.h
new file mode 100644
index 0000000..ef7f202
--- /dev/null
+++ b/providers/xscale/bitmap.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd.
+ * All rights reserved.
+ */
+
+#ifndef BITMAP_H
+#define BITMAP_H
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <pthread.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#include <sys/mman.h>
+#include <linux/errno.h>
+#include "xscale.h"
+
+/* Only ia64 requires this */
+#ifdef __ia64__
+#define XSC_SHM_ADDR ((void *)0x8000000000000000UL)
+#define XSC_SHMAT_FLAGS (SHM_RND)
+#else
+#define XSC_SHM_ADDR NULL
+#define XSC_SHMAT_FLAGS 0
+#endif
+
+#define BITS_PER_LONG (8 * sizeof(long))
+#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_LONG)
+
+#ifndef HPAGE_SIZE
+#define HPAGE_SIZE (2UL * 1024 * 1024)
+#endif
+
+#define XSC_SHM_LENGTH HPAGE_SIZE
+#define XSC_Q_CHUNK_SIZE 32768
+#define XSC_SHM_NUM_REGION 64
+
+static inline unsigned long xsc_ffz(unsigned long word)
+{
+ return __builtin_ffsl(~word) - 1;
+}
+
+static inline uint32_t xsc_find_first_zero_bit(const unsigned long *addr,
+ uint32_t size)
+{
+ const unsigned long *p = addr;
+ uint32_t result = 0;
+ unsigned long tmp;
+
+ while (size & ~(BITS_PER_LONG - 1)) {
+ tmp = *(p++);
+ if (~tmp)
+ goto found;
+ result += BITS_PER_LONG;
+ size -= BITS_PER_LONG;
+ }
+ if (!size)
+ return result;
+
+ tmp = (*p) | (~0UL << size);
+ if (tmp == ~0UL) /* Are any bits zero? */
+ return result + size; /* Nope. */
+found:
+ return result + xsc_ffz(tmp);
+}
+
+static inline void xsc_set_bit(unsigned int nr, unsigned long *addr)
+{
+ addr[(nr / BITS_PER_LONG)] |= (1UL << (nr % BITS_PER_LONG));
+}
+
+static inline void xsc_clear_bit(unsigned int nr, unsigned long *addr)
+{
+ addr[(nr / BITS_PER_LONG)] &= ~(1UL << (nr % BITS_PER_LONG));
+}
+
+static inline int xsc_test_bit(unsigned int nr, const unsigned long *addr)
+{
+ return !!(addr[(nr / BITS_PER_LONG)] & (1UL << (nr % BITS_PER_LONG)));
+}
+
+#endif
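
These helpers back the hugepage chunk allocator in buf.c below: one 2 MB
region (XSC_SHM_LENGTH) is split into 32 KB chunks (XSC_Q_CHUNK_SIZE), so a
single region's occupancy fits in one 64-bit word. A standalone sketch of
the claim loop they support follows; it carries local copies of the two
helpers so it compiles without the provider headers, and the chunk count is
taken from the constants above:

#include <stdio.h>

#define BITS_PER_LONG (8 * sizeof(long))

/* local copies of xsc_ffz() and xsc_set_bit() from bitmap.h above */
static unsigned long ffz_word(unsigned long word)
{
	return __builtin_ffsl(~word) - 1;
}

static void set_bit_word(unsigned int nr, unsigned long *addr)
{
	addr[nr / BITS_PER_LONG] |= (1UL << (nr % BITS_PER_LONG));
}

int main(void)
{
	unsigned long map[1] = { 0 };	/* 2 MB / 32 KB = 64 chunks */
	unsigned int i;

	/* claim three chunks the way the buf.c allocator claims queue buffers */
	for (i = 0; i < 3; i++) {
		unsigned long chunk = ffz_word(map[0]);

		set_bit_word(chunk, map);
		printf("claimed chunk %lu\n", chunk);
	}
	return 0;
}
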
diff --git a/providers/xscale/buf.c b/providers/xscale/buf.c
new file mode 100644
index 0000000..61daf6d
--- /dev/null
+++ b/providers/xscale/buf.c
@@ -0,0 +1,594 @@
+/*
+ * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd.
+ * All rights reserved.
+ */
+
+#include <config.h>
+
+#include <signal.h>
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+
+#include "xscale.h"
+#include "bitmap.h"
+
+static int xsc_bitmap_init(struct xsc_bitmap *bitmap, uint32_t num,
+ uint32_t mask)
+{
+ bitmap->last = 0;
+ bitmap->top = 0;
+ bitmap->max = num;
+ bitmap->avail = num;
+ bitmap->mask = mask;
+ bitmap->avail = bitmap->max;
+ bitmap->table = calloc(BITS_TO_LONGS(bitmap->max), sizeof(*bitmap->table));
+ if (!bitmap->table)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void bitmap_free_range(struct xsc_bitmap *bitmap, uint32_t obj,
+ int cnt)
+{
+ int i;
+
+ obj &= bitmap->max - 1;
+
+ for (i = 0; i < cnt; i++)
+ xsc_clear_bit(obj + i, bitmap->table);
+ bitmap->last = min(bitmap->last, obj);
+ bitmap->top = (bitmap->top + bitmap->max) & bitmap->mask;
+ bitmap->avail += cnt;
+}
+
+static int bitmap_empty(struct xsc_bitmap *bitmap)
+{
+ return (bitmap->avail == bitmap->max) ? 1 : 0;
+}
+
+static int bitmap_avail(struct xsc_bitmap *bitmap)
+{
+ return bitmap->avail;
+}
+
+static void xsc_bitmap_cleanup(struct xsc_bitmap *bitmap)
+{
+ if (bitmap->table)
+ free(bitmap->table);
+}
+
+static void free_huge_mem(struct xsc_hugetlb_mem *hmem)
+{
+ xsc_bitmap_cleanup(&hmem->bitmap);
+ if (shmdt(hmem->shmaddr) == -1)
+ xsc_dbg(stderr, XSC_DBG_CONTIG, "%s\n", strerror(errno));
+ shmctl(hmem->shmid, IPC_RMID, NULL);
+ free(hmem);
+}
+
+static int xsc_bitmap_alloc(struct xsc_bitmap *bitmap)
+{
+ uint32_t obj;
+ int ret;
+
+ obj = xsc_find_first_zero_bit(bitmap->table, bitmap->max);
+ if (obj < bitmap->max) {
+ xsc_set_bit(obj, bitmap->table);
+ bitmap->last = (obj + 1);
+ if (bitmap->last == bitmap->max)
+ bitmap->last = 0;
+ obj |= bitmap->top;
+ ret = obj;
+ } else
+ ret = -1;
+
+ if (ret != -1)
+ --bitmap->avail;
+
+ return ret;
+}
+
+static uint32_t find_aligned_range(unsigned long *bitmap,
+ uint32_t start, uint32_t nbits,
+ int len, int alignment)
+{
+ uint32_t end, i;
+
+again:
+ start = align(start, alignment);
+
+ while ((start < nbits) && xsc_test_bit(start, bitmap))
+ start += alignment;
+
+ if (start >= nbits)
+ return -1;
+
+ end = start + len;
+ if (end > nbits)
+ return -1;
+
+ for (i = start + 1; i < end; i++) {
+ if (xsc_test_bit(i, bitmap)) {
+ start = i + 1;
+ goto again;
+ }
+ }
+
+ return start;
+}
+
+static int bitmap_alloc_range(struct xsc_bitmap *bitmap, int cnt,
+ int align)
+{
+ uint32_t obj;
+ int ret, i;
+
+ if (cnt == 1 && align == 1)
+ return xsc_bitmap_alloc(bitmap);
+
+ if (cnt > bitmap->max)
+ return -1;
+
+ obj = find_aligned_range(bitmap->table, bitmap->last,
+ bitmap->max, cnt, align);
+ if (obj >= bitmap->max) {
+ bitmap->top = (bitmap->top + bitmap->max) & bitmap->mask;
+ obj = find_aligned_range(bitmap->table, 0, bitmap->max,
+ cnt, align);
+ }
+
+ if (obj < bitmap->max) {
+ for (i = 0; i < cnt; i++)
+ xsc_set_bit(obj + i, bitmap->table);
+ if (obj == bitmap->last) {
+ bitmap->last = (obj + cnt);
+ if (bitmap->last >= bitmap->max)
+ bitmap->last = 0;
+ }
+ obj |= bitmap->top;
+ ret = obj;
+ } else
+ ret = -1;
+
+ if (ret != -1)
+ bitmap->avail -= cnt;
+
+ return obj;
+}
+
+static struct xsc_hugetlb_mem *alloc_huge_mem(size_t size)
+{
+ struct xsc_hugetlb_mem *hmem;
+ size_t shm_len;
+
+ hmem = malloc(sizeof(*hmem));
+ if (!hmem)
+ return NULL;
+
+ shm_len = align(size, XSC_SHM_LENGTH);
+ hmem->shmid = shmget(IPC_PRIVATE, shm_len, SHM_HUGETLB | SHM_R | SHM_W);
+ if (hmem->shmid == -1) {
+ xsc_dbg(stderr, XSC_DBG_CONTIG, "%s\n", strerror(errno));
+ goto out_free;
+ }
+
+ hmem->shmaddr = shmat(hmem->shmid, XSC_SHM_ADDR, XSC_SHMAT_FLAGS);
+ if (hmem->shmaddr == (void *)-1) {
+ xsc_dbg(stderr, XSC_DBG_CONTIG, "%s\n", strerror(errno));
+ goto out_rmid;
+ }
+
+ if (xsc_bitmap_init(&hmem->bitmap, shm_len / XSC_Q_CHUNK_SIZE,
+ shm_len / XSC_Q_CHUNK_SIZE - 1)) {
+ xsc_dbg(stderr, XSC_DBG_CONTIG, "%s\n", strerror(errno));
+ goto out_shmdt;
+ }
+
+ /*
+ * Marked to be destroyed when process detaches from shmget segment
+ */
+ shmctl(hmem->shmid, IPC_RMID, NULL);
+
+ return hmem;
+
+out_shmdt:
+ if (shmdt(hmem->shmaddr) == -1)
+ xsc_dbg(stderr, XSC_DBG_CONTIG, "%s\n", strerror(errno));
+
+out_rmid:
+ shmctl(hmem->shmid, IPC_RMID, NULL);
+
+out_free:
+ free(hmem);
+ return NULL;
+}
+
+static int alloc_huge_buf(struct xsc_context *xctx, struct xsc_buf *buf,
+ size_t size, int page_size)
+{
+ int found = 0;
+ int nchunk;
+ struct xsc_hugetlb_mem *hmem;
+ int ret;
+
+ buf->length = align(size, XSC_Q_CHUNK_SIZE);
+ nchunk = buf->length / XSC_Q_CHUNK_SIZE;
+
+ if (!nchunk)
+ return 0;
+
+ xsc_spin_lock(&xctx->hugetlb_lock);
+ list_for_each(&xctx->hugetlb_list, hmem, entry) {
+ if (bitmap_avail(&hmem->bitmap)) {
+ buf->base = bitmap_alloc_range(&hmem->bitmap, nchunk, 1);
+ if (buf->base != -1) {
+ buf->hmem = hmem;
+ found = 1;
+ break;
+ }
+ }
+ }
+ xsc_spin_unlock(&xctx->hugetlb_lock);
+
+ if (!found) {
+ hmem = alloc_huge_mem(buf->length);
+ if (!hmem)
+ return -1;
+
+ buf->base = bitmap_alloc_range(&hmem->bitmap, nchunk, 1);
+ if (buf->base == -1) {
+ free_huge_mem(hmem);
+ /* TBD: remove after proven stability */
+ fprintf(stderr, "BUG: huge allocation\n");
+ return -1;
+ }
+
+ buf->hmem = hmem;
+
+ xsc_spin_lock(&xctx->hugetlb_lock);
+ if (bitmap_avail(&hmem->bitmap))
+ list_add(&xctx->hugetlb_list, &hmem->entry);
+ else
+ list_add_tail(&xctx->hugetlb_list, &hmem->entry);
+ xsc_spin_unlock(&xctx->hugetlb_lock);
+ }
+
+ buf->buf = hmem->shmaddr + buf->base * XSC_Q_CHUNK_SIZE;
+
+ ret = ibv_dontfork_range(buf->buf, buf->length);
+ if (ret) {
+ goto out_fork;
+ }
+ buf->type = XSC_ALLOC_TYPE_HUGE;
+
+ return 0;
+
+out_fork:
+ xsc_spin_lock(&xctx->hugetlb_lock);
+ bitmap_free_range(&hmem->bitmap, buf->base, nchunk);
+ if (bitmap_empty(&hmem->bitmap)) {
+ list_del(&hmem->entry);
+ xsc_spin_unlock(&xctx->hugetlb_lock);
+ free_huge_mem(hmem);
+ } else
+ xsc_spin_unlock(&xctx->hugetlb_lock);
+
+ return -1;
+}
+
+static void free_huge_buf(struct xsc_context *ctx, struct xsc_buf *buf)
+{
+ int nchunk;
+
+ nchunk = buf->length / XSC_Q_CHUNK_SIZE;
+ if (!nchunk)
+ return;
+
+ xsc_spin_lock(&ctx->hugetlb_lock);
+ bitmap_free_range(&buf->hmem->bitmap, buf->base, nchunk);
+ if (bitmap_empty(&buf->hmem->bitmap)) {
+ list_del(&buf->hmem->entry);
+ xsc_spin_unlock(&ctx->hugetlb_lock);
+ free_huge_mem(buf->hmem);
+ } else
+ xsc_spin_unlock(&ctx->hugetlb_lock);
+}
+
+void xsc_free_buf_extern(struct xsc_context *ctx, struct xsc_buf *buf)
+{
+ ibv_dofork_range(buf->buf, buf->length);
+ ctx->extern_alloc.free(buf->buf, ctx->extern_alloc.data);
+}
+
+int xsc_alloc_buf_extern(struct xsc_context *ctx, struct xsc_buf *buf,
+ size_t size)
+{
+ void *addr;
+
+ addr = ctx->extern_alloc.alloc(size, ctx->extern_alloc.data);
+ if (addr || size == 0) {
+ if (ibv_dontfork_range(addr, size)) {
+ xsc_err("External mode dontfork_range failed\n");
+ ctx->extern_alloc.free(addr,
+ ctx->extern_alloc.data);
+ return -1;
+ }
+ buf->buf = addr;
+ buf->length = size;
+ buf->type = XSC_ALLOC_TYPE_EXTERNAL;
+ return 0;
+ }
+
+ xsc_err("External alloc failed\n");
+ return -1;
+}
+
+int xsc_alloc_prefered_buf(struct xsc_context *xctx,
+ struct xsc_buf *buf,
+ size_t size, int page_size,
+ enum xsc_alloc_type type,
+ const char *component)
+{
+ int ret;
+
+ /*
+ * Fallback mechanism priority:
+ * huge pages
+ * contig pages
+ * default
+ */
+ if (type == XSC_ALLOC_TYPE_HUGE ||
+ type == XSC_ALLOC_TYPE_PREFER_HUGE ||
+ type == XSC_ALLOC_TYPE_ALL) {
+ ret = alloc_huge_buf(xctx, buf, size, page_size);
+ if (!ret)
+ return 0;
+
+ if (type == XSC_ALLOC_TYPE_HUGE)
+ return -1;
+
+ xsc_dbg(xctx->dbg_fp, XSC_DBG_CONTIG, "Huge mode allocation failed, fallback to %s mode\n",
+ type == XSC_ALLOC_TYPE_ALL ? "contig" : "default");
+ }
+
+ if (type == XSC_ALLOC_TYPE_CONTIG ||
+ type == XSC_ALLOC_TYPE_PREFER_CONTIG ||
+ type == XSC_ALLOC_TYPE_ALL) {
+ ret = xsc_alloc_buf_contig(xctx, buf, size, page_size, component);
+ if (!ret)
+ return 0;
+
+ if (type == XSC_ALLOC_TYPE_CONTIG)
+ return -1;
+ xsc_dbg(xctx->dbg_fp, XSC_DBG_CONTIG, "Contig allocation failed, fallback to default mode\n");
+ }
+
+ if (type == XSC_ALLOC_TYPE_EXTERNAL)
+ return xsc_alloc_buf_extern(xctx, buf, size);
+
+ return xsc_alloc_buf(buf, size, page_size);
+
+}
+
+int xsc_free_actual_buf(struct xsc_context *ctx, struct xsc_buf *buf)
+{
+ int err = 0;
+
+ switch (buf->type) {
+ case XSC_ALLOC_TYPE_ANON:
+ xsc_free_buf(buf);
+ break;
+
+ case XSC_ALLOC_TYPE_HUGE:
+ free_huge_buf(ctx, buf);
+ break;
+
+ case XSC_ALLOC_TYPE_CONTIG:
+ xsc_free_buf_contig(ctx, buf);
+ break;
+
+ case XSC_ALLOC_TYPE_EXTERNAL:
+ xsc_free_buf_extern(ctx, buf);
+ break;
+
+ default:
+ fprintf(stderr, "Bad allocation type\n");
+ }
+
+ return err;
+}
+
+/* This function computes log2(v) rounded up.
+ * We don't want a dependency on libm, which exposes the ceil & log2 APIs.
+ * Code was written based on public domain code:
+ * URL: http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog.
+ */
+static uint32_t xsc_get_block_order(uint32_t v)
+{
+ static const uint32_t bits_arr[] = {0x2, 0xC, 0xF0, 0xFF00, 0xFFFF0000};
+ static const uint32_t shift_arr[] = {1, 2, 4, 8, 16};
+ int i;
+ uint32_t input_val = v;
+
+ register uint32_t r = 0;/* result of log2(v) will go here */
+ for (i = 4; i >= 0; i--) {
+ if (v & bits_arr[i]) {
+ v >>= shift_arr[i];
+ r |= shift_arr[i];
+ }
+ }
+ /* Rounding up if required */
+ r += !!(input_val & ((1 << r) - 1));
+
+ return r;
+}
+
+bool xsc_is_extern_alloc(struct xsc_context *context)
+{
+ return context->extern_alloc.alloc && context->extern_alloc.free;
+}
+
+void xsc_get_alloc_type(struct xsc_context *context,
+ const char *component,
+ enum xsc_alloc_type *alloc_type,
+ enum xsc_alloc_type default_type)
+
+{
+ char *env_value;
+ char name[128];
+
+ if (xsc_is_extern_alloc(context)) {
+ *alloc_type = XSC_ALLOC_TYPE_EXTERNAL;
+ return;
+ }
+
+ snprintf(name, sizeof(name), "%s_ALLOC_TYPE", component);
+
+ *alloc_type = default_type;
+
+ env_value = getenv(name);
+ if (env_value) {
+ if (!strcasecmp(env_value, "ANON"))
+ *alloc_type = XSC_ALLOC_TYPE_ANON;
+ else if (!strcasecmp(env_value, "HUGE"))
+ *alloc_type = XSC_ALLOC_TYPE_HUGE;
+ else if (!strcasecmp(env_value, "CONTIG"))
+ *alloc_type = XSC_ALLOC_TYPE_CONTIG;
+ else if (!strcasecmp(env_value, "PREFER_CONTIG"))
+ *alloc_type = XSC_ALLOC_TYPE_PREFER_CONTIG;
+ else if (!strcasecmp(env_value, "PREFER_HUGE"))
+ *alloc_type = XSC_ALLOC_TYPE_PREFER_HUGE;
+ else if (!strcasecmp(env_value, "ALL"))
+ *alloc_type = XSC_ALLOC_TYPE_ALL;
+ }
+}
+
+static void xsc_alloc_get_env_info(int *max_block_log,
+ int *min_block_log,
+ const char *component)
+
+{
+ char *env;
+ int value;
+ char name[128];
+
+ /* First set defaults */
+ *max_block_log = XSC_MAX_LOG2_CONTIG_BLOCK_SIZE;
+ *min_block_log = XSC_MIN_LOG2_CONTIG_BLOCK_SIZE;
+
+ snprintf(name, sizeof(name), "%s_MAX_LOG2_CONTIG_BSIZE", component);
+ env = getenv(name);
+ if (env) {
+ value = atoi(env);
+ if (value <= XSC_MAX_LOG2_CONTIG_BLOCK_SIZE &&
+ value >= XSC_MIN_LOG2_CONTIG_BLOCK_SIZE)
+ *max_block_log = value;
+ else
+ fprintf(stderr, "Invalid value %d for %s\n",
+ value, name);
+ }
+ snprintf(name, sizeof(name), "%s_MIN_LOG2_CONTIG_BSIZE", component);
+ env = getenv(name);
+ if (env) {
+ value = atoi(env);
+ if (value >= XSC_MIN_LOG2_CONTIG_BLOCK_SIZE &&
+ value <= *max_block_log)
+ *min_block_log = value;
+ else
+ fprintf(stderr, "Invalid value %d for %s\n",
+ value, name);
+ }
+}
+
+int xsc_alloc_buf_contig(struct xsc_context *xctx,
+ struct xsc_buf *buf, size_t size,
+ int page_size,
+ const char *component)
+{
+ void *addr = MAP_FAILED;
+ int block_size_exp;
+ int max_block_log;
+ int min_block_log;
+ struct ibv_context *context = &xctx->ibv_ctx.context;
+ off_t offset;
+
+ xsc_alloc_get_env_info(&max_block_log,
+ &min_block_log,
+ component);
+
+ block_size_exp = xsc_get_block_order(size);
+
+ if (block_size_exp > max_block_log)
+ block_size_exp = max_block_log;
+
+ do {
+ offset = 0;
+ set_command(XSC_IB_MMAP_GET_CONTIGUOUS_PAGES, &offset);
+ set_order(block_size_exp, &offset);
+ addr = mmap(NULL, size, PROT_WRITE | PROT_READ, MAP_SHARED,
+ context->cmd_fd, page_size * offset);
+ if (addr != MAP_FAILED)
+ break;
+
+ /*
+ * The kernel returns EINVAL if not supported
+ */
+ if (errno == EINVAL)
+ return -1;
+
+ block_size_exp -= 1;
+ } while (block_size_exp >= min_block_log);
+ xsc_dbg(xctx->dbg_fp, XSC_DBG_CONTIG, "block order %d, addr %p\n", block_size_exp, addr);
+
+ if (addr == MAP_FAILED)
+ return -1;
+
+ if (ibv_dontfork_range(addr, size)) {
+ munmap(addr, size);
+ return -1;
+ }
+
+ buf->buf = addr;
+ buf->length = size;
+ buf->type = XSC_ALLOC_TYPE_CONTIG;
+
+ return 0;
+}
+
+void xsc_free_buf_contig(struct xsc_context *xctx, struct xsc_buf *buf)
+{
+ ibv_dofork_range(buf->buf, buf->length);
+ munmap(buf->buf, buf->length);
+}
+
+int xsc_alloc_buf(struct xsc_buf *buf, size_t size, int page_size)
+{
+ int ret;
+ int al_size;
+
+ al_size = align(size, page_size);
+ ret = posix_memalign(&buf->buf, page_size, al_size);
+ if (ret)
+ return ret;
+
+ ret = ibv_dontfork_range(buf->buf, al_size);
+ if (ret)
+ free(buf->buf);
+
+ if (!ret) {
+ buf->length = al_size;
+ buf->type = XSC_ALLOC_TYPE_ANON;
+ }
+
+ return ret;
+}
+
+void xsc_free_buf(struct xsc_buf *buf)
+{
+ ibv_dofork_range(buf->buf, buf->length);
+ free(buf->buf);
+}
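
One arithmetic detail worth spelling out: xsc_get_block_order() above
computes ceil(log2(v)) without pulling in libm, as its comment says, and the
contig allocator uses that order as the starting mmap block size. A
standalone spot check of the rounding behaviour follows (the function body
is copied locally so the sketch compiles on its own):

#include <assert.h>
#include <stdint.h>

/* local copy of xsc_get_block_order() from buf.c above: ceil(log2(v)) */
static uint32_t block_order(uint32_t v)
{
	static const uint32_t bits_arr[] = {0x2, 0xC, 0xF0, 0xFF00, 0xFFFF0000};
	static const uint32_t shift_arr[] = {1, 2, 4, 8, 16};
	uint32_t input_val = v, r = 0;
	int i;

	for (i = 4; i >= 0; i--) {
		if (v & bits_arr[i]) {
			v >>= shift_arr[i];
			r |= shift_arr[i];
		}
	}
	r += !!(input_val & ((1 << r) - 1));	/* round up for non-powers of two */
	return r;
}

int main(void)
{
	assert(block_order(1) == 0);
	assert(block_order(2) == 1);
	assert(block_order(3) == 2);		/* rounded up */
	assert(block_order(4096) == 12);
	assert(block_order(4097) == 13);	/* rounded up */
	return 0;
}
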
diff --git a/providers/xscale/cq.c b/providers/xscale/cq.c
new file mode 100644
index 0000000..e2619f0
--- /dev/null
+++ b/providers/xscale/cq.c
@@ -0,0 +1,1410 @@
+/*
+ * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd.
+ * All rights reserved.
+ */
+
+#include <config.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+
+#include <util/compiler.h>
+#include <util/mmio.h>
+#include <infiniband/opcode.h>
+
+#include "xscale.h"
+#include "wqe.h"
+#include "xsc_hsi.h"
+
+enum {
+ CQ_OK = 0,
+ CQ_EMPTY = -1,
+ CQ_POLL_ERR = -2
+};
+
+enum {
+ XSC_CQE_APP_TAG_MATCHING = 1,
+};
+
+enum {
+ XSC_CQE_APP_OP_TM_CONSUMED = 0x1,
+ XSC_CQE_APP_OP_TM_EXPECTED = 0x2,
+ XSC_CQE_APP_OP_TM_UNEXPECTED = 0x3,
+ XSC_CQE_APP_OP_TM_NO_TAG = 0x4,
+ XSC_CQE_APP_OP_TM_APPEND = 0x5,
+ XSC_CQE_APP_OP_TM_REMOVE = 0x6,
+ XSC_CQE_APP_OP_TM_NOOP = 0x7,
+ XSC_CQE_APP_OP_TM_CONSUMED_SW_RDNV = 0x9,
+ XSC_CQE_APP_OP_TM_CONSUMED_MSG = 0xA,
+ XSC_CQE_APP_OP_TM_CONSUMED_MSG_SW_RDNV = 0xB,
+ XSC_CQE_APP_OP_TM_MSG_COMPLETION_CANCELED = 0xC,
+};
+
+static const uint32_t xsc_msg_opcode[][2][2] = {
+ [XSC_MSG_OPCODE_SEND][XSC_REQ][XSC_WITHOUT_IMMDT] = XSC_OPCODE_RDMA_REQ_SEND,
+ [XSC_MSG_OPCODE_SEND][XSC_REQ][XSC_WITH_IMMDT] = XSC_OPCODE_RDMA_REQ_SEND_IMMDT,
+ [XSC_MSG_OPCODE_SEND][XSC_RSP][XSC_WITHOUT_IMMDT] = XSC_OPCODE_RDMA_RSP_RECV,
+ [XSC_MSG_OPCODE_SEND][XSC_RSP][XSC_WITH_IMMDT] = XSC_OPCODE_RDMA_RSP_RECV_IMMDT,
+ [XSC_MSG_OPCODE_RDMA_WRITE][XSC_REQ][XSC_WITHOUT_IMMDT] = XSC_OPCODE_RDMA_REQ_WRITE,
+ [XSC_MSG_OPCODE_RDMA_WRITE][XSC_REQ][XSC_WITH_IMMDT] = XSC_OPCODE_RDMA_REQ_WRITE_IMMDT,
+ [XSC_MSG_OPCODE_RDMA_WRITE][XSC_RSP][XSC_WITHOUT_IMMDT] = XSC_OPCODE_RDMA_CQE_ERROR,
+ [XSC_MSG_OPCODE_RDMA_WRITE][XSC_RSP][XSC_WITH_IMMDT] = XSC_OPCODE_RDMA_RSP_WRITE_IMMDT,
+ [XSC_MSG_OPCODE_RDMA_READ][XSC_REQ][XSC_WITHOUT_IMMDT] = XSC_OPCODE_RDMA_REQ_READ,
+ [XSC_MSG_OPCODE_RDMA_READ][XSC_REQ][XSC_WITH_IMMDT] = XSC_OPCODE_RDMA_CQE_ERROR,
+ [XSC_MSG_OPCODE_RDMA_READ][XSC_RSP][XSC_WITHOUT_IMMDT] = XSC_OPCODE_RDMA_CQE_ERROR,
+ [XSC_MSG_OPCODE_RDMA_READ][XSC_RSP][XSC_WITH_IMMDT] = XSC_OPCODE_RDMA_CQE_ERROR,
+};
+
+static const uint32_t xsc_cqe_opcode[] = {
+ [XSC_OPCODE_RDMA_REQ_SEND] = IBV_WC_SEND,
+ [XSC_OPCODE_RDMA_REQ_SEND_IMMDT] = IBV_WC_SEND,
+ [XSC_OPCODE_RDMA_RSP_RECV] = IBV_WC_RECV,
+ [XSC_OPCODE_RDMA_RSP_RECV_IMMDT] = IBV_WC_RECV,
+ [XSC_OPCODE_RDMA_REQ_WRITE] = IBV_WC_RDMA_WRITE,
+ [XSC_OPCODE_RDMA_REQ_WRITE_IMMDT] = IBV_WC_RDMA_WRITE,
+ [XSC_OPCODE_RDMA_RSP_WRITE_IMMDT] = IBV_WC_RECV_RDMA_WITH_IMM,
+ [XSC_OPCODE_RDMA_REQ_READ] = IBV_WC_RDMA_READ,
+};
+
+int xsc_stall_num_loop = 60;
+int xsc_stall_cq_poll_min = 60;
+int xsc_stall_cq_poll_max = 100000;
+int xsc_stall_cq_inc_step = 100;
+int xsc_stall_cq_dec_step = 10;
+
+static inline uint8_t xsc_get_cqe_opcode(struct xsc_cqe *cqe) ALWAYS_INLINE;
+static inline uint8_t xsc_get_cqe_opcode(struct xsc_cqe *cqe)
+{
+ if (cqe->is_error)
+ return cqe->type ? XSC_OPCODE_RDMA_RSP_ERROR : XSC_OPCODE_RDMA_REQ_ERROR;
+ if (cqe->msg_opcode > XSC_MSG_OPCODE_RDMA_READ) {
+ printf("rdma cqe msg code should be send/write/read\n");
+ return XSC_OPCODE_RDMA_CQE_ERROR;
+ }
+ return xsc_msg_opcode[cqe->msg_opcode][cqe->type][cqe->with_immdt];
+}
+
+static inline uint8_t get_cqe_l3_hdr_type(struct xsc_cqe64 *cqe)
+{
+ return (cqe->l4_hdr_type_etc >> 2) & 0x3;
+}
+
+static void *get_cqe(struct xsc_cq *cq, int n)
+{
+ return cq->active_buf->buf + n * cq->cqe_sz;
+}
+
+static void *get_sw_cqe(struct xsc_cq *cq, int n)
+{
+ int cid = n & (cq->verbs_cq.cq_ex.cqe - 1);
+ struct xsc_cqe *cqe = get_cqe(cq, cid);
+ if (likely(xsc_get_cqe_sw_own(cqe, n, cq->log2_cq_ring_sz)))
+ return cqe;
+ else
+ return NULL;
+}
+
+static void *next_cqe_sw(struct xsc_cq *cq)
+{
+ return get_sw_cqe(cq, cq->cons_index);
+}
+
+static void update_cons_index(struct xsc_cq *cq)
+{
+ union xsc_db_data db;
+
+ db.raw_data = cq->cons_index;
+ db.cqn = cq->cqn;
+ WR_REG(cq->db, db.raw_data);
+}
+
+static inline void handle_good_req(
+ struct ibv_wc *wc, struct xsc_cqe *cqe, struct xsc_qp *qp, struct xsc_wq *wq, uint8_t opcode)
+{
+ int idx;
+ struct xsc_send_wqe_ctrl_seg *ctrl;
+
+ wc->opcode = xsc_cqe_opcode[opcode];
+ wc->status = IBV_WC_SUCCESS;
+ idx = RD_LE_16(cqe->wqe_id);
+ idx >>= (qp->sq.wqe_shift - XSC_BASE_WQE_SHIFT);
+ idx &= (wq->wqe_cnt - 1);
+ wc->wr_id = wq->wrid[idx];
+ wq->tail = wq->wqe_head[idx] + 1;
+ if (opcode == XSC_OPCODE_RDMA_REQ_READ) {
+ ctrl = xsc_get_send_wqe(qp, idx);
+ wc->byte_len = ctrl->msg_len;
+ }
+ wq->flush_wqe_cnt--;
+
+ xsc_dbg(to_xctx(qp->ibv_qp->context)->dbg_fp, XSC_DBG_CQ_CQE,
+ "wqeid:%u, wq tail:%u\n", idx, wq->tail);
+}
+
+/* Returns IBV_WC_IP_CSUM_OK or 0 */
+static inline int get_csum_ok(struct xsc_cqe64 *cqe)
+{
+ return (((cqe->hds_ip_ext & (XSC_CQE_L4_OK | XSC_CQE_L3_OK)) ==
+ (XSC_CQE_L4_OK | XSC_CQE_L3_OK)) &
+ (get_cqe_l3_hdr_type(cqe) == XSC_CQE_L3_HDR_TYPE_IPV4))
+ << IBV_WC_IP_CSUM_OK_SHIFT;
+}
+
+static inline void handle_good_responder(
+ struct ibv_wc *wc, struct xsc_cqe *cqe, struct xsc_wq *wq, uint8_t opcode)
+{
+ uint16_t idx;
+ struct xsc_qp *qp = container_of(wq, struct xsc_qp, rq);
+
+ wc->byte_len = RD_LE_32(cqe->msg_len);
+ wc->opcode = xsc_cqe_opcode[opcode];
+ wc->status = IBV_WC_SUCCESS;
+
+ idx = wq->tail & (wq->wqe_cnt - 1);
+ wc->wr_id = wq->wrid[idx];
+ ++wq->tail;
+ wq->flush_wqe_cnt--;
+
+ xsc_dbg(to_xctx(qp->ibv_qp->context)->dbg_fp, XSC_DBG_CQ_CQE,
+ "recv cqe idx:%u, len:%u\n", idx, wc->byte_len);
+}
+
+static void dump_cqe(void *buf)
+{
+ __le32 *p = buf;
+ int i;
+
+ for (i = 0; i < 8; i += 4)
+ printf("0x%08x 0x%08x 0x%08x 0x%08x\n", p[i], p[i+1], p[i+2], p[i+3]);
+}
+
+static enum ibv_wc_status xsc_cqe_error_code(struct xsc_cqe *cqe)
+{
+ switch (cqe->error_code) {
+ case XSC_ERR_CODE_NAK_RETRY:
+ return IBV_WC_RETRY_EXC_ERR;
+ case XSC_ERR_CODE_NAK_OPCODE:
+ return IBV_WC_BAD_RESP_ERR;
+ case XSC_ERR_CODE_NAK_MR:
+ return IBV_WC_REM_ACCESS_ERR;
+ case XSC_ERR_CODE_NAK_OPERATION:
+ return IBV_WC_REM_OP_ERR;
+ case XSC_ERR_CODE_NAK_RNR:
+ return IBV_WC_RNR_RETRY_EXC_ERR;
+ case XSC_ERR_CODE_LOCAL_MR:
+ return IBV_WC_LOC_PROT_ERR;
+ case XSC_ERR_CODE_LOCAL_LEN:
+ return IBV_WC_LOC_LEN_ERR;
+ case XSC_ERR_CODE_LEN_GEN_CQE:
+ return IBV_WC_LOC_LEN_ERR;
+ case XSC_ERR_CODE_OPERATION:
+ return IBV_WC_LOC_ACCESS_ERR;
+ case XSC_ERR_CODE_FLUSH:
+ return IBV_WC_WR_FLUSH_ERR;
+ case XSC_ERR_CODE_MALF_WQE_HOST:
+ case XSC_ERR_CODE_STRG_ACC_GEN_CQE:
+ return IBV_WC_FATAL_ERR;
+ case XSC_ERR_CODE_OPCODE_GEN_CQE:
+ case XSC_ERR_CODE_LOCAL_OPCODE:
+ default:
+ return IBV_WC_GENERAL_ERR;
+ }
+}
+
+
+static inline bool xsc_qp_need_cqe(struct xsc_qp *qp, int *type, int *wqe_id)
+{
+ struct xsc_wq *wq;
+ struct xsc_send_wqe_ctrl_seg *ctrl;
+ int idx = 0;
+
+ /* check recv queue work request */
+ wq = &qp->rq;
+ if (wq->head - wq->tail > 0) {
+ *type = 1;
+ return true;
+ }
+ /* check send queue work request */
+ wq = &qp->sq;
+ while (wq->head - wq->tail > 0) {
+ idx = wq->tail & (wq->wqe_cnt - 1);
+ ++wq->tail;
+ ctrl = xsc_get_send_wqe(qp, idx);
+ if (ctrl->ce) {
+ *type = 0;
+ *wqe_id = idx << (qp->sq.wqe_shift - XSC_BASE_WQE_SHIFT);
+ return true;
+ }
+ }
+ return false;
+}
+
+static inline void handle_bad_req(
+ struct xsc_context *xctx,
+ struct ibv_wc *wc, struct xsc_cqe *cqe, struct xsc_qp *qp, struct xsc_wq *wq)
+{
+ int idx;
+ wc->status = xsc_cqe_error_code(cqe);
+ wc->vendor_err = cqe->error_code;
+ idx = RD_LE_16(cqe->wqe_id);
+ idx >>= (qp->sq.wqe_shift - XSC_BASE_WQE_SHIFT);
+ idx &= (wq->wqe_cnt - 1);
+ wq->tail = wq->wqe_head[idx] + 1;
+ wc->wr_id = wq->wrid[idx];
+ wq->flush_wqe_cnt--;
+
+ if (cqe->error_code != XSC_ERR_CODE_FLUSH) {
+ printf("%s: got completion with error:\n", xctx->hostname);
+ dump_cqe(cqe);
+ }
+}
+
+static inline void handle_bad_responder(
+ struct xsc_context *xctx,
+ struct ibv_wc *wc, struct xsc_cqe *cqe, struct xsc_wq *wq)
+{
+ wc->status = xsc_cqe_error_code(cqe);
+ wc->vendor_err = cqe->error_code;
+
+ ++wq->tail;
+ wq->flush_wqe_cnt--;
+
+ if (cqe->error_code != XSC_ERR_CODE_FLUSH) {
+ printf("%s: got completion with error:\n", xctx->hostname);
+ dump_cqe(cqe);
+ }
+}
+
+#if defined(__x86_64__) || defined (__i386__)
+static inline unsigned long get_cycles(void)
+{
+ uint32_t low, high;
+ uint64_t val;
+ asm volatile ("rdtsc" : "=a" (low), "=d" (high));
+ val = high;
+ val = (val << 32) | low;
+ return val;
+}
+
+static void xsc_stall_poll_cq(void)
+{
+ int i;
+
+ for (i = 0; i < xsc_stall_num_loop; i++)
+ (void)get_cycles();
+}
+static void xsc_stall_cycles_poll_cq(uint64_t cycles)
+{
+ while (get_cycles() < cycles)
+ ; /* Nothing */
+}
+static void xsc_get_cycles(uint64_t *cycles)
+{
+ *cycles = get_cycles();
+}
+#else
+static void xsc_stall_poll_cq(void)
+{
+}
+static void xsc_stall_cycles_poll_cq(uint64_t cycles)
+{
+}
+static void xsc_get_cycles(uint64_t *cycles)
+{
+}
+#endif
+
+static inline int get_qp_ctx(struct xsc_context *xctx,
+ struct xsc_resource **cur_rsc,
+ uint32_t qpn)
+ ALWAYS_INLINE;
+static inline int get_qp_ctx(struct xsc_context *xctx,
+ struct xsc_resource **cur_rsc,
+ uint32_t qpn)
+{
+ if (!*cur_rsc || (qpn != (*cur_rsc)->rsn)) {
+ /*
+ * We do not have to take the QP table lock here,
+ * because CQs will be locked while QPs are removed
+ * from the table.
+ */
+ *cur_rsc = (struct xsc_resource *)xsc_find_qp(xctx, qpn);
+ if (unlikely(!*cur_rsc))
+ return CQ_POLL_ERR;
+ }
+
+ return CQ_OK;
+}
+
+static inline int xsc_get_next_cqe(struct xsc_cq *cq,
+ struct xsc_cqe64 **pcqe64,
+ void **pcqe)
+ ALWAYS_INLINE;
+static inline int xsc_get_next_cqe(struct xsc_cq *cq,
+ struct xsc_cqe64 **pcqe64,
+ void **pcqe)
+{
+ void *cqe = next_cqe_sw(cq);
+ if (!cqe)
+ return CQ_EMPTY;
+
+ ++cq->cons_index;
+
+ /*
+ * Make sure we read CQ entry contents after we've checked the
+ * ownership bit.
+ */
+ udma_from_device_barrier();
+
+ *pcqe = cqe;
+
+ return CQ_OK;
+}
+
+static inline int xsc_parse_cqe(struct xsc_cq *cq,
+ struct xsc_cqe *cqe,
+ struct xsc_resource **cur_rsc,
+ struct ibv_wc *wc,
+ int lazy)
+{
+ struct xsc_wq *wq;
+ uint32_t qp_id;
+ uint8_t opcode;
+ int err = 0;
+ struct xsc_qp *xqp = NULL;
+ struct xsc_context *xctx;
+
+ xctx = to_xctx(ibv_cq_ex_to_cq(&cq->verbs_cq.cq_ex)->context);
+ qp_id = cqe->qp_id;
+ qp_id = RD_LE_16(qp_id);
+ wc->wc_flags = 0;
+ wc->qp_num = qp_id;
+ opcode = xsc_get_cqe_opcode(cqe);
+
+ xsc_dbg(xctx->dbg_fp, XSC_DBG_CQ_CQE, "opcode:0x%x qp_num:%u\n", opcode, qp_id);
+ switch (opcode) {
+ case XSC_OPCODE_RDMA_REQ_SEND_IMMDT:
+ case XSC_OPCODE_RDMA_REQ_WRITE_IMMDT:
+ wc->wc_flags |= IBV_WC_WITH_IMM;
+ SWITCH_FALLTHROUGH;
+ case XSC_OPCODE_RDMA_REQ_SEND:
+ case XSC_OPCODE_RDMA_REQ_WRITE:
+ case XSC_OPCODE_RDMA_REQ_READ:
+ err = get_qp_ctx(xctx, cur_rsc, qp_id);
+ if (unlikely(err))
+ return CQ_EMPTY;
+ xqp = rsc_to_xqp(*cur_rsc);
+ wq = &xqp->sq;
+ handle_good_req(wc, cqe, xqp, wq, opcode);
+ break;
+ case XSC_OPCODE_RDMA_RSP_RECV_IMMDT:
+ case XSC_OPCODE_RDMA_RSP_WRITE_IMMDT:
+ wc->wc_flags |= IBV_WC_WITH_IMM;
+ wc->imm_data = cqe->imm_data;
+ SWITCH_FALLTHROUGH;
+ case XSC_OPCODE_RDMA_RSP_RECV:
+ err = get_qp_ctx(xctx, cur_rsc, qp_id);
+ if (unlikely(err))
+ return CQ_EMPTY;
+ xqp = rsc_to_xqp(*cur_rsc);
+ wq = &xqp->rq;
+ handle_good_responder(wc, cqe, wq, opcode);
+ break;
+ case XSC_OPCODE_RDMA_REQ_ERROR:
+ err = get_qp_ctx(xctx, cur_rsc, qp_id);
+ if (unlikely(err))
+ return CQ_POLL_ERR;
+ xqp = rsc_to_xqp(*cur_rsc);
+ wq = &xqp->sq;
+ handle_bad_req(xctx, wc, cqe, xqp, wq);
+ break;
+ case XSC_OPCODE_RDMA_RSP_ERROR:
+ err = get_qp_ctx(xctx, cur_rsc, qp_id);
+ if (unlikely(err))
+ return CQ_POLL_ERR;
+ xqp = rsc_to_xqp(*cur_rsc);
+ wq = &xqp->rq;
+ handle_bad_responder(xctx, wc, cqe, wq);
+ break;
+ case XSC_OPCODE_RDMA_CQE_ERROR:
+ printf("%s: got completion with cqe format error:\n", xctx->hostname);
+ dump_cqe(cqe);
+ SWITCH_FALLTHROUGH;
+ default:
+ return CQ_POLL_ERR;
+ }
+ return CQ_OK;
+}
+
+static inline int xsc_parse_lazy_cqe(struct xsc_cq *cq,
+ struct xsc_cqe64 *cqe64,
+ void *cqe, int cqe_ver)
+ ALWAYS_INLINE;
+static inline int xsc_parse_lazy_cqe(struct xsc_cq *cq,
+ struct xsc_cqe64 *cqe64,
+ void *cqe, int cqe_ver)
+{
+ return xsc_parse_cqe(cq, cqe, &cq->cur_rsc, NULL, 1);
+}
+
+static inline int xsc_poll_one(struct xsc_cq *cq,
+ struct xsc_resource **cur_rsc,
+ struct ibv_wc *wc)
+ ALWAYS_INLINE;
+static inline int xsc_poll_one(struct xsc_cq *cq,
+ struct xsc_resource **cur_rsc,
+ struct ibv_wc *wc)
+{
+ struct xsc_cqe *cqe = get_sw_cqe(cq, cq->cons_index);
+ if (cqe == NULL) {
+ return CQ_EMPTY;
+ }
+ memset(wc, 0, sizeof(*wc));
+
+ ++cq->cons_index;
+
+ /*
+ * Make sure we read CQ entry contents after we've checked the
+ * ownership bit.
+ */
+ udma_from_device_barrier();
+ return xsc_parse_cqe(cq, cqe, cur_rsc, wc, 0);
+}
+
+static inline void gen_flush_err_cqe(struct xsc_err_state_qp_node *err_node,
+ uint32_t qp_id, struct xsc_wq *wq, uint32_t idx,
+ struct ibv_wc *wc)
+{
+ memset(wc, 0, sizeof(*wc));
+ if (err_node->is_sq) {
+ switch (wq->wr_opcode[idx]){
+ case IBV_WR_SEND:
+ case IBV_WR_SEND_WITH_IMM:
+ case IBV_WR_SEND_WITH_INV:
+ wc->opcode = IBV_WC_SEND;
+ break;
+ case IBV_WR_RDMA_WRITE:
+ case IBV_WR_RDMA_WRITE_WITH_IMM:
+ wc->opcode = IBV_WC_RDMA_WRITE;
+ break;
+ case IBV_WR_RDMA_READ:
+ wc->opcode = IBV_WC_RDMA_READ;
+ }
+ } else {
+ wc->opcode = IBV_WC_RECV;
+ }
+
+ wc->qp_num = qp_id;
+ wc->status = IBV_WC_WR_FLUSH_ERR;
+ wc->vendor_err = XSC_ERR_CODE_FLUSH;
+ wc->wr_id = wq->wrid[idx];
+ wq->tail++;
+ wq->flush_wqe_cnt--;
+}
+
+static inline int xsc_generate_flush_err_cqe(struct ibv_cq *ibcq,
+ int ne, int *npolled, struct ibv_wc *wc)
+{
+ uint32_t qp_id = 0;
+ uint32_t flush_wqe_cnt = 0;
+ int sw_npolled = 0;
+ int ret = 0;
+ uint32_t idx = 0;
+ struct xsc_err_state_qp_node *err_qp_node, *tmp;
+ struct xsc_resource *res = NULL;
+ struct xsc_context *xctx = to_xctx(ibcq->context);
+ struct xsc_cq *cq = to_xcq(ibcq);
+ struct xsc_wq *wq;
+
+ list_for_each_safe(&cq->err_state_qp_list, err_qp_node, tmp, entry) {
+ if (!err_qp_node)
+ break;
+
+ sw_npolled = 0;
+ qp_id = err_qp_node->qp_id;
+ ret = get_qp_ctx(xctx, &res, qp_id);
+ if (unlikely(ret))
+ continue;
+ wq = err_qp_node->is_sq ? &(rsc_to_xqp(res)->sq) : &(rsc_to_xqp(res)->rq);
+ flush_wqe_cnt = wq->flush_wqe_cnt;
+ xsc_dbg(xctx->dbg_fp, XSC_DBG_CQ_CQE, "is_sq %d, flush_wq_cnt %d, ne %d, npolled %d, qp_id %d\n",
+ err_qp_node->is_sq, wq->flush_wqe_cnt, ne, *npolled, qp_id);
+
+ if (flush_wqe_cnt <= (ne - *npolled)) {
+ while (sw_npolled < flush_wqe_cnt) {
+ idx = wq->tail & (wq->wqe_cnt - 1);
+ if (err_qp_node->is_sq && !wq->need_flush[idx]) {
+ wq->tail++;
+ continue;
+ } else {
+ gen_flush_err_cqe(err_qp_node, err_qp_node->qp_id, wq,
+ idx, wc + *npolled + sw_npolled);
+ ++sw_npolled;
+ }
+ }
+ list_del(&err_qp_node->entry);
+ free(err_qp_node);
+ *npolled += sw_npolled;
+ } else {
+ while (sw_npolled < (ne - *npolled)) {
+ idx = wq->tail & (wq->wqe_cnt - 1);
+ if (err_qp_node->is_sq && !wq->need_flush[idx]) {
+ wq->tail++;
+ continue;
+ } else {
+ gen_flush_err_cqe(err_qp_node, err_qp_node->qp_id, wq,
+ idx, wc + *npolled + sw_npolled);
+ ++sw_npolled;
+ }
+ }
+ *npolled = ne;
+ break;
+ }
+ }
+
+ return 0;
+}
+
+static inline int poll_cq(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc) ALWAYS_INLINE;
+static inline int poll_cq(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc)
+{
+ struct xsc_cq *cq = to_xcq(ibcq);
+ struct xsc_resource *rsc = NULL;
+ int npolled = 0;
+ int err = CQ_OK;
+ uint32_t next_cid = cq->cons_index;
+
+ xsc_spin_lock(&cq->lock);
+ for (npolled = 0; npolled < ne; ++npolled) {
+ err = xsc_poll_one(cq, &rsc, wc + npolled);
+ if (err != CQ_OK)
+ break;
+ }
+
+ if (err == CQ_EMPTY) {
+ if (npolled < ne && !(list_empty(&cq->err_state_qp_list))) {
+ xsc_generate_flush_err_cqe(ibcq, ne, &npolled, wc);
+ }
+ }
+
+ udma_to_device_barrier();
+ if (next_cid != cq->cons_index)
+ update_cons_index(cq);
+ xsc_spin_unlock(&cq->lock);
+
+ return err == CQ_POLL_ERR ? err : npolled;
+}
+
+enum polling_mode {
+ POLLING_MODE_NO_STALL,
+ POLLING_MODE_STALL,
+ POLLING_MODE_STALL_ADAPTIVE
+};
+
+static inline void _xsc_end_poll(struct ibv_cq_ex *ibcq,
+ int lock, enum polling_mode stall)
+ ALWAYS_INLINE;
+static inline void _xsc_end_poll(struct ibv_cq_ex *ibcq,
+ int lock, enum polling_mode stall)
+{
+ struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq));
+
+ update_cons_index(cq);
+
+ if (lock)
+ xsc_spin_unlock(&cq->lock);
+
+ if (stall) {
+ if (stall == POLLING_MODE_STALL_ADAPTIVE) {
+ if (!(cq->flags & XSC_CQ_FLAGS_FOUND_CQES)) {
+ cq->stall_cycles = max(cq->stall_cycles - xsc_stall_cq_dec_step,
+ xsc_stall_cq_poll_min);
+ xsc_get_cycles(&cq->stall_last_count);
+ } else if (cq->flags & XSC_CQ_FLAGS_EMPTY_DURING_POLL) {
+ cq->stall_cycles = min(cq->stall_cycles + xsc_stall_cq_inc_step,
+ xsc_stall_cq_poll_max);
+ xsc_get_cycles(&cq->stall_last_count);
+ } else {
+ cq->stall_cycles = max(cq->stall_cycles - xsc_stall_cq_dec_step,
+ xsc_stall_cq_poll_min);
+ cq->stall_last_count = 0;
+ }
+ } else if (!(cq->flags & XSC_CQ_FLAGS_FOUND_CQES)) {
+ cq->stall_next_poll = 1;
+ }
+
+ cq->flags &= ~(XSC_CQ_FLAGS_FOUND_CQES | XSC_CQ_FLAGS_EMPTY_DURING_POLL);
+ }
+}
+
+static inline int xsc_start_poll(struct ibv_cq_ex *ibcq, struct ibv_poll_cq_attr *attr,
+ int lock, enum polling_mode stall,
+ int cqe_version, int clock_update)
+ ALWAYS_INLINE;
+static inline int xsc_start_poll(struct ibv_cq_ex *ibcq, struct ibv_poll_cq_attr *attr,
+ int lock, enum polling_mode stall,
+ int cqe_version, int clock_update)
+{
+ struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq));
+ struct xsc_cqe64 *cqe64;
+ void *cqe;
+ int err;
+
+ if (unlikely(attr->comp_mask))
+ return EINVAL;
+
+ if (stall) {
+ if (stall == POLLING_MODE_STALL_ADAPTIVE) {
+ if (cq->stall_last_count)
+ xsc_stall_cycles_poll_cq(cq->stall_last_count + cq->stall_cycles);
+ } else if (cq->stall_next_poll) {
+ cq->stall_next_poll = 0;
+ xsc_stall_poll_cq();
+ }
+ }
+
+ if (lock)
+ xsc_spin_lock(&cq->lock);
+
+ cq->cur_rsc = NULL;
+
+ err = xsc_get_next_cqe(cq, &cqe64, &cqe);
+ if (err == CQ_EMPTY) {
+ if (lock)
+ xsc_spin_unlock(&cq->lock);
+
+ if (stall) {
+ if (stall == POLLING_MODE_STALL_ADAPTIVE) {
+ cq->stall_cycles = max(cq->stall_cycles - xsc_stall_cq_dec_step,
+ xsc_stall_cq_poll_min);
+ xsc_get_cycles(&cq->stall_last_count);
+ } else {
+ cq->stall_next_poll = 1;
+ }
+ }
+
+ return ENOENT;
+ }
+
+ if (stall)
+ cq->flags |= XSC_CQ_FLAGS_FOUND_CQES;
+
+ err = xsc_parse_lazy_cqe(cq, cqe64, cqe, cqe_version);
+ if (lock && err)
+ xsc_spin_unlock(&cq->lock);
+
+ if (stall && err) {
+ if (stall == POLLING_MODE_STALL_ADAPTIVE) {
+ cq->stall_cycles = max(cq->stall_cycles - xsc_stall_cq_dec_step,
+ xsc_stall_cq_poll_min);
+ cq->stall_last_count = 0;
+ }
+
+ cq->flags &= ~(XSC_CQ_FLAGS_FOUND_CQES);
+
+ goto out;
+ }
+
+ if (clock_update && !err)
+ err = xscdv_get_clock_info(ibcq->context, &cq->last_clock_info);
+
+out:
+ return err;
+}
+
+static inline int xsc_next_poll(struct ibv_cq_ex *ibcq,
+ enum polling_mode stall, int cqe_version)
+ ALWAYS_INLINE;
+static inline int xsc_next_poll(struct ibv_cq_ex *ibcq,
+ enum polling_mode stall,
+ int cqe_version)
+{
+ struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq));
+ struct xsc_cqe64 *cqe64;
+ void *cqe;
+ int err;
+
+ err = xsc_get_next_cqe(cq, &cqe64, &cqe);
+ if (err == CQ_EMPTY) {
+ if (stall == POLLING_MODE_STALL_ADAPTIVE)
+ cq->flags |= XSC_CQ_FLAGS_EMPTY_DURING_POLL;
+
+ return ENOENT;
+ }
+
+ return xsc_parse_lazy_cqe(cq, cqe64, cqe, cqe_version);
+}
+
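+/*
+ * Thin wrappers binding each supported combination of cqe version, locking,
+ * stall mode and clock update; xsc_cq_fill_pfns() selects among them through
+ * the ops[] table below.
+ */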
+static inline int xsc_next_poll_adaptive_v0(struct ibv_cq_ex *ibcq)
+{
+ return xsc_next_poll(ibcq, POLLING_MODE_STALL_ADAPTIVE, 0);
+}
+
+static inline int xsc_next_poll_adaptive_v1(struct ibv_cq_ex *ibcq)
+{
+ return xsc_next_poll(ibcq, POLLING_MODE_STALL_ADAPTIVE, 1);
+}
+
+static inline int xsc_next_poll_v0(struct ibv_cq_ex *ibcq)
+{
+ return xsc_next_poll(ibcq, 0, 0);
+}
+
+static inline int xsc_next_poll_v1(struct ibv_cq_ex *ibcq)
+{
+ return xsc_next_poll(ibcq, 0, 1);
+}
+
+static inline int xsc_start_poll_v0(struct ibv_cq_ex *ibcq,
+ struct ibv_poll_cq_attr *attr)
+{
+ return xsc_start_poll(ibcq, attr, 0, 0, 0, 0);
+}
+
+static inline int xsc_start_poll_v1(struct ibv_cq_ex *ibcq,
+ struct ibv_poll_cq_attr *attr)
+{
+ return xsc_start_poll(ibcq, attr, 0, 0, 1, 0);
+}
+
+static inline int xsc_start_poll_v0_lock(struct ibv_cq_ex *ibcq,
+ struct ibv_poll_cq_attr *attr)
+{
+ return xsc_start_poll(ibcq, attr, 1, 0, 0, 0);
+}
+
+static inline int xsc_start_poll_v1_lock(struct ibv_cq_ex *ibcq,
+ struct ibv_poll_cq_attr *attr)
+{
+ return xsc_start_poll(ibcq, attr, 1, 0, 1, 0);
+}
+
+static inline int xsc_start_poll_adaptive_stall_v0_lock(struct ibv_cq_ex *ibcq,
+ struct ibv_poll_cq_attr *attr)
+{
+ return xsc_start_poll(ibcq, attr, 1, POLLING_MODE_STALL_ADAPTIVE, 0, 0);
+}
+
+static inline int xsc_start_poll_stall_v0_lock(struct ibv_cq_ex *ibcq,
+ struct ibv_poll_cq_attr *attr)
+{
+ return xsc_start_poll(ibcq, attr, 1, POLLING_MODE_STALL, 0, 0);
+}
+
+static inline int xsc_start_poll_adaptive_stall_v1_lock(struct ibv_cq_ex *ibcq,
+ struct ibv_poll_cq_attr *attr)
+{
+ return xsc_start_poll(ibcq, attr, 1, POLLING_MODE_STALL_ADAPTIVE, 1, 0);
+}
+
+static inline int xsc_start_poll_stall_v1_lock(struct ibv_cq_ex *ibcq,
+ struct ibv_poll_cq_attr *attr)
+{
+ return xsc_start_poll(ibcq, attr, 1, POLLING_MODE_STALL, 1, 0);
+}
+
+static inline int xsc_start_poll_stall_v0(struct ibv_cq_ex *ibcq,
+ struct ibv_poll_cq_attr *attr)
+{
+ return xsc_start_poll(ibcq, attr, 0, POLLING_MODE_STALL, 0, 0);
+}
+
+static inline int xsc_start_poll_adaptive_stall_v0(struct ibv_cq_ex *ibcq,
+ struct ibv_poll_cq_attr *attr)
+{
+ return xsc_start_poll(ibcq, attr, 0, POLLING_MODE_STALL_ADAPTIVE, 0, 0);
+}
+
+static inline int xsc_start_poll_adaptive_stall_v1(struct ibv_cq_ex *ibcq,
+ struct ibv_poll_cq_attr *attr)
+{
+ return xsc_start_poll(ibcq, attr, 0, POLLING_MODE_STALL_ADAPTIVE, 1, 0);
+}
+
+static inline int xsc_start_poll_stall_v1(struct ibv_cq_ex *ibcq,
+ struct ibv_poll_cq_attr *attr)
+{
+ return xsc_start_poll(ibcq, attr, 0, POLLING_MODE_STALL, 1, 0);
+}
+
+static inline int xsc_start_poll_v0_lock_clock_update(struct ibv_cq_ex *ibcq,
+ struct ibv_poll_cq_attr *attr)
+{
+ return xsc_start_poll(ibcq, attr, 1, 0, 0, 1);
+}
+
+static inline int xsc_start_poll_v1_lock_clock_update(struct ibv_cq_ex *ibcq,
+ struct ibv_poll_cq_attr *attr)
+{
+ return xsc_start_poll(ibcq, attr, 1, 0, 1, 1);
+}
+
+static inline int xsc_start_poll_v1_clock_update(struct ibv_cq_ex *ibcq,
+ struct ibv_poll_cq_attr *attr)
+{
+ return xsc_start_poll(ibcq, attr, 0, 0, 1, 1);
+}
+
+static inline int xsc_start_poll_v0_clock_update(struct ibv_cq_ex *ibcq,
+ struct ibv_poll_cq_attr *attr)
+{
+ return xsc_start_poll(ibcq, attr, 0, 0, 0, 1);
+}
+
+static inline int xsc_start_poll_stall_v1_lock_clock_update(struct ibv_cq_ex *ibcq,
+ struct ibv_poll_cq_attr *attr)
+{
+ return xsc_start_poll(ibcq, attr, 1, POLLING_MODE_STALL, 1, 1);
+}
+
+static inline int xsc_start_poll_stall_v0_lock_clock_update(struct ibv_cq_ex *ibcq,
+ struct ibv_poll_cq_attr *attr)
+{
+ return xsc_start_poll(ibcq, attr, 1, POLLING_MODE_STALL, 0, 1);
+}
+
+static inline int xsc_start_poll_stall_v1_clock_update(struct ibv_cq_ex *ibcq,
+ struct ibv_poll_cq_attr *attr)
+{
+ return xsc_start_poll(ibcq, attr, 0, POLLING_MODE_STALL, 1, 1);
+}
+
+static inline int xsc_start_poll_stall_v0_clock_update(struct ibv_cq_ex *ibcq,
+ struct ibv_poll_cq_attr *attr)
+{
+ return xsc_start_poll(ibcq, attr, 0, POLLING_MODE_STALL, 0, 1);
+}
+
+static inline int xsc_start_poll_adaptive_stall_v0_lock_clock_update(struct ibv_cq_ex *ibcq,
+ struct ibv_poll_cq_attr *attr)
+{
+ return xsc_start_poll(ibcq, attr, 1, POLLING_MODE_STALL_ADAPTIVE, 0, 1);
+}
+
+static inline int xsc_start_poll_adaptive_stall_v1_lock_clock_update(struct ibv_cq_ex *ibcq,
+ struct ibv_poll_cq_attr *attr)
+{
+ return xsc_start_poll(ibcq, attr, 1, POLLING_MODE_STALL_ADAPTIVE, 1, 1);
+}
+
+static inline int xsc_start_poll_adaptive_stall_v0_clock_update(struct ibv_cq_ex *ibcq,
+ struct ibv_poll_cq_attr *attr)
+{
+ return xsc_start_poll(ibcq, attr, 0, POLLING_MODE_STALL_ADAPTIVE, 0, 1);
+}
+
+static inline int xsc_start_poll_adaptive_stall_v1_clock_update(struct ibv_cq_ex *ibcq,
+ struct ibv_poll_cq_attr *attr)
+{
+ return xsc_start_poll(ibcq, attr, 0, POLLING_MODE_STALL_ADAPTIVE, 1, 1);
+}
+
+static inline void xsc_end_poll_adaptive_stall_lock(struct ibv_cq_ex *ibcq)
+{
+ _xsc_end_poll(ibcq, 1, POLLING_MODE_STALL_ADAPTIVE);
+}
+
+static inline void xsc_end_poll_stall_lock(struct ibv_cq_ex *ibcq)
+{
+ _xsc_end_poll(ibcq, 1, POLLING_MODE_STALL);
+}
+
+static inline void xsc_end_poll_adaptive_stall(struct ibv_cq_ex *ibcq)
+{
+ _xsc_end_poll(ibcq, 0, POLLING_MODE_STALL_ADAPTIVE);
+}
+
+static inline void xsc_end_poll_stall(struct ibv_cq_ex *ibcq)
+{
+ _xsc_end_poll(ibcq, 0, POLLING_MODE_STALL);
+}
+
+static inline void xsc_end_poll(struct ibv_cq_ex *ibcq)
+{
+ _xsc_end_poll(ibcq, 0, 0);
+}
+
+static inline void xsc_end_poll_lock(struct ibv_cq_ex *ibcq)
+{
+ _xsc_end_poll(ibcq, 1, 0);
+}
+
+int xsc_poll_cq(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc)
+{
+ return poll_cq(ibcq, ne, wc);
+}
+
+static inline enum ibv_wc_opcode xsc_cq_read_wc_opcode(struct ibv_cq_ex *ibcq)
+{
+ struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq));
+
+ switch (xscdv_get_cqe_opcode(cq->cqe64)) {
+ case XSC_CQE_RESP_WR_IMM:
+ return IBV_WC_RECV_RDMA_WITH_IMM;
+ case XSC_CQE_RESP_SEND:
+ case XSC_CQE_RESP_SEND_IMM:
+ case XSC_CQE_RESP_SEND_INV:
+ if (unlikely(cq->cqe64->app == XSC_CQE_APP_TAG_MATCHING)) {
+ switch (cq->cqe64->app_op) {
+ case XSC_CQE_APP_OP_TM_CONSUMED_MSG_SW_RDNV:
+ case XSC_CQE_APP_OP_TM_CONSUMED_MSG:
+ case XSC_CQE_APP_OP_TM_CONSUMED_SW_RDNV:
+ case XSC_CQE_APP_OP_TM_EXPECTED:
+ case XSC_CQE_APP_OP_TM_UNEXPECTED:
+ return IBV_WC_TM_RECV;
+ case XSC_CQE_APP_OP_TM_NO_TAG:
+ return IBV_WC_TM_NO_TAG;
+ }
+ }
+ return IBV_WC_RECV;
+ case XSC_CQE_NO_PACKET:
+ switch (cq->cqe64->app_op) {
+ case XSC_CQE_APP_OP_TM_REMOVE:
+ return IBV_WC_TM_DEL;
+ case XSC_CQE_APP_OP_TM_APPEND:
+ return IBV_WC_TM_ADD;
+ case XSC_CQE_APP_OP_TM_NOOP:
+ return IBV_WC_TM_SYNC;
+ case XSC_CQE_APP_OP_TM_CONSUMED:
+ return IBV_WC_TM_RECV;
+ }
+ break;
+ case XSC_CQE_REQ:
+ switch (be32toh(cq->cqe64->sop_drop_qpn) >> 24) {
+ case XSC_OPCODE_RDMA_WRITE_IMM:
+ case XSC_OPCODE_RDMA_WRITE:
+ return IBV_WC_RDMA_WRITE;
+ case XSC_OPCODE_SEND_IMM:
+ case XSC_OPCODE_SEND:
+ case XSC_OPCODE_SEND_INVAL:
+ return IBV_WC_SEND;
+ case XSC_OPCODE_RDMA_READ:
+ return IBV_WC_RDMA_READ;
+ case XSC_OPCODE_ATOMIC_CS:
+ return IBV_WC_COMP_SWAP;
+ case XSC_OPCODE_ATOMIC_FA:
+ return IBV_WC_FETCH_ADD;
+ case XSC_OPCODE_UMR:
+ return cq->umr_opcode;
+ case XSC_OPCODE_TSO:
+ return IBV_WC_TSO;
+ }
+ }
+
+ return 0;
+}
+
+static inline uint32_t xsc_cq_read_wc_qp_num(struct ibv_cq_ex *ibcq)
+{
+ struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq));
+
+ return be32toh(cq->cqe64->sop_drop_qpn) & 0xffffff;
+}
+
+static inline unsigned int xsc_cq_read_wc_flags(struct ibv_cq_ex *ibcq)
+{
+ struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq));
+ int wc_flags = 0;
+
+ if (cq->flags & XSC_CQ_FLAGS_RX_CSUM_VALID)
+ wc_flags = get_csum_ok(cq->cqe64);
+
+ switch (xscdv_get_cqe_opcode(cq->cqe64)) {
+ case XSC_CQE_RESP_WR_IMM:
+ case XSC_CQE_RESP_SEND_IMM:
+ wc_flags |= IBV_WC_WITH_IMM;
+ break;
+ case XSC_CQE_RESP_SEND_INV:
+ wc_flags |= IBV_WC_WITH_INV;
+ break;
+ }
+
+ if (cq->flags & XSC_CQ_FLAGS_TM_SYNC_REQ)
+ wc_flags |= IBV_WC_TM_SYNC_REQ;
+
+ if (unlikely(cq->cqe64->app == XSC_CQE_APP_TAG_MATCHING)) {
+ switch (cq->cqe64->app_op) {
+ case XSC_CQE_APP_OP_TM_CONSUMED_MSG_SW_RDNV:
+ case XSC_CQE_APP_OP_TM_CONSUMED_MSG:
+ case XSC_CQE_APP_OP_TM_MSG_COMPLETION_CANCELED:
+ /* Full completion */
+ wc_flags |= (IBV_WC_TM_MATCH | IBV_WC_TM_DATA_VALID);
+ break;
+ case XSC_CQE_APP_OP_TM_CONSUMED_SW_RDNV:
+ case XSC_CQE_APP_OP_TM_CONSUMED: /* First completion */
+ wc_flags |= IBV_WC_TM_MATCH;
+ break;
+ case XSC_CQE_APP_OP_TM_EXPECTED: /* Second completion */
+ wc_flags |= IBV_WC_TM_DATA_VALID;
+ break;
+ }
+ }
+
+ wc_flags |= ((be32toh(cq->cqe64->flags_rqpn) >> 28) & 3) ? IBV_WC_GRH : 0;
+ return wc_flags;
+}
+
+static inline uint32_t xsc_cq_read_wc_byte_len(struct ibv_cq_ex *ibcq)
+{
+ struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq));
+
+ return be32toh(cq->cqe64->byte_cnt);
+}
+
+static inline uint32_t xsc_cq_read_wc_vendor_err(struct ibv_cq_ex *ibcq)
+{
+ struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq));
+ struct xsc_err_cqe *ecqe = (struct xsc_err_cqe *)cq->cqe64;
+
+ return ecqe->vendor_err_synd;
+}
+
+static inline __be32 xsc_cq_read_wc_imm_data(struct ibv_cq_ex *ibcq)
+{
+ struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq));
+
+ switch (xscdv_get_cqe_opcode(cq->cqe64)) {
+ case XSC_CQE_RESP_SEND_INV:
+ /* This is returning invalidate_rkey which is in host order, see
+ * ibv_wc_read_invalidated_rkey
+ */
+ return (__force __be32)be32toh(cq->cqe64->imm_inval_pkey);
+ default:
+ return cq->cqe64->imm_inval_pkey;
+ }
+}
+
+static inline uint32_t xsc_cq_read_wc_slid(struct ibv_cq_ex *ibcq)
+{
+ struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq));
+
+ return (uint32_t)be16toh(cq->cqe64->slid);
+}
+
+static inline uint8_t xsc_cq_read_wc_sl(struct ibv_cq_ex *ibcq)
+{
+ struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq));
+
+ return (be32toh(cq->cqe64->flags_rqpn) >> 24) & 0xf;
+}
+
+static inline uint32_t xsc_cq_read_wc_src_qp(struct ibv_cq_ex *ibcq)
+{
+ struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq));
+
+ return be32toh(cq->cqe64->flags_rqpn) & 0xffffff;
+}
+
+static inline uint8_t xsc_cq_read_wc_dlid_path_bits(struct ibv_cq_ex *ibcq)
+{
+ struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq));
+
+ return cq->cqe64->ml_path & 0x7f;
+}
+
+static inline uint64_t xsc_cq_read_wc_completion_ts(struct ibv_cq_ex *ibcq)
+{
+ struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq));
+
+ return be64toh(cq->cqe64->timestamp);
+}
+
+static inline uint64_t
+xsc_cq_read_wc_completion_wallclock_ns(struct ibv_cq_ex *ibcq)
+{
+ struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq));
+
+ return xscdv_ts_to_ns(&cq->last_clock_info,
+ xsc_cq_read_wc_completion_ts(ibcq));
+}
+
+static inline uint16_t xsc_cq_read_wc_cvlan(struct ibv_cq_ex *ibcq)
+{
+ struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq));
+
+ return be16toh(cq->cqe64->vlan_info);
+}
+
+static inline uint32_t xsc_cq_read_flow_tag(struct ibv_cq_ex *ibcq)
+{
+ struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq));
+
+ return be32toh(cq->cqe64->sop_drop_qpn) & XSC_FLOW_TAG_MASK;
+}
+
+static inline void xsc_cq_read_wc_tm_info(struct ibv_cq_ex *ibcq,
+ struct ibv_wc_tm_info *tm_info)
+{
+ struct xsc_cq *cq = to_xcq(ibv_cq_ex_to_cq(ibcq));
+
+ tm_info->tag = be64toh(cq->cqe64->tmh.tag);
+ tm_info->priv = be32toh(cq->cqe64->tmh.app_ctx);
+}
+
+#define BIT(i) (1UL << (i))
+
+#define SINGLE_THREADED BIT(0)
+#define STALL BIT(1)
+#define V1 BIT(2)
+#define ADAPTIVE BIT(3)
+#define CLOCK_UPDATE BIT(4)
+
+#define xsc_start_poll_name(cqe_ver, lock, stall, adaptive, clock_update) \
+ xsc_start_poll##adaptive##stall##cqe_ver##lock##clock_update
+#define xsc_next_poll_name(cqe_ver, adaptive) \
+ xsc_next_poll##adaptive##cqe_ver
+#define xsc_end_poll_name(lock, stall, adaptive) \
+ xsc_end_poll##adaptive##stall##lock
+
+#define POLL_FN_ENTRY(cqe_ver, lock, stall, adaptive, clock_update) { \
+ .start_poll = &xsc_start_poll_name(cqe_ver, lock, stall, adaptive, clock_update), \
+ .next_poll = &xsc_next_poll_name(cqe_ver, adaptive), \
+ .end_poll = &xsc_end_poll_name(lock, stall, adaptive), \
+ }
+
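+/*
+ * Poll-function lookup table, indexed by any combination of the flag bits
+ * above; the xsc_*_poll_name() macros paste the matching suffixes together
+ * to pick the specialized start/next/end_poll variants.
+ */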
+static const struct op
+{
+ int (*start_poll)(struct ibv_cq_ex *ibcq, struct ibv_poll_cq_attr *attr);
+ int (*next_poll)(struct ibv_cq_ex *ibcq);
+ void (*end_poll)(struct ibv_cq_ex *ibcq);
+} ops[ADAPTIVE + V1 + STALL + SINGLE_THREADED + CLOCK_UPDATE + 1] = {
+ [V1] = POLL_FN_ENTRY(_v1, _lock, , ,),
+ [0] = POLL_FN_ENTRY(_v0, _lock, , ,),
+ [V1 | SINGLE_THREADED] = POLL_FN_ENTRY(_v1, , , , ),
+ [SINGLE_THREADED] = POLL_FN_ENTRY(_v0, , , , ),
+ [V1 | STALL] = POLL_FN_ENTRY(_v1, _lock, _stall, , ),
+ [STALL] = POLL_FN_ENTRY(_v0, _lock, _stall, , ),
+ [V1 | SINGLE_THREADED | STALL] = POLL_FN_ENTRY(_v1, , _stall, , ),
+ [SINGLE_THREADED | STALL] = POLL_FN_ENTRY(_v0, , _stall, , ),
+ [V1 | STALL | ADAPTIVE] = POLL_FN_ENTRY(_v1, _lock, _stall, _adaptive, ),
+ [STALL | ADAPTIVE] = POLL_FN_ENTRY(_v0, _lock, _stall, _adaptive, ),
+ [V1 | SINGLE_THREADED | STALL | ADAPTIVE] = POLL_FN_ENTRY(_v1, , _stall, _adaptive, ),
+ [SINGLE_THREADED | STALL | ADAPTIVE] = POLL_FN_ENTRY(_v0, , _stall, _adaptive, ),
+ [V1 | CLOCK_UPDATE] = POLL_FN_ENTRY(_v1, _lock, , , _clock_update),
+ [0 | CLOCK_UPDATE] = POLL_FN_ENTRY(_v0, _lock, , , _clock_update),
+ [V1 | SINGLE_THREADED | CLOCK_UPDATE] = POLL_FN_ENTRY(_v1, , , , _clock_update),
+ [SINGLE_THREADED | CLOCK_UPDATE] = POLL_FN_ENTRY(_v0, , , , _clock_update),
+ [V1 | STALL | CLOCK_UPDATE] = POLL_FN_ENTRY(_v1, _lock, _stall, , _clock_update),
+ [STALL | CLOCK_UPDATE] = POLL_FN_ENTRY(_v0, _lock, _stall, , _clock_update),
+ [V1 | SINGLE_THREADED | STALL | CLOCK_UPDATE] = POLL_FN_ENTRY(_v1, , _stall, , _clock_update),
+ [SINGLE_THREADED | STALL | CLOCK_UPDATE] = POLL_FN_ENTRY(_v0, , _stall, , _clock_update),
+ [V1 | STALL | ADAPTIVE | CLOCK_UPDATE] = POLL_FN_ENTRY(_v1, _lock, _stall, _adaptive, _clock_update),
+ [STALL | ADAPTIVE | CLOCK_UPDATE] = POLL_FN_ENTRY(_v0, _lock, _stall, _adaptive, _clock_update),
+ [V1 | SINGLE_THREADED | STALL | ADAPTIVE | CLOCK_UPDATE] = POLL_FN_ENTRY(_v1, , _stall, _adaptive, _clock_update),
+ [SINGLE_THREADED | STALL | ADAPTIVE | CLOCK_UPDATE] = POLL_FN_ENTRY(_v0, , _stall, _adaptive, _clock_update),
+};
+
+int xsc_cq_fill_pfns(struct xsc_cq *cq,
+ const struct ibv_cq_init_attr_ex *cq_attr,
+ struct xsc_context *xctx)
+{
+ const struct op *poll_ops = &ops[((cq->stall_enable && cq->stall_adaptive_enable) ? ADAPTIVE : 0) |
+ (xctx->cqe_version ? V1 : 0) |
+ (cq->flags & XSC_CQ_FLAGS_SINGLE_THREADED ?
+ SINGLE_THREADED : 0) |
+ (cq->stall_enable ? STALL : 0) |
+ ((cq_attr->wc_flags & IBV_WC_EX_WITH_COMPLETION_TIMESTAMP_WALLCLOCK) ?
+ CLOCK_UPDATE : 0)];
+
+ cq->verbs_cq.cq_ex.start_poll = poll_ops->start_poll;
+ cq->verbs_cq.cq_ex.next_poll = poll_ops->next_poll;
+ cq->verbs_cq.cq_ex.end_poll = poll_ops->end_poll;
+
+ cq->verbs_cq.cq_ex.read_opcode = xsc_cq_read_wc_opcode;
+ cq->verbs_cq.cq_ex.read_vendor_err = xsc_cq_read_wc_vendor_err;
+ cq->verbs_cq.cq_ex.read_wc_flags = xsc_cq_read_wc_flags;
+ if (cq_attr->wc_flags & IBV_WC_EX_WITH_BYTE_LEN)
+ cq->verbs_cq.cq_ex.read_byte_len = xsc_cq_read_wc_byte_len;
+ if (cq_attr->wc_flags & IBV_WC_EX_WITH_IMM)
+ cq->verbs_cq.cq_ex.read_imm_data = xsc_cq_read_wc_imm_data;
+ if (cq_attr->wc_flags & IBV_WC_EX_WITH_QP_NUM)
+ cq->verbs_cq.cq_ex.read_qp_num = xsc_cq_read_wc_qp_num;
+ if (cq_attr->wc_flags & IBV_WC_EX_WITH_SRC_QP)
+ cq->verbs_cq.cq_ex.read_src_qp = xsc_cq_read_wc_src_qp;
+ if (cq_attr->wc_flags & IBV_WC_EX_WITH_SLID)
+ cq->verbs_cq.cq_ex.read_slid = xsc_cq_read_wc_slid;
+ if (cq_attr->wc_flags & IBV_WC_EX_WITH_SL)
+ cq->verbs_cq.cq_ex.read_sl = xsc_cq_read_wc_sl;
+ if (cq_attr->wc_flags & IBV_WC_EX_WITH_DLID_PATH_BITS)
+ cq->verbs_cq.cq_ex.read_dlid_path_bits = xsc_cq_read_wc_dlid_path_bits;
+ if (cq_attr->wc_flags & IBV_WC_EX_WITH_COMPLETION_TIMESTAMP)
+ cq->verbs_cq.cq_ex.read_completion_ts = xsc_cq_read_wc_completion_ts;
+ if (cq_attr->wc_flags & IBV_WC_EX_WITH_CVLAN)
+ cq->verbs_cq.cq_ex.read_cvlan = xsc_cq_read_wc_cvlan;
+ if (cq_attr->wc_flags & IBV_WC_EX_WITH_FLOW_TAG)
+ cq->verbs_cq.cq_ex.read_flow_tag = xsc_cq_read_flow_tag;
+ if (cq_attr->wc_flags & IBV_WC_EX_WITH_TM_INFO)
+ cq->verbs_cq.cq_ex.read_tm_info = xsc_cq_read_wc_tm_info;
+ if (cq_attr->wc_flags & IBV_WC_EX_WITH_COMPLETION_TIMESTAMP_WALLCLOCK) {
+ if (!xctx->clock_info_page)
+ return EOPNOTSUPP;
+ cq->verbs_cq.cq_ex.read_completion_wallclock_ns =
+ xsc_cq_read_wc_completion_wallclock_ns;
+ }
+
+ return 0;
+}
+
+int xsc_arm_cq(struct ibv_cq *ibvcq, int solicited)
+{
+ struct xsc_cq *cq = to_xcq(ibvcq);
+ union xsc_db_data doorbell;
+
+ doorbell.cqn = cq->cqn;
+ doorbell.cq_next_cid = cq->cons_index;
+ doorbell.solicited = !!solicited;
+
+ /*
+ * Make sure that the doorbell record in host memory is
+ * written before ringing the doorbell via PCI WC MMIO.
+ */
+ mmio_wc_start();
+
+ WR_REG(cq->armdb, doorbell.raw_data);
+
+ mmio_flush_writes();
+
+ return 0;
+}
+
+void xsc_cq_event(struct ibv_cq *cq)
+{
+ to_xcq(cq)->arm_sn++;
+}
+
+static int is_equal_rsn(struct xsc_cqe64 *cqe64, uint32_t rsn)
+{
+ return rsn == (be32toh(cqe64->sop_drop_qpn) & 0xffffff);
+}
+
+static inline int is_equal_uidx(struct xsc_cqe64 *cqe64, uint32_t uidx)
+{
+ return uidx == (be32toh(cqe64->srqn_uidx) & 0xffffff);
+}
+
+static inline int is_responder(uint8_t opcode)
+{
+ switch (opcode) {
+ case XSC_CQE_RESP_WR_IMM:
+ case XSC_CQE_RESP_SEND:
+ case XSC_CQE_RESP_SEND_IMM:
+ case XSC_CQE_RESP_SEND_INV:
+ case XSC_CQE_RESP_ERR:
+ return 1;
+ }
+
+ return 0;
+}
+
+static inline int free_res_cqe(struct xsc_cqe64 *cqe64, uint32_t rsn, int cqe_version)
+{
+ if (cqe_version) {
+ if (is_equal_uidx(cqe64, rsn)) {
+ return 1;
+ }
+ } else {
+ if (is_equal_rsn(cqe64, rsn)) {
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+void __xsc_cq_clean(struct xsc_cq *cq, uint32_t rsn)
+{
+ uint32_t prod_index;
+ int nfreed = 0;
+ struct xsc_cqe64 *cqe64, *dest64;
+ void *cqe, *dest;
+ uint8_t owner_bit;
+ int cqe_version;
+
+ if (!cq || cq->flags & XSC_CQ_FLAGS_DV_OWNED)
+ return;
+ xsc_dbg(to_xctx(cq->verbs_cq.cq_ex.context)->dbg_fp, XSC_DBG_CQ, "\n");
+
+ /*
+ * First we need to find the current producer index, so we
+ * know where to start cleaning from. It doesn't matter if HW
+ * adds new entries after this loop -- the QP we're worried
+ * about is already in RESET, so the new entries won't come
+ * from our QP and therefore don't need to be checked.
+ */
+ for (prod_index = cq->cons_index; get_sw_cqe(cq, prod_index); ++prod_index)
+ if (prod_index == cq->cons_index + cq->verbs_cq.cq_ex.cqe)
+ break;
+
+ /*
+ * Now sweep backwards through the CQ, removing CQ entries
+ * that match our QP by copying older entries on top of them.
+ */
+ cqe_version = (to_xctx(cq->verbs_cq.cq_ex.context))->cqe_version;
+ while ((int) --prod_index - (int) cq->cons_index >= 0) {
+ cqe = get_cqe(cq, prod_index & (cq->verbs_cq.cq_ex.cqe - 1));
+ cqe64 = (cq->cqe_sz == 64) ? cqe : cqe + 64;
+ if (free_res_cqe(cqe64, rsn, cqe_version)) {
+ ++nfreed;
+ } else if (nfreed) {
+ dest = get_cqe(cq, (prod_index + nfreed) & (cq->verbs_cq.cq_ex.cqe - 1));
+ dest64 = (cq->cqe_sz == 64) ? dest : dest + 64;
+ owner_bit = dest64->op_own & XSC_CQE_OWNER_MASK;
+ memcpy(dest, cqe, cq->cqe_sz);
+ dest64->op_own = owner_bit |
+ (dest64->op_own & ~XSC_CQE_OWNER_MASK);
+ }
+ }
+
+ if (nfreed) {
+ cq->cons_index += nfreed;
+ /*
+ * Make sure update of buffer contents is done before
+ * updating consumer index.
+ */
+ udma_to_device_barrier();
+ update_cons_index(cq);
+ }
+}
+
+void xsc_cq_clean(struct xsc_cq *cq, uint32_t qpn)
+{
+ xsc_spin_lock(&cq->lock);
+ __xsc_cq_clean(cq, qpn);
+ xsc_spin_unlock(&cq->lock);
+}
+
+int xsc_alloc_cq_buf(struct xsc_context *xctx, struct xsc_cq *cq,
+ struct xsc_buf *buf, int nent, int cqe_sz)
+{
+ struct xsc_device *xdev = to_xdev(xctx->ibv_ctx.context.device);
+ int ret;
+ enum xsc_alloc_type type;
+ enum xsc_alloc_type default_type = XSC_ALLOC_TYPE_ANON;
+
+ if (xsc_use_huge("HUGE_CQ"))
+ default_type = XSC_ALLOC_TYPE_HUGE;
+
+ xsc_get_alloc_type(xctx, XSC_CQ_PREFIX, &type, default_type);
+
+ ret = xsc_alloc_prefered_buf(xctx, buf,
+ align(nent * cqe_sz, xdev->page_size),
+ xdev->page_size,
+ type,
+ XSC_CQ_PREFIX);
+
+ if (ret)
+ return -1;
+
+ memset(buf->buf, 0, nent * cqe_sz);
+
+ return 0;
+}
+
+int xsc_free_cq_buf(struct xsc_context *ctx, struct xsc_buf *buf)
+{
+ return xsc_free_actual_buf(ctx, buf);
+}
diff --git a/providers/xscale/cqm_csr_defines.h b/providers/xscale/cqm_csr_defines.h
new file mode 100644
index 0000000..9d87438
--- /dev/null
+++ b/providers/xscale/cqm_csr_defines.h
@@ -0,0 +1,180 @@
+#ifndef _CQM_CSR_DEFINES_H_
+#define _CQM_CSR_DEFINES_H_
+
+#define CQM_SOFT_RESET_REG_ADDR 0x6000
+#define CQM_SOFT_RESET_MASK 0x1
+#define CQM_SOFT_RESET_SHIFT 0
+
+#define CQM_COUNTER_CONFIG_REG_ADDR 0x6020
+#define CQM_CFG_CNT_WRAP_MASK 0x1
+#define CQM_CFG_CNT_WRAP_SHIFT 0
+#define CQM_CFG_CNT_RC_MASK 0x2
+#define CQM_CFG_CNT_RC_SHIFT 1
+
+#define CQM_SCRATCH_PAD_REG_ADDR 0x6040
+#define CQM_SCRATCH_PAD_MASK 0xffffffffffffffff
+#define CQM_SCRATCH_PAD_SHIFT 0
+
+#define CQM_CQM_CONFIG_REG_RING_ADDR_ARRAY_ADDR 0x6060
+#define CQM_CQM_CONFIG_REG_RING_ADDR_ARRAY_SIZE 16
+#define CQM_CQM_CONFIG_REG_RING_ADDR_ARRAY_STRIDE 0x20
+#define CQM_CFG_CPU2CQM_RING_ADDR_MASK 0xffffffffffffffff
+#define CQM_CFG_CPU2CQM_RING_ADDR_SHIFT 0
+
+#define CQM_CQM_CONFIG_REG_RING_SIZE_ARRAY_ADDR 0x6260
+#define CQM_CQM_CONFIG_REG_RING_SIZE_ARRAY_SIZE 16
+#define CQM_CQM_CONFIG_REG_RING_SIZE_ARRAY_STRIDE 0x20
+#define CQM_CFG_CPU2CQM_RING_SIZE_MASK 0xffff
+#define CQM_CFG_CPU2CQM_RING_SIZE_SHIFT 0
+
+#define CQM_CQM_CONFIG_REG_NEXT_CID_ARRAY_ADDR 0x6460
+#define CQM_CQM_CONFIG_REG_NEXT_CID_ARRAY_SIZE 16
+#define CQM_CQM_CONFIG_REG_NEXT_CID_ARRAY_STRIDE 0x20
+#define CQM_CFG_CPU2CQM_NEXT_CID_MASK 0xffff
+#define CQM_CFG_CPU2CQM_NEXT_CID_SHIFT 0
+
+#define CQM_CQM_CONFIG_REG_CFG_EN_ARRAY_ADDR 0x6660
+#define CQM_CQM_CONFIG_REG_CFG_EN_ARRAY_SIZE 16
+#define CQM_CQM_CONFIG_REG_CFG_EN_ARRAY_STRIDE 0x20
+#define CQM_CFG_CPU2CQM_CFG_EN_MASK 0x1
+#define CQM_CFG_CPU2CQM_CFG_EN_SHIFT 0
+
+#define CQM_CQM_CONFIG_CQE_FIFO_TH_ADDR 0x6860
+#define CQM_CFG_CPU2CQM_CQE_FIFO_AFULL_TH_MASK 0xff
+#define CQM_CFG_CPU2CQM_CQE_FIFO_AFULL_TH_SHIFT 0
+#define CQM_CFG_CPU2CQM_CQE_FIFO_AMTY_TH_MASK 0xff00
+#define CQM_CFG_CPU2CQM_CQE_FIFO_AMTY_TH_SHIFT 8
+
+#define CQM_CQM_CONFIG_CID_FIFO_TH_ADDR 0x6880
+#define CQM_CFG_CPU2CQM_CID_FIFO_AFULL_TH_MASK 0xff
+#define CQM_CFG_CPU2CQM_CID_FIFO_AFULL_TH_SHIFT 0
+#define CQM_CFG_CPU2CQM_CID_FIFO_AMTY_TH_MASK 0xff00
+#define CQM_CFG_CPU2CQM_CID_FIFO_AMTY_TH_SHIFT 8
+
+#define CQM_CQM_STATUS_REG_ARRAY_ADDR 0x68a0
+#define CQM_CQM_STATUS_REG_ARRAY_SIZE 16
+#define CQM_CQM_STATUS_REG_ARRAY_STRIDE 0x20
+#define CQM_CFG_CQM2CPU_DONE_PID_MASK 0xffff
+#define CQM_CFG_CQM2CPU_DONE_PID_SHIFT 0
+
+#define CQM_CQM_STATUS_LOCAL_NEXT_PID_REG_ARRAY_ADDR 0x6aa0
+#define CQM_CQM_STATUS_LOCAL_NEXT_PID_REG_ARRAY_SIZE 16
+#define CQM_CQM_STATUS_LOCAL_NEXT_PID_REG_ARRAY_STRIDE 0x20
+#define CQM_CQM_LOCAL_NEXT_PID_MASK 0xffff
+#define CQM_CQM_LOCAL_NEXT_PID_SHIFT 0
+
+#define CQM_CQM_DMA_REQ_LEN_STATE_REG_ADDR 0x6ca0
+#define CQM_CQM_DMA_REQ_LEN_MASK 0x3ff
+#define CQM_CQM_DMA_REQ_LEN_SHIFT 0
+
+#define CQM_CQM_DMA_REQ_ADDR_STATE_REG_ADDR 0x6cc0
+#define CQM_CQM_DMA_REQ_ADDR_MASK 0xffffffffffffffff
+#define CQM_CQM_DMA_REQ_ADDR_SHIFT 0
+
+#define CQM_CQM_CQE_L_QPID_STATE_REG_ADDR 0x6ce0
+#define CQM_CQM_CQE_L_QP_ID_MASK 0xffffff
+#define CQM_CQM_CQE_L_QP_ID_SHIFT 0
+
+#define CQM_CQM_CQE_MSG_LEN_STATE_REG_ADDR 0x6d00
+#define CQM_CQM_CQE_MSG_LEN_MASK 0xffffffff
+#define CQM_CQM_CQE_MSG_LEN_SHIFT 0
+
+#define CQM_CQM_CQE_ERR_CODE_STATE_REG_ADDR 0x6d20
+#define CQM_CQM_CQE_ERR_CODE_MASK 0xff
+#define CQM_CQM_CQE_ERR_CODE_SHIFT 0
+
+#define CQM_CQM_CQE_MSG_OPCODE_STATE_REG_ADDR 0x6d40
+#define CQM_CQM_CQE_MSG_OPCODE_MASK 0xff
+#define CQM_CQM_CQE_MSG_OPCODE_SHIFT 0
+
+#define CQM_CQM_CQE_WQEID_STATE_REG_ADDR 0x6d60
+#define CQM_CQM_CQE_WQEID_MASK 0xffff
+#define CQM_CQM_CQE_WQEID_SHIFT 0
+
+#define CQM_CQM_CQE_TX0RX1_STATE_REG_ADDR 0x6d80
+#define CQM_CQM_CQE_TX0RX1_MASK 0x1
+#define CQM_CQM_CQE_TX0RX1_SHIFT 0
+
+#define CQM_CQM_CQE_CQ_ID_STATE_REG_ADDR 0x6da0
+#define CQM_CQM_CQE_CQ_ID_MASK 0xf
+#define CQM_CQM_CQE_CQ_ID_SHIFT 0
+
+#define CQM_CQM_WR_ACK_CNT_STATE_REG_ADDR 0x6dc0
+#define CQM_CQM_DMA_WR_ACK_MASK 0xff
+#define CQM_CQM_DMA_WR_ACK_SHIFT 0
+
+#define CQM_CQM_RD_ACK_CNT_STATE_REG_ADDR 0x6de0
+#define CQM_CQM_DMA_RD_ACK_MASK 0xff
+#define CQM_CQM_DMA_RD_ACK_SHIFT 0
+
+#define CQM_CQM_CQE_ACK_CNT_STATE_REG_ADDR 0x6e00
+#define CQM_CQM_DMA_CQE_ACK_MASK 0xff
+#define CQM_CQM_DMA_CQE_ACK_SHIFT 0
+
+#define CQM_CQM_CMD_FIFO_STATE_REG_ADDR 0x6e20
+#define CQM_CQM_FIFO_OVFL_INT_MASK 0x3
+#define CQM_CQM_FIFO_OVFL_INT_SHIFT 0
+#define CQM_CQM_FIFO_UNFL_INT_MASK 0xc
+#define CQM_CQM_FIFO_UNFL_INT_SHIFT 2
+#define CQM_CQM_FIFO_MTY_MASK 0x30
+#define CQM_CQM_FIFO_MTY_SHIFT 4
+#define CQM_CQM_FIFO_FUL_MASK 0xc0
+#define CQM_CQM_FIFO_FUL_SHIFT 6
+#define CQM_CQM_RING_FULL_INT_MASK 0xffff00
+#define CQM_CQM_RING_FULL_INT_SHIFT 8
+#define CQM_CQM_DEFINE_ERR_INT_MASK 0x1000000
+#define CQM_CQM_DEFINE_ERR_INT_SHIFT 24
+#define CQM_CQM_SOP_EOP_NO_EQUAL_MASK 0x2000000
+#define CQM_CQM_SOP_EOP_NO_EQUAL_SHIFT 25
+
+#define CQM_CQM_FIFO_USED_CNT_REG_ADDR 0x6e40
+#define CQM_CQM_FIFO_USED_CNT_REG_SIZE 2
+#define CQM_CQM_FIFO_USED_CNT_REG_STRIDE 0x20
+#define CQM_CQM_FIFO_USED_CNT_MASK 0x7f
+#define CQM_CQM_FIFO_USED_CNT_SHIFT 0
+
+#define CQM_CQM_DEBUG_INFO_STATE_REG_0_ADDR 0x6e80
+#define CQM_CQM2CSR_DBG_OPCODE_MASK 0xff
+#define CQM_CQM2CSR_DBG_OPCODE_SHIFT 0
+#define CQM_CQM2CSR_DBG_TX0_RX1_MASK 0x100
+#define CQM_CQM2CSR_DBG_TX0_RX1_SHIFT 8
+#define CQM_CQM2CSR_DBG_CAP_MASK 0x200
+#define CQM_CQM2CSR_DBG_CAP_SHIFT 9
+#define CQM_CQM2CSR_DBG_L_QPID_MASK 0x1c00
+#define CQM_CQM2CSR_DBG_L_QPID_SHIFT 10
+#define CQM_CQM2CSR_DBG_SN_MASK 0x1fffffe000
+#define CQM_CQM2CSR_DBG_SN_SHIFT 13
+
+#define CQM_CQM_DEBUG_INFO_STATE_REG_1_ADDR 0x6ea0
+#define CQM_CQM2CSR_DBG_MOD_IF_BM_MASK 0xffffffffffffffff
+#define CQM_CQM2CSR_DBG_MOD_IF_BM_SHIFT 0
+
+#define CQM_CQM_DMA_IN_SOP_CNT_REG_ADDR 0x6ec0
+#define CQM_CQM_DMA_IN_SOP_CNT_MASK 0xffffffffffffffff
+#define CQM_CQM_DMA_IN_SOP_CNT_SHIFT 0
+
+#define CQM_CQM_DMA_IN_EOP_CNT_REG_ADDR 0x6ee0
+#define CQM_CQM_DMA_IN_EOP_CNT_MASK 0xffffffffffffffff
+#define CQM_CQM_DMA_IN_EOP_CNT_SHIFT 0
+
+#define CQM_CQM_DMA_IN_VLD_CNT_REG_ADDR 0x6f00
+#define CQM_CQM_DMA_IN_VLD_CNT_MASK 0xffffffffffffffff
+#define CQM_CQM_DMA_IN_VLD_CNT_SHIFT 0
+
+#define CQM_CQM_DMA_REQ_CNT_REG_ADDR 0x6f20
+#define CQM_CQM_DMA_REQ_CNT_MASK 0xffffffffffffffff
+#define CQM_CQM_DMA_REQ_CNT_SHIFT 0
+
+#define CQM_CQM_DMA_GNT_CNT_REG_ADDR 0x6f40
+#define CQM_CQM_DMA_GNT_CNT_MASK 0xffffffffffffffff
+#define CQM_CQM_DMA_GNT_CNT_SHIFT 0
+
+#define CQM_CQM_DMA_ACK_VLD_CNT_REG_ADDR 0x6f60
+#define CQM_CQM_DMA_ACK_VLD_CNT_MASK 0xffffffffffffffff
+#define CQM_CQM_DMA_ACK_VLD_CNT_SHIFT 0
+
+#define CQM_CQM_MER2CQM_VLD_CNT_REG_ADDR 0x6f80
+#define CQM_CQM_MER2CQM_VLD_CNT_MASK 0xffffffffffffffff
+#define CQM_CQM_MER2CQM_VLD_CNT_SHIFT 0
+
+#endif
diff --git a/providers/xscale/dbrec.c b/providers/xscale/dbrec.c
new file mode 100644
index 0000000..3987b88
--- /dev/null
+++ b/providers/xscale/dbrec.c
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd.
+ * All rights reserved.
+ */
+
+#define _GNU_SOURCE
+#include <config.h>
+
+#include <stdlib.h>
+#include <pthread.h>
+#include <string.h>
+
+#include "xscale.h"
+
+struct xsc_db_page {
+ struct xsc_db_page *prev, *next;
+ struct xsc_buf buf;
+ int num_db;
+ int use_cnt;
+ unsigned long free[0];
+};
+
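+/*
+ * Doorbell records are handed out from page-sized buffers: each page is
+ * split into page_size / cache_line_size slots and the free[] bitmap in
+ * struct xsc_db_page tracks which slots are still available.
+ */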
+static struct xsc_db_page *__add_page(struct xsc_context *context)
+{
+ struct xsc_db_page *page;
+ int ps = to_xdev(context->ibv_ctx.context.device)->page_size;
+ int pp;
+ int i;
+ int nlong;
+ int ret;
+
+ pp = ps / context->cache_line_size;
+ nlong = (pp + 8 * sizeof(long) - 1) / (8 * sizeof(long));
+
+ page = malloc(sizeof *page + nlong * sizeof(long));
+ if (!page)
+ return NULL;
+
+ if (xsc_is_extern_alloc(context))
+ ret = xsc_alloc_buf_extern(context, &page->buf, ps);
+ else
+ ret = xsc_alloc_buf(&page->buf, ps, ps);
+ if (ret) {
+ free(page);
+ return NULL;
+ }
+
+ page->num_db = pp;
+ page->use_cnt = 0;
+ for (i = 0; i < nlong; ++i)
+ page->free[i] = ~0;
+
+ page->prev = NULL;
+ page->next = context->db_list;
+ context->db_list = page;
+ if (page->next)
+ page->next->prev = page;
+
+ return page;
+}
+
+__be32 *xsc_alloc_dbrec(struct xsc_context *context)
+{
+ struct xsc_db_page *page;
+ __be32 *db = NULL;
+ int i, j;
+
+ pthread_mutex_lock(&context->db_list_mutex);
+
+ for (page = context->db_list; page; page = page->next)
+ if (page->use_cnt < page->num_db)
+ goto found;
+
+ page = __add_page(context);
+ if (!page)
+ goto out;
+
+found:
+ ++page->use_cnt;
+
+ for (i = 0; !page->free[i]; ++i)
+ /* nothing */;
+
+ j = ffsl(page->free[i]);
+ --j;
+ page->free[i] &= ~(1UL << j);
+ db = page->buf.buf + (i * 8 * sizeof(long) + j) * context->cache_line_size;
+
+out:
+ pthread_mutex_unlock(&context->db_list_mutex);
+
+ return db;
+}
+
+void xsc_free_db(struct xsc_context *context, __be32 *db)
+{
+ struct xsc_db_page *page;
+ uintptr_t ps = to_xdev(context->ibv_ctx.context.device)->page_size;
+ int i;
+
+ pthread_mutex_lock(&context->db_list_mutex);
+
+ for (page = context->db_list; page; page = page->next)
+ if (((uintptr_t) db & ~(ps - 1)) == (uintptr_t) page->buf.buf)
+ break;
+
+ if (!page)
+ goto out;
+
+ i = ((void *) db - page->buf.buf) / context->cache_line_size;
+ page->free[i / (8 * sizeof(long))] |= 1UL << (i % (8 * sizeof(long)));
+
+ if (!--page->use_cnt) {
+ if (page->prev)
+ page->prev->next = page->next;
+ else
+ context->db_list = page->next;
+ if (page->next)
+ page->next->prev = page->prev;
+
+ if (page->buf.type == XSC_ALLOC_TYPE_EXTERNAL)
+ xsc_free_buf_extern(context, &page->buf);
+ else
+ xsc_free_buf(&page->buf);
+
+ free(page);
+ }
+
+out:
+ pthread_mutex_unlock(&context->db_list_mutex);
+}
diff --git a/providers/xscale/libxsc.map b/providers/xscale/libxsc.map
new file mode 100644
index 0000000..005c161
--- /dev/null
+++ b/providers/xscale/libxsc.map
@@ -0,0 +1,59 @@
+/* Export symbols should be added below according to the
+   Documentation/versioning.md document. */
+XSC_1.0 {
+ global:
+ xscdv_query_device;
+ xscdv_init_obj;
+ local: *;
+};
+
+XSC_1.1 {
+ global:
+ xscdv_create_cq;
+} XSC_1.0;
+
+XSC_1.2 {
+ global:
+ xscdv_init_obj;
+ xscdv_set_context_attr;
+} XSC_1.1;
+
+XSC_1.3 {
+ global:
+ xscdv_create_qp;
+ xscdv_create_wq;
+} XSC_1.2;
+
+XSC_1.4 {
+ global:
+ xscdv_get_clock_info;
+} XSC_1.3;
+
+XSC_1.5 {
+ global:
+ xscdv_create_flow_action_esp;
+} XSC_1.4;
+
+XSC_1.6 {
+ global:
+ xscdv_create_flow_matcher;
+ xscdv_destroy_flow_matcher;
+ xscdv_create_flow;
+} XSC_1.5;
+
+XSC_1.7 {
+ global:
+ xscdv_create_flow_action_modify_header;
+ xscdv_create_flow_action_packet_reformat;
+ xscdv_devx_alloc_uar;
+ xscdv_devx_free_uar;
+ xscdv_devx_general_cmd;
+ xscdv_devx_obj_create;
+ xscdv_devx_obj_destroy;
+ xscdv_devx_obj_modify;
+ xscdv_devx_obj_query;
+ xscdv_devx_query_eqn;
+ xscdv_devx_umem_dereg;
+ xscdv_devx_umem_reg;
+ xscdv_open_device;
+} XSC_1.6;
diff --git a/providers/xscale/qp.c b/providers/xscale/qp.c
new file mode 100644
index 0000000..04e87e2
--- /dev/null
+++ b/providers/xscale/qp.c
@@ -0,0 +1,678 @@
+/*
+ * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd.
+ * All rights reserved.
+ */
+
+#include <config.h>
+
+#include <stdlib.h>
+#include <pthread.h>
+#include <string.h>
+#include <errno.h>
+#include <stdio.h>
+#include <util/mmio.h>
+#include <util/compiler.h>
+
+#include "xscale.h"
+#include "wqe.h"
+#include "xsc_hsi.h"
+
+static const uint32_t xsc_ib_opcode[] = {
+ [IBV_WR_SEND] = XSC_MSG_OPCODE_SEND,
+ [IBV_WR_SEND_WITH_IMM] = XSC_MSG_OPCODE_SEND,
+ [IBV_WR_RDMA_WRITE] = XSC_MSG_OPCODE_RDMA_WRITE,
+ [IBV_WR_RDMA_WRITE_WITH_IMM] = XSC_MSG_OPCODE_RDMA_WRITE,
+ [IBV_WR_RDMA_READ] = XSC_MSG_OPCODE_RDMA_READ,
+ [IBV_WR_SEND_WITH_INV] = XSC_MSG_OPCODE_SEND,
+};
+
+static void *get_recv_wqe(struct xsc_qp *qp, int n)
+{
+ return qp->rq_start + (n << qp->rq.wqe_shift);
+}
+
+static void *get_wq_recv_wqe(struct xsc_rwq *rwq, int n)
+{
+ return rwq->pbuff + (n << rwq->rq.wqe_shift);
+}
+
+static void *get_seg_wqe(void *first, int n)
+{
+ return first + (n << XSC_BASE_WQE_SHIFT);
+}
+
+void *xsc_get_send_wqe(struct xsc_qp *qp, int n)
+{
+ return qp->sq_start + (n << qp->sq.wqe_shift);
+}
+
+void xsc_init_rwq_indices(struct xsc_rwq *rwq)
+{
+ rwq->rq.head = 0;
+ rwq->rq.tail = 0;
+}
+
+void xsc_init_qp_indices(struct xsc_qp *qp)
+{
+ qp->sq.head = 0;
+ qp->sq.tail = 0;
+ qp->rq.head = 0;
+ qp->rq.tail = 0;
+ qp->sq.cur_post = 0;
+}
+
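+/*
+ * Work queue overflow check: the first head - tail comparison is lock-free;
+ * on apparent overflow it is repeated under the CQ lock, since the poll path
+ * advances the tail as completions are reaped.
+ */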
+static int xsc_wq_overflow(struct xsc_wq *wq, int nreq, struct xsc_cq *cq)
+{
+ unsigned cur;
+
+ cur = wq->head - wq->tail;
+ if (cur + nreq < wq->max_post)
+ return 0;
+
+ xsc_spin_lock(&cq->lock);
+ cur = wq->head - wq->tail;
+ xsc_spin_unlock(&cq->lock);
+
+ return cur + nreq >= wq->max_post;
+}
+
+static inline void set_remote_addr_seg(struct xsc_wqe_data_seg *remote_seg,
+ uint32_t msg_len, uint64_t remote_addr, uint32_t rkey)
+{
+ WR_LE_32(remote_seg->seg_len, msg_len);
+ WR_LE_32(remote_seg->mkey, rkey);
+ WR_LE_64(remote_seg->va, remote_addr);
+}
+
+static void set_local_data_seg(struct xsc_wqe_data_seg *data_seg, struct ibv_sge *sg)
+{
+ WR_LE_32(data_seg->seg_len, sg->length);
+ WR_LE_32(data_seg->mkey, sg->lkey);
+ WR_LE_64(data_seg->va, sg->addr);
+}
+
+static __be32 send_ieth(struct ibv_send_wr *wr)
+{
+ switch (wr->opcode) {
+ case IBV_WR_SEND_WITH_IMM:
+ case IBV_WR_RDMA_WRITE_WITH_IMM:
+ return wr->imm_data;
+ default:
+ return 0;
+ }
+}
+
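+/*
+ * Copy an inline payload straight into the WQE: the scatter list is packed
+ * back to back into the data segments that follow the control segment (and
+ * the remote-address segment for RDMA opcodes); ds_data_num ends up counting
+ * every segment slot used after the control segment.
+ */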
+static int set_data_inl_seg(struct xsc_qp *qp, struct ibv_send_wr *wr,
+ struct xsc_send_wqe_ctrl_seg *ctrl)
+{
+ void *data_seg;
+ unsigned seg_index;
+ void *addr;
+ int len = 0;
+ int i;
+ const int ds_len = sizeof(struct xsc_wqe_data_seg);
+ int left_len = 0;
+ int msg_len = ctrl->msg_len;
+
+ if (wr->opcode == IBV_WR_SEND || wr->opcode == IBV_WR_SEND_WITH_IMM)
+ seg_index = 1;
+ else
+ seg_index = 2;
+
+ if (unlikely(msg_len > qp->max_inline_data))
+ return ENOMEM;
+
+ for (i = 0; i < wr->num_sge; ++i) {
+ if (likely(wr->sg_list[i].length)) {
+				addr = (void *)(uintptr_t)wr->sg_list[i].addr;
+				len = wr->sg_list[i].length;
+				if (left_len > 0) {
+					int copy_len = min_t(int, len, left_len);
+					memcpy(data_seg, addr, copy_len);
+					data_seg += copy_len;
+					left_len -= copy_len;
+					addr += copy_len;
+					len -= copy_len;
+				}
+
+ while (len >= ds_len) {
+ data_seg = get_seg_wqe(ctrl, seg_index);
+ seg_index++;
+ memcpy(data_seg, addr, ds_len);
+ addr += ds_len;
+ len -= ds_len;
+ }
+
+ if (len > 0) {
+ data_seg = get_seg_wqe(ctrl, seg_index);
+ seg_index++;
+ memcpy(data_seg, addr, len);
+ data_seg += len;
+ left_len = ds_len - len;
+ } else {
+ left_len = 0;
+ }
+ }
+ }
+
+ ctrl->ds_data_num = seg_index - 1;
+
+ return 0;
+}
+
+static void zero_send_ds(int idx, struct xsc_qp *qp)
+{
+ void *seg;
+ uint64_t *uninitialized_var(p);
+ int i;
+
+ seg = (void*)xsc_get_send_wqe(qp, idx);
+ for (i = 1; i < qp->sq.seg_cnt; i++) {
+ p = get_seg_wqe(seg, i);
+ p[0] = p[1] = 0;
+ }
+}
+
+static void zero_recv_ds(int idx, struct xsc_qp *qp)
+{
+ void *seg;
+ uint64_t *uninitialized_var(p);
+ int i;
+
+ seg = (void*)get_recv_wqe(qp, idx);
+ for (i = 1; i < qp->rq.seg_cnt; i++) {
+ p = get_seg_wqe(seg, i);
+ p[0] = p[1] = 0;
+ }
+}
+
+#ifdef XSC_DEBUG
+static void dump_wqe(int type, int idx, struct xsc_qp *qp)
+{
+ /* type0 send type1 recv */
+ uint32_t *uninitialized_var(p);
+ int i;
+ void *seg;
+
+ if (type == 0) {
+ seg = (void*)xsc_get_send_wqe(qp, idx);
+ xsc_dbg(to_xctx(qp->ibv_qp->context)->dbg_fp, XSC_DBG_QP,
+ "dump send wqe at %p\n", seg);
+ for (i = 0; i < qp->sq.seg_cnt; i++) {
+ p = get_seg_wqe(seg, i);
+ xsc_dbg(to_xctx(qp->ibv_qp->context)->dbg_fp, XSC_DBG_QP,
+ "0x%08x 0x%08x 0x%08x 0x%08x\n", p[0], p[1], p[2], p[3]);
+ }
+ } else if (type == 1) {
+ seg = (void*)get_recv_wqe(qp, idx);
+ xsc_dbg(to_xctx(qp->ibv_qp->context)->dbg_fp, XSC_DBG_QP,
+ "dump recv wqe at %p\n", seg);
+ for (i = 0; i < qp->rq.seg_cnt; i++) {
+ p = get_seg_wqe(seg, i);
+ xsc_dbg(to_xctx(qp->ibv_qp->context)->dbg_fp, XSC_DBG_QP,
+ "0x%08x 0x%08x 0x%08x 0x%08x\n", p[0], p[1], p[2], p[3]);
+ }
+ } else {
+ xsc_dbg(to_xctx(qp->ibv_qp->context)->dbg_fp, XSC_DBG_QP,
+ "unknown type %d\n", type);
+ }
+}
+#else
+static inline void dump_wqe(int type, int idx, struct xsc_qp *qp) {}
+#endif
+
+static inline void xsc_post_send_db(struct xsc_qp *qp, int nreq)
+{
+ uint16_t next_pid;
+ union xsc_db_data db;
+
+ if (unlikely(!nreq))
+ return;
+
+ qp->sq.head += nreq;
+ next_pid = qp->sq.head << (qp->sq.wqe_shift - XSC_BASE_WQE_SHIFT);
+ db.sq_next_pid = next_pid;
+ db.sqn = qp->sqn;
+ /*
+ * Make sure that descriptors are written before
+ * updating doorbell record and ringing the doorbell
+ */
+ xsc_dbg(to_xctx(qp->ibv_qp->context)->dbg_fp, XSC_DBG_QP_SEND, "nreq:%d\n", nreq);
+ udma_to_device_barrier();
+ WR_REG(qp->sq.db, db.raw_data);
+}
+
+static inline int _xsc_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
+ struct ibv_send_wr **bad_wr)
+{
+ struct xsc_qp *qp = to_xqp(ibqp);
+ void *seg;
+ struct xsc_send_wqe_ctrl_seg *ctrl;
+ struct xsc_wqe_data_seg *data_seg;
+
+ int nreq;
+ int err = 0;
+ int i;
+ unsigned idx;
+ unsigned seg_index = 1;
+ unsigned msg_len = 0;
+
+ if (unlikely(ibqp->state < IBV_QPS_RTS)) {
+ xsc_dbg(to_xctx(ibqp->context)->dbg_fp, XSC_DBG_QP_SEND,
+ "qp state is %u, should not post send\n", ibqp->state);
+ err = EINVAL;
+ *bad_wr = wr;
+ return err;
+ }
+
+ xsc_spin_lock(&qp->sq.lock);
+
+ for (nreq = 0; wr; ++nreq, wr = wr->next) {
+ seg_index = 1;
+ msg_len = 0;
+ if (unlikely(wr->opcode < 0 ||
+ wr->opcode >= sizeof(xsc_ib_opcode) / sizeof(xsc_ib_opcode[0]))) {
+ xsc_dbg(to_xctx(ibqp->context)->dbg_fp, XSC_DBG_QP_SEND,
+ "bad opcode %d\n", wr->opcode);
+ err = EINVAL;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ if (unlikely(xsc_wq_overflow(&qp->sq, nreq,
+ to_xcq(qp->ibv_qp->send_cq)))) {
+ xsc_dbg(to_xctx(ibqp->context)->dbg_fp, XSC_DBG_QP_SEND,
+ "send work queue overflow\n");
+ err = ENOMEM;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ if (unlikely(wr->num_sge > qp->sq.max_gs)) {
+ xsc_dbg(to_xctx(ibqp->context)->dbg_fp, XSC_DBG_QP_SEND,
+ "max gs exceeded %d (max = %d)\n",
+ wr->num_sge, qp->sq.max_gs);
+ err = ENOMEM;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ if (unlikely(wr->opcode == IBV_WR_RDMA_READ && wr->num_sge > 1)) {
+ xsc_dbg(to_xctx(ibqp->context)->dbg_fp, XSC_DBG_QP_SEND,
+ "rdma read, max gs exceeded %d (max = 1)\n",
+ wr->num_sge);
+ err = ENOMEM;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
+ zero_send_ds(idx, qp);
+ ctrl = seg = xsc_get_send_wqe(qp, idx);
+ ctrl->ds_data_num = 0;
+ WR_LE_16(ctrl->wqe_id,
+ qp->sq.cur_post << (qp->sq.wqe_shift - XSC_BASE_WQE_SHIFT));
+ ctrl->se = wr->send_flags & IBV_SEND_SOLICITED ? 1 : 0;
+ ctrl->ce = qp->sq_signal_bits ? 1 : (wr->send_flags & IBV_SEND_SIGNALED ? 1 : 0);
+ ctrl->in_line = wr->send_flags & IBV_SEND_INLINE ? 1 : 0;
+ for (i = 0; i < wr->num_sge; ++i) {
+ if (likely(wr->sg_list[i].length)) {
+ msg_len += wr->sg_list[i].length;
+ }
+ }
+ ctrl->msg_len = msg_len;
+ ctrl->with_immdt = 0;
+
+ if (unlikely(wr->opcode == IBV_WR_RDMA_READ && msg_len == 0)) {
+ xsc_dbg(to_xctx(ibqp->context)->dbg_fp, XSC_DBG_QP_SEND,
+ "rdma read, msg len should not be 0\n");
+ /* workaround, return success for posting zero-length read */
+ err = 0;
+ goto out;
+ }
+
+ switch (ibqp->qp_type) {
+ case IBV_QPT_RC:
+ switch (wr->opcode) {
+ case IBV_WR_SEND_WITH_INV:
+ case IBV_WR_SEND:
+ break;
+ case IBV_WR_SEND_WITH_IMM:
+ ctrl->with_immdt = 1;
+ ctrl->opcode_data = send_ieth(wr);
+ break;
+ case IBV_WR_RDMA_WRITE_WITH_IMM:
+ ctrl->with_immdt = 1;
+ ctrl->opcode_data = send_ieth(wr);
+ SWITCH_FALLTHROUGH;
+ case IBV_WR_RDMA_READ:
+ case IBV_WR_RDMA_WRITE:
+ if (ctrl->msg_len == 0)
+ break;
+ ctrl->ds_data_num++;
+ data_seg = get_seg_wqe(ctrl, seg_index);
+ set_remote_addr_seg(
+ data_seg,
+ msg_len,
+ wr->wr.rdma.remote_addr,
+ wr->wr.rdma.rkey);
+ seg_index++;
+ break;
+ default:
+ printf("debug: opcode:%u NOT supported\n", wr->opcode);
+ err = EPERM;
+ *bad_wr = wr;
+ goto out;
+ }
+ break;
+ default:
+ xsc_dbg(to_xctx(ibqp->context)->dbg_fp, XSC_DBG_QP_SEND,
+ "qp type:%u NOT supported\n", ibqp->qp_type);
+ err = EPERM;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ if (wr->send_flags & IBV_SEND_INLINE && wr->num_sge) {
+ err = set_data_inl_seg(qp, wr, ctrl);
+ if (unlikely(err)) {
+ *bad_wr = wr;
+ xsc_dbg(to_xctx(ibqp->context)->dbg_fp, XSC_DBG_QP_SEND,
+ "inline layout failed, err %d\n", err);
+ goto out;
+ }
+ } else {
+ for (i = 0; i < wr->num_sge; ++i, ++seg_index) {
+ if (likely(wr->sg_list[i].length)) {
+ data_seg = get_seg_wqe(ctrl, seg_index);
+ set_local_data_seg(data_seg, &wr->sg_list[i]);
+ ctrl->ds_data_num++;
+ }
+ }
+ }
+
+ ctrl->msg_opcode = xsc_ib_opcode[wr->opcode];
+ if (ctrl->msg_len == 0) {
+ ctrl->ds_data_num = 0;
+ zero_send_ds(idx, qp);
+ }
+ qp->sq.wrid[idx] = wr->wr_id;
+ qp->sq.wqe_head[idx] = qp->sq.head + nreq;
+ qp->sq.cur_post += 1;
+ if (ctrl->ce) {
+ qp->sq.flush_wqe_cnt++;
+ qp->sq.need_flush[idx] = 1;
+ }
+ qp->sq.wr_opcode[idx] = wr->opcode;
+
+ if (xsc_debug_mask & XSC_DBG_QP_SEND)
+ dump_wqe(0, idx, qp);
+ }
+
+out:
+ xsc_post_send_db(qp, nreq);
+ xsc_spin_unlock(&qp->sq.lock);
+
+ return err;
+}
+
+int xsc_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
+ struct ibv_send_wr **bad_wr)
+{
+ return _xsc_post_send(ibqp, wr, bad_wr);
+}
+
+static void set_wq_sig_seg(struct xsc_rwq *rwq, struct xsc_rwqe_sig *sig,
+ int size, uint16_t idx)
+{
+ uint8_t sign;
+ uint32_t qpn = rwq->wq.wq_num;
+
+ sign = calc_sig(sig, size);
+ sign ^= calc_sig(&qpn, 4);
+ sign ^= calc_sig(&idx, 2);
+ sig->signature = sign;
+}
+
+int xsc_post_wq_recv(struct ibv_wq *ibwq, struct ibv_recv_wr *wr,
+ struct ibv_recv_wr **bad_wr)
+{
+ struct xsc_rwq *rwq = to_xrwq(ibwq);
+ struct xsc_wqe_data_seg *scat;
+ int err = 0;
+ int nreq;
+ int ind;
+ int i, j;
+ struct xsc_rwqe_sig *sig;
+
+ xsc_spin_lock(&rwq->rq.lock);
+
+ ind = rwq->rq.head & (rwq->rq.wqe_cnt - 1);
+
+ for (nreq = 0; wr; ++nreq, wr = wr->next) {
+ if (unlikely(xsc_wq_overflow(&rwq->rq, nreq,
+ to_xcq(rwq->wq.cq)))) {
+ err = ENOMEM;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ if (unlikely(wr->num_sge > rwq->rq.max_gs)) {
+ err = EINVAL;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ scat = get_wq_recv_wqe(rwq, ind);
+ sig = (struct xsc_rwqe_sig *)scat;
+ if (unlikely(rwq->wq_sig)) {
+ memset(sig, 0, 1 << rwq->rq.wqe_shift);
+ ++scat;
+ }
+
+		for (i = 0, j = 0; i < wr->num_sge; ++i) {
+			if (unlikely(!wr->sg_list[i].length))
+				continue;
+			set_local_data_seg(scat + j, &wr->sg_list[i]);
+			j++;
+		}
+
+ if (j < rwq->rq.max_gs) {
+ scat[j].seg_len = 0;
+ scat[j].mkey = htole32(XSC_INVALID_LKEY);
+ scat[j].va = 0;
+ }
+
+ if (unlikely(rwq->wq_sig))
+ set_wq_sig_seg(rwq, sig, (wr->num_sge + 1) << 4,
+ rwq->rq.head & 0xffff);
+
+ rwq->rq.wrid[ind] = wr->wr_id;
+
+ ind = (ind + 1) & (rwq->rq.wqe_cnt - 1);
+ rwq->rq.flush_wqe_cnt++;
+ }
+
+out:
+ if (likely(nreq)) {
+ rwq->rq.head += nreq;
+ /*
+ * Make sure that descriptors are written before
+ * doorbell record.
+ */
+ udma_to_device_barrier();
+ *(rwq->recv_db) = htobe32(rwq->rq.head & 0xffff);
+ }
+
+ xsc_spin_unlock(&rwq->rq.lock);
+
+ return err;
+}
+
+int xsc_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr,
+ struct ibv_recv_wr **bad_wr)
+{
+ struct xsc_qp *qp = to_xqp(ibqp);
+ struct xsc_wqe_data_seg *recv_head;
+ struct xsc_wqe_data_seg *data_seg;
+ int err = 0;
+ uint16_t next_pid = 0;
+ union xsc_db_data db;
+ int nreq;
+ uint16_t idx;
+ int i;
+
+ xsc_spin_lock(&qp->rq.lock);
+
+ idx = qp->rq.head & (qp->rq.wqe_cnt - 1);
+
+ zero_recv_ds(idx, qp);
+ for (nreq = 0; wr; ++nreq, wr = wr->next) {
+ if (unlikely(xsc_wq_overflow(&qp->rq, nreq,
+ to_xcq(qp->ibv_qp->recv_cq)))) {
+ printf("recv work queue overflow\n");
+ err = ENOMEM;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ if (unlikely(wr->num_sge > qp->rq.max_gs)) {
+ printf("max gs exceeded %d (max = %d)\n",
+ wr->num_sge, qp->rq.max_gs);
+ err = EINVAL;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ recv_head = get_recv_wqe(qp, idx);
+
+ for (i = 0; i < wr->num_sge; ++i) {
+ if (unlikely(!wr->sg_list[i].length))
+ continue;
+ data_seg = get_seg_wqe(recv_head, i);
+ WR_LE_32(data_seg->seg_len, wr->sg_list[i].length);
+ WR_LE_32(data_seg->mkey, wr->sg_list[i].lkey);
+ WR_LE_64(data_seg->va, wr->sg_list[i].addr);
+ }
+
+ qp->rq.wrid[idx] = wr->wr_id;
+
+ dump_wqe(1, idx, qp);
+ idx = (idx + 1) & (qp->rq.wqe_cnt - 1);
+ qp->rq.flush_wqe_cnt++;
+ }
+
+out:
+ if (likely(nreq)) {
+ qp->rq.head += nreq;
+ next_pid = qp->rq.head << (qp->rq.wqe_shift - XSC_BASE_WQE_SHIFT);
+ db.rq_next_pid = next_pid;
+ db.rqn = qp->rqn;
+
+ /*
+ * Make sure that descriptors are written before
+ * doorbell record.
+ */
+ udma_to_device_barrier();
+ WR_REG(qp->rq.db, db.raw_data);
+ }
+
+ xsc_spin_unlock(&qp->rq.lock);
+
+ return err;
+}
+
+int xsc_use_huge(const char *key)
+{
+ char *e;
+ e = getenv(key);
+ if (e && !strcmp(e, "y"))
+ return 1;
+
+ return 0;
+}
+
+struct xsc_qp *xsc_find_qp(struct xsc_context *ctx, uint32_t qpn)
+{
+ int tind = qpn >> XSC_QP_TABLE_SHIFT;
+
+ if (ctx->qp_table[tind].refcnt)
+ return ctx->qp_table[tind].table[qpn & XSC_QP_TABLE_MASK];
+ else
+ return NULL;
+}
+
+int xsc_store_qp(struct xsc_context *ctx, uint32_t qpn, struct xsc_qp *qp)
+{
+ int tind = qpn >> XSC_QP_TABLE_SHIFT;
+
+ if (!ctx->qp_table[tind].refcnt) {
+ ctx->qp_table[tind].table = calloc(XSC_QP_TABLE_MASK + 1,
+ sizeof(struct xsc_qp *));
+ if (!ctx->qp_table[tind].table)
+ return -1;
+ }
+
+ ++ctx->qp_table[tind].refcnt;
+ ctx->qp_table[tind].table[qpn & XSC_QP_TABLE_MASK] = qp;
+ return 0;
+}
+
+void xsc_clear_qp(struct xsc_context *ctx, uint32_t qpn)
+{
+ int tind = qpn >> XSC_QP_TABLE_SHIFT;
+
+ if (!--ctx->qp_table[tind].refcnt)
+ free(ctx->qp_table[tind].table);
+ else
+ ctx->qp_table[tind].table[qpn & XSC_QP_TABLE_MASK] = NULL;
+}
+
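+/*
+ * Track error-state transitions: when a QP enters IBV_QPS_ERR its id is
+ * queued on the send/recv CQs' err_state_qp_list so that flush-in-error
+ * CQEs can be generated for it; the entries are removed again when the QP
+ * leaves the error state.
+ */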
+int xsc_err_state_qp(struct ibv_qp *qp, enum ibv_qp_state cur_state,
+ enum ibv_qp_state state)
+{
+ struct xsc_err_state_qp_node *tmp, *err_rq_node, *err_sq_node;
+ struct xsc_qp *xqp = to_xqp(qp);
+ int ret = 0;
+
+ xsc_dbg(to_xctx(qp->context)->dbg_fp, XSC_DBG_QP,
+ "modify qp: qpid %d, cur_qp_state %d, qp_state %d\n", xqp->rsc.rsn, cur_state, state);
+ if (cur_state == IBV_QPS_ERR && state != IBV_QPS_ERR) {
+		if (qp->recv_cq) {
+ list_for_each_safe(&to_xcq(qp->recv_cq)->err_state_qp_list, err_rq_node, tmp, entry) {
+ if (err_rq_node->qp_id == xqp->rsc.rsn) {
+ list_del(&err_rq_node->entry);
+ free(err_rq_node);
+ }
+ }
+ }
+
+		if (qp->send_cq) {
+ list_for_each_safe(&to_xcq(qp->send_cq)->err_state_qp_list, err_sq_node, tmp, entry) {
+ if (err_sq_node->qp_id == xqp->rsc.rsn) {
+ list_del(&err_sq_node->entry);
+ free(err_sq_node);
+ }
+ }
+ }
+ return ret;
+ }
+
+ if (cur_state != IBV_QPS_ERR && state == IBV_QPS_ERR) {
+		if (qp->recv_cq) {
+ err_rq_node = calloc(1, sizeof(*err_rq_node));
+ if (!err_rq_node)
+ return ENOMEM;
+ err_rq_node->qp_id = xqp->rsc.rsn;
+ err_rq_node->is_sq = false;
+ list_add_tail(&to_xcq(qp->recv_cq)->err_state_qp_list, &err_rq_node->entry);
+ }
+
+		if (qp->send_cq) {
+ err_sq_node = calloc(1, sizeof(*err_sq_node));
+ if (!err_sq_node)
+ return ENOMEM;
+ err_sq_node->qp_id = xqp->rsc.rsn;
+ err_sq_node->is_sq = true;
+ list_add_tail(&to_xcq(qp->send_cq)->err_state_qp_list, &err_sq_node->entry);
+ }
+ }
+ return ret;
+}
diff --git a/providers/xscale/rqm_csr_defines.h b/providers/xscale/rqm_csr_defines.h
new file mode 100644
index 0000000..9552855
--- /dev/null
+++ b/providers/xscale/rqm_csr_defines.h
@@ -0,0 +1,200 @@
+#ifndef _RQM_CSR_DEFINES_H_
+#define _RQM_CSR_DEFINES_H_
+
+#define RQM_SOFT_RESET_REG_ADDR 0x5000
+#define RQM_SOFT_RESET_MASK 0x1
+#define RQM_SOFT_RESET_SHIFT 0
+
+#define RQM_COUNTER_CONFIG_REG_ADDR 0x5020
+#define RQM_CFG_CNT_WRAP_MASK 0x1
+#define RQM_CFG_CNT_WRAP_SHIFT 0
+#define RQM_CFG_CNT_RC_MASK 0x2
+#define RQM_CFG_CNT_RC_SHIFT 1
+
+#define RQM_SCRATCH_PAD_REG_ADDR 0x5040
+#define RQM_SCRATCH_PAD_MASK 0xffffffffffffffff
+#define RQM_SCRATCH_PAD_SHIFT 0
+
+#define RQM_RQM_CONFIG_REG_RING_ADDR_ARRAY_ADDR 0x5060
+#define RQM_RQM_CONFIG_REG_RING_ADDR_ARRAY_SIZE 8
+#define RQM_RQM_CONFIG_REG_RING_ADDR_ARRAY_STRIDE 0x20
+#define RQM_CFG_CPU2RQM_RING_ADDR_MASK 0xffffffffffffffff
+#define RQM_CFG_CPU2RQM_RING_ADDR_SHIFT 0
+
+#define RQM_RQM_CONFIG_REG_RING_SIZE_ARRAY_ADDR 0x5160
+#define RQM_RQM_CONFIG_REG_RING_SIZE_ARRAY_SIZE 8
+#define RQM_RQM_CONFIG_REG_RING_SIZE_ARRAY_STRIDE 0x20
+#define RQM_CFG_CPU2RQM_RING_SIZE_MASK 0xffff
+#define RQM_CFG_CPU2RQM_RING_SIZE_SHIFT 0
+
+#define RQM_RQM_CONFIG_REG_NEXT_PID_ARRAY_ADDR 0x5260
+#define RQM_RQM_CONFIG_REG_NEXT_PID_ARRAY_SIZE 8
+#define RQM_RQM_CONFIG_REG_NEXT_PID_ARRAY_STRIDE 0x20
+#define RQM_CFG_CPU2RQM_NEXT_PID_MASK 0xffff
+#define RQM_CFG_CPU2RQM_NEXT_PID_SHIFT 0
+
+#define RQM_RQM_CONFIG_REG_CFG_EN_ARRAY_ADDR 0x5360
+#define RQM_RQM_CONFIG_REG_CFG_EN_ARRAY_SIZE 8
+#define RQM_RQM_CONFIG_REG_CFG_EN_ARRAY_STRIDE 0x20
+#define RQM_CFG_CPU2RQM_CFG_EN_MASK 0x1
+#define RQM_CFG_CPU2RQM_CFG_EN_SHIFT 0
+
+#define RQM_RQM_STATUS_REG_ARRAY_ADDR 0x5460
+#define RQM_RQM_STATUS_REG_ARRAY_SIZE 8
+#define RQM_RQM_STATUS_REG_ARRAY_STRIDE 0x20
+#define RQM_STS_RQM2CPU_DONE_CID_MASK 0xffff
+#define RQM_STS_RQM2CPU_DONE_CID_SHIFT 0
+
+#define RQM_RQM_CONFIG_MER_QPID_FIFO_TH_ADDR 0x5560
+#define RQM_CFG_CPU2RQM_MER_QPID_FIFO_AMTY_TH_MASK 0x1f
+#define RQM_CFG_CPU2RQM_MER_QPID_FIFO_AMTY_TH_SHIFT 0
+#define RQM_CFG_CPU2RQM_MER_QPID_FIFO_AFUL_TH_MASK 0x3e0
+#define RQM_CFG_CPU2RQM_MER_QPID_FIFO_AFUL_TH_SHIFT 5
+
+#define RQM_RQM_CONFIG_DMA_QPID_FIFO_TH_ADDR 0x5580
+#define RQM_CFG_CPU2RQM_DMA_QPID_FIFO_AMTY_TH_MASK 0x1f
+#define RQM_CFG_CPU2RQM_DMA_QPID_FIFO_AMTY_TH_SHIFT 0
+#define RQM_CFG_CPU2RQM_DMA_QPID_FIFO_AFUL_TH_MASK 0x3e0
+#define RQM_CFG_CPU2RQM_DMA_QPID_FIFO_AFUL_TH_SHIFT 5
+
+#define RQM_RQM_CONFIG_PTR_QPID_FIFO_TH_ADDR 0x55a0
+#define RQM_CFG_CPU2RQM_PTR_QPID_FIFO_AMTY_TH_MASK 0x1f
+#define RQM_CFG_CPU2RQM_PTR_QPID_FIFO_AMTY_TH_SHIFT 0
+#define RQM_CFG_CPU2RQM_PTR_QPID_FIFO_AFUL_TH_MASK 0x3e0
+#define RQM_CFG_CPU2RQM_PTR_QPID_FIFO_AFUL_TH_SHIFT 5
+
+#define RQM_RQM_CONFIG_WQE_FIFO_AMTY_TH_ADDR 0x55c0
+#define RQM_RQM_CONFIG_WQE_FIFO_AMTY_TH_SIZE 8
+#define RQM_RQM_CONFIG_WQE_FIFO_AMTY_TH_STRIDE 0x20
+#define RQM_CFG_CPU2RQM_WQE_FIFO_AMTY_TH_MASK 0x1f
+#define RQM_CFG_CPU2RQM_WQE_FIFO_AMTY_TH_SHIFT 0
+
+#define RQM_RQM_CONFIG_WQE_FIFO_AFUL_TH_ADDR 0x56c0
+#define RQM_RQM_CONFIG_WQE_FIFO_AFUL_TH_SIZE 8
+#define RQM_RQM_CONFIG_WQE_FIFO_AFUL_TH_STRIDE 0x20
+#define RQM_CFG_CPU2RQM_WQE_FIFO_AFUL_TH_MASK 0x1f
+#define RQM_CFG_CPU2RQM_WQE_FIFO_AFUL_TH_SHIFT 0
+
+#define RQM_RQM_INT_STATE_REG_ADDR 0x57c0
+#define RQM_RQM_WQE_FIFO_OVFL_ERR_MASK 0xff
+#define RQM_RQM_WQE_FIFO_OVFL_ERR_SHIFT 0
+#define RQM_RQM_WQE_FIFO_UNFL_ERR_MASK 0xff00
+#define RQM_RQM_WQE_FIFO_UNFL_ERR_SHIFT 8
+#define RQM_RQM_NO_WQE_ERR_MASK 0xff0000
+#define RQM_RQM_NO_WQE_ERR_SHIFT 16
+
+#define RQM_RQM_FIFO_USED_CNT_REG_ADDR 0x57e0
+#define RQM_RQM_FIFO_USED_CNT_REG_SIZE 8
+#define RQM_RQM_FIFO_USED_CNT_REG_STRIDE 0x20
+#define RQM_RQM_WQE_FIFO_USED_CNT_MASK 0xf
+#define RQM_RQM_WQE_FIFO_USED_CNT_SHIFT 0
+
+#define RQM_RQM_CMD_FIFO_STATE_REG_ADDR 0x58e0
+#define RQM_RQM_WQE_FIFO_MTY_MASK 0xff
+#define RQM_RQM_WQE_FIFO_MTY_SHIFT 0
+#define RQM_RQM_WQE_FIFO_FUL_MASK 0xff00
+#define RQM_RQM_WQE_FIFO_FUL_SHIFT 8
+
+#define RQM_RQM_OTH_FIFO_STATE_REG_ADDR 0x5900
+#define RQM_RQM_OTH_FIFO_MTY_MASK 0x7
+#define RQM_RQM_OTH_FIFO_MTY_SHIFT 0
+#define RQM_RQM_OTH_FIFO_AFUL_MASK 0x38
+#define RQM_RQM_OTH_FIFO_AFUL_SHIFT 3
+#define RQM_RQM_OTH_FIFO_OVFL_ERR_MASK 0x1c0
+#define RQM_RQM_OTH_FIFO_OVFL_ERR_SHIFT 6
+#define RQM_RQM_OTH_FIFO_UNFL_ERR_MASK 0xe00
+#define RQM_RQM_OTH_FIFO_UNFL_ERR_SHIFT 9
+
+#define RQM_RQM_OTHERS_FIFO_USED_CNT_REG_ADDR 0x5920
+#define RQM_RQM_MER_REQ_FIFO_USED_CNT_MASK 0xf
+#define RQM_RQM_MER_REQ_FIFO_USED_CNT_SHIFT 0
+#define RQM_RQM_DMA_REQ_FIFO_USED_CNT_MASK 0xf0
+#define RQM_RQM_DMA_REQ_FIFO_USED_CNT_SHIFT 4
+#define RQM_RQM_PTR_REQ_FIFO_USED_CNT_MASK 0xf00
+#define RQM_RQM_PTR_REQ_FIFO_USED_CNT_SHIFT 8
+
+#define RQM_RQM_DEBUG_INFO_STATE_REG_0_ADDR 0x5940
+#define RQM_RQM2MER_DBG_OPCODE_MASK 0xff
+#define RQM_RQM2MER_DBG_OPCODE_SHIFT 0
+#define RQM_RQM2MER_DBG_TX0_RX1_MASK 0x100
+#define RQM_RQM2MER_DBG_TX0_RX1_SHIFT 8
+#define RQM_RQM2MER_DBG_CAP_MASK 0x200
+#define RQM_RQM2MER_DBG_CAP_SHIFT 9
+#define RQM_RQM2MER_DBG_L_QPID_MASK 0x1c00
+#define RQM_RQM2MER_DBG_L_QPID_SHIFT 10
+#define RQM_RQM2MER_DBG_SN_MASK 0x1fffffe000
+#define RQM_RQM2MER_DBG_SN_SHIFT 13
+
+#define RQM_RQM_DEBUG_INFO_STATE_REG_1_ADDR 0x5960
+#define RQM_RQM2MER_DBG_MOD_IF_BM_MASK 0xffffffffffffffff
+#define RQM_RQM2MER_DBG_MOD_IF_BM_SHIFT 0
+
+#define RQM_RQM_DEBUG_INFO_STATE_REG_2_ADDR 0x5980
+#define RQM_RQM2MER_DBG_RQM2MER_VLD_CNT_MASK 0xffffffff
+#define RQM_RQM2MER_DBG_RQM2MER_VLD_CNT_SHIFT 0
+#define RQM_RQM2MER_DBG_DD2RQM_DATA_VLD_CNT_MASK 0xffffffff00000000
+#define RQM_RQM2MER_DBG_DD2RQM_DATA_VLD_CNT_SHIFT 32
+
+#define RQM_RQM_DMA_REQ_LEN_STATE_REG_ADDR 0x59a0
+#define RQM_RQM_DMA_REQ_LEN_MASK 0x3ff
+#define RQM_RQM_DMA_REQ_LEN_SHIFT 0
+
+#define RQM_RQM_DMA_REQ_ADDR_STATE_REG_ADDR 0x59c0
+#define RQM_RQM_DMA_REQ_ADDR_MASK 0xffffffffffffffff
+#define RQM_RQM_DMA_REQ_ADDR_SHIFT 0
+
+#define RQM_RQM_WQE_WQEID_ADDR 0x59e0
+#define RQM_RQM_WQE_WQEID_MASK 0xffff
+#define RQM_RQM_WQE_WQEID_SHIFT 0
+
+#define RQM_RQM_WQE_RECV_LEN_ADDR 0x5a00
+#define RQM_RQM_WQE_REC_LEN_MASK 0x7fffffff
+#define RQM_RQM_WQE_REC_LEN_SHIFT 0
+
+#define RQM_RQM_WQE_LOCAL_VA_ADDR 0x5a20
+#define RQM_RQM_WQE_L_VA_MASK 0xffffffffffffffff
+#define RQM_RQM_WQE_L_VA_SHIFT 0
+
+#define RQM_RQM_WQE_LOCAL_KEY_ADDR 0x5a40
+#define RQM_RQM_WQE_L_KEY_MASK 0xffffffff
+#define RQM_RQM_WQE_L_KEY_SHIFT 0
+
+#define RQM_MER_RQM_WQE_QPID_ADDR 0x5a60
+#define RQM_RQM_WQE_QPID_MASK 0x7
+#define RQM_RQM_WQE_QPID_SHIFT 0
+
+#define RQM_RQM_STATUS_NEXT_CID_REG_ARRAY_ADDR 0x5a80
+#define RQM_RQM_STATUS_NEXT_CID_REG_ARRAY_SIZE 8
+#define RQM_RQM_STATUS_NEXT_CID_REG_ARRAY_STRIDE 0x20
+#define RQM_RQM_NEXT_CID_MASK 0xffff
+#define RQM_RQM_NEXT_CID_SHIFT 0
+
+#define RQM_RQM_DMA_IN_SOP_CNT_REG_ADDR 0x5b80
+#define RQM_RQM_DMA_IN_SOP_CNT_MASK 0xffffffffffffffff
+#define RQM_RQM_DMA_IN_SOP_CNT_SHIFT 0
+
+#define RQM_RQM_DMA_IN_EOP_CNT_REG_ADDR 0x5ba0
+#define RQM_RQM_DMA_IN_EOP_CNT_MASK 0xffffffffffffffff
+#define RQM_RQM_DMA_IN_EOP_CNT_SHIFT 0
+
+#define RQM_RQM_DMA_IN_VLD_CNT_REG_ADDR 0x5bc0
+#define RQM_RQM_DMA_IN_VLD_CNT_MASK 0xffffffffffffffff
+#define RQM_RQM_DMA_IN_VLD_CNT_SHIFT 0
+
+#define RQM_RQM_DMA_REQ_CNT_REG_ADDR 0x5be0
+#define RQM_RQM_DMA_REQ_CNT_MASK 0xffffffffffffffff
+#define RQM_RQM_DMA_REQ_CNT_SHIFT 0
+
+#define RQM_RQM_DMA_GNT_CNT_REG_ADDR 0x5c00
+#define RQM_RQM_DMA_GNT_CNT_MASK 0xffffffffffffffff
+#define RQM_RQM_DMA_GNT_CNT_SHIFT 0
+
+#define RQM_RQM_MER_VLD_CNT_REG_ADDR 0x5c20
+#define RQM_RQM_MER_VLD_CNT_MASK 0xffffffffffffffff
+#define RQM_RQM_MER_VLD_CNT_SHIFT 0
+
+#define RQM_RQM_MER_REQ_CNT_REG_ADDR 0x5c40
+#define RQM_RQM_MER_REQ_CNT_MASK 0xffffffffffffffff
+#define RQM_RQM_MER_REQ_CNT_SHIFT 0
+
+#endif
diff --git a/providers/xscale/sqm_csr_defines.h b/providers/xscale/sqm_csr_defines.h
new file mode 100644
index 0000000..e0dc6e9
--- /dev/null
+++ b/providers/xscale/sqm_csr_defines.h
@@ -0,0 +1,204 @@
+#ifndef _SQM_CSR_DEFINES_H_
+#define _SQM_CSR_DEFINES_H_
+
+#define SQM_SOFT_RESET_REG_ADDR 0x4000
+#define SQM_SOFT_RESET_MASK 0x1
+#define SQM_SOFT_RESET_SHIFT 0
+
+#define SQM_COUNTER_CONFIG_REG_ADDR 0x4020
+#define SQM_CFG_CNT_WRAP_MASK 0x1
+#define SQM_CFG_CNT_WRAP_SHIFT 0
+#define SQM_CFG_CNT_RC_MASK 0x2
+#define SQM_CFG_CNT_RC_SHIFT 1
+
+#define SQM_SCRATCH_PAD_REG_ADDR 0x4040
+#define SQM_SCRATCH_PAD_MASK 0xffffffffffffffff
+#define SQM_SCRATCH_PAD_SHIFT 0
+
+#define SQM_SQM_CONFIG_REG_RING_ADDR_ARRAY_ADDR 0x4060
+#define SQM_SQM_CONFIG_REG_RING_ADDR_ARRAY_SIZE 8
+#define SQM_SQM_CONFIG_REG_RING_ADDR_ARRAY_STRIDE 0x20
+#define SQM_CFG_CPU2SQM_RING_ADDR_MASK 0xffffffffffffffff
+#define SQM_CFG_CPU2SQM_RING_ADDR_SHIFT 0
+
+#define SQM_SQM_CONFIG_REG_RING_SIZE_ARRAY_ADDR 0x4160
+#define SQM_SQM_CONFIG_REG_RING_SIZE_ARRAY_SIZE 8
+#define SQM_SQM_CONFIG_REG_RING_SIZE_ARRAY_STRIDE 0x20
+#define SQM_CFG_CPU2SQM_RING_SIZE_MASK 0xffff
+#define SQM_CFG_CPU2SQM_RING_SIZE_SHIFT 0
+
+#define SQM_SQM_CONFIG_REG_ARRAY_ADDR 0x4260
+#define SQM_SQM_CONFIG_REG_ARRAY_SIZE 8
+#define SQM_SQM_CONFIG_REG_ARRAY_STRIDE 0x20
+#define SQM_CFG_CPU2SQM_NEXT_PID_MASK 0xffff
+#define SQM_CFG_CPU2SQM_NEXT_PID_SHIFT 0
+
+#define SQM_SQM_CONFIG_REG_CFG_EN_ARRAY_ADDR 0x4360
+#define SQM_SQM_CONFIG_REG_CFG_EN_ARRAY_SIZE 8
+#define SQM_SQM_CONFIG_REG_CFG_EN_ARRAY_STRIDE 0x20
+#define SQM_CFG_CPU2SQM_CFG_EN_MASK 0x1
+#define SQM_CFG_CPU2SQM_CFG_EN_SHIFT 0
+
+#define SQM_SQM_STATUS_REG_DONE_CID_ARRAY_ADDR 0x4460
+#define SQM_SQM_STATUS_REG_DONE_CID_ARRAY_SIZE 8
+#define SQM_SQM_STATUS_REG_DONE_CID_ARRAY_STRIDE 0x20
+#define SQM_STS_SQM2CPU_DONE_CID_MASK 0xffff
+#define SQM_STS_SQM2CPU_DONE_CID_SHIFT 0
+
+#define SQM_SQM_CFG_WQE_FIFO_TH_ADDR 0x4560
+#define SQM_CFG_CPU2SQM_WQE_FIFO_AFUL_TH_MASK 0xff
+#define SQM_CFG_CPU2SQM_WQE_FIFO_AFUL_TH_SHIFT 0
+#define SQM_CFG_CPU2SQM_WQE_FIFO_AMTY_TH_MASK 0xff00
+#define SQM_CFG_CPU2SQM_WQE_FIFO_AMTY_TH_SHIFT 8
+
+#define SQM_SQM_CONFIG_DBG_FIFO_REG_CFG_ADDR 0x4580
+#define SQM_CFG_CPU2SQM_DBG_FIFO_AFUL_TH_MASK 0xff
+#define SQM_CFG_CPU2SQM_DBG_FIFO_AFUL_TH_SHIFT 0
+#define SQM_CFG_CPU2SQM_DBG_FIFO_AMTY_TH_MASK 0xff00
+#define SQM_CFG_CPU2SQM_DBG_FIFO_AMTY_TH_SHIFT 8
+
+#define SQM_SQM_CONFIG_QPID_W_FIFO_REG_CFG_ADDR 0x45a0
+#define SQM_CFG_CPU2SQM_QPID_W_FIFO_AFUL_TH_MASK 0xff
+#define SQM_CFG_CPU2SQM_QPID_W_FIFO_AFUL_TH_SHIFT 0
+#define SQM_CFG_CPU2SQM_QPID_W_FIFO_AMTY_TH_MASK 0xff00
+#define SQM_CFG_CPU2SQM_QPID_W_FIFO_AMTY_TH_SHIFT 8
+
+#define SQM_SQM_CONFIG_QPID_R_FIFO_REG_CFG_ADDR 0x45c0
+#define SQM_CFG_CPU2SQM_QPID_R_FIFO_AFUL_TH_MASK 0xff
+#define SQM_CFG_CPU2SQM_QPID_R_FIFO_AFUL_TH_SHIFT 0
+#define SQM_CFG_CPU2SQM_QPID_R_FIFO_AMTY_TH_MASK 0xff00
+#define SQM_CFG_CPU2SQM_QPID_R_FIFO_AMTY_TH_SHIFT 8
+
+#define SQM_SQM_INT_STATE_REG_ADDR 0x45e0
+#define SQM_SQM_FIFO_OVFL_ERR_MASK 0xf
+#define SQM_SQM_FIFO_OVFL_ERR_SHIFT 0
+#define SQM_SQM_FIFO_UNFL_ERR_MASK 0xf0
+#define SQM_SQM_FIFO_UNFL_ERR_SHIFT 4
+#define SQM_SQM_FIFO_MTY_MASK 0xf00
+#define SQM_SQM_FIFO_MTY_SHIFT 8
+#define SQM_SQM_FIFO_AFUL_MASK 0xf000
+#define SQM_SQM_FIFO_AFUL_SHIFT 12
+#define SQM_SQM_SOP_EOP_NO_EQUAL_MASK 0x10000
+#define SQM_SQM_SOP_EOP_NO_EQUAL_SHIFT 16
+
+#define SQM_SQM_FIFO_USED_CNT_REG_ADDR 0x4600
+#define SQM_SQM_WQE_FIFO_USED_CNT_MASK 0x7f
+#define SQM_SQM_WQE_FIFO_USED_CNT_SHIFT 0
+#define SQM_SQM_HEAD_FIFO_USED_CNT_MASK 0x3f80
+#define SQM_SQM_HEAD_FIFO_USED_CNT_SHIFT 7
+#define SQM_SQM_PTR_FIFO_USED_CNT_MASK 0x1fc000
+#define SQM_SQM_PTR_FIFO_USED_CNT_SHIFT 14
+#define SQM_SQM_DBG_FIFO_USED_CNT_MASK 0xfe00000
+#define SQM_SQM_DBG_FIFO_USED_CNT_SHIFT 21
+
+#define SQM_SQM_DMA_REQUEST_LEN_REG_ADDR 0x4620
+#define SQM_SQM_DMA_REQ_LEN_MASK 0x3ff
+#define SQM_SQM_DMA_REQ_LEN_SHIFT 0
+
+#define SQM_SQM_DMA_REQUEST_ADDR_REG_ADDR 0x4640
+#define SQM_SQM_DMA_REQ_ADDR_MASK 0xffffffffffffffff
+#define SQM_SQM_DMA_REQ_ADDR_SHIFT 0
+
+#define SQM_SQM_STATUS_REG_NEXT_CID_ARRAY_ADDR 0x4660
+#define SQM_SQM_STATUS_REG_NEXT_CID_ARRAY_SIZE 8
+#define SQM_SQM_STATUS_REG_NEXT_CID_ARRAY_STRIDE 0x20
+#define SQM_SQM_NEXT_CID_MASK 0xffff
+#define SQM_SQM_NEXT_CID_SHIFT 0
+
+#define SQM_SQM_WQE_OPCODE_ADDR 0x4760
+#define SQM_SQM_WQE_OPCODE_MASK 0xff
+#define SQM_SQM_WQE_OPCODE_SHIFT 0
+
+#define SQM_SQM_WQE_WQEID_ADDR 0x4780
+#define SQM_SQM_WQE_WQEID_MASK 0xffff
+#define SQM_SQM_WQE_WQEID_SHIFT 0
+
+#define SQM_SQM_WQE_R_VA_ADDR 0x47a0
+#define SQM_SQM_WQE_R_VA_MASK 0xffffffffffffffff
+#define SQM_SQM_WQE_R_VA_SHIFT 0
+
+#define SQM_SQM_WQE_R_KEY_ADDR 0x47c0
+#define SQM_SQM_WQE_R_KEY_MASK 0xffffffff
+#define SQM_SQM_WQE_R_KEY_SHIFT 0
+
+#define SQM_SQM_WQE_L_LEN_ADDR 0x47e0
+#define SQM_SQM_WQE_L_LEN_MASK 0x7fffffff
+#define SQM_SQM_WQE_L_LEN_SHIFT 0
+
+#define SQM_SQM_WQE_L_VA_ADDR 0x4800
+#define SQM_SQM_WQE_L_VA_MASK 0xffffffffffffffff
+#define SQM_SQM_WQE_L_VA_SHIFT 0
+
+#define SQM_SQM_WQE_L_KEY_ADDR 0x4820
+#define SQM_SQM_WQE_L_KEY_MASK 0xffffffff
+#define SQM_SQM_WQE_L_KEY_SHIFT 0
+
+#define SQM_SQM_WQE_QPID_ADDR 0x4840
+#define SQM_SQM_WQE_QPID_MASK 0x7
+#define SQM_SQM_WQE_QPID_SHIFT 0
+
+#define SQM_SQM_DMA_IN_SOP_CNT_REG_ADDR 0x4860
+#define SQM_SQM_DMA_IN_SOP_CNT_MASK 0xffffffffffffffff
+#define SQM_SQM_DMA_IN_SOP_CNT_SHIFT 0
+
+#define SQM_SQM_DMA_IN_EOP_CNT_REG_ADDR 0x4880
+#define SQM_SQM_DMA_IN_EOP_CNT_MASK 0xffffffffffffffff
+#define SQM_SQM_DMA_IN_EOP_CNT_SHIFT 0
+
+#define SQM_SQM_DMA_IN_VLD_CNT_REG_ADDR 0x48a0
+#define SQM_SQM_DMA_IN_VLD_CNT_MASK 0xffffffffffffffff
+#define SQM_SQM_DMA_IN_VLD_CNT_SHIFT 0
+
+#define SQM_SQM_DMA_REQ_CNT_REG_ADDR 0x48c0
+#define SQM_SQM_DMA_REQ_CNT_MASK 0xffffffffffffffff
+#define SQM_SQM_DMA_REQ_CNT_SHIFT 0
+
+#define SQM_SQM_DMA_GNT_CNT_REG_ADDR 0x48e0
+#define SQM_SQM_DMA_GNT_CNT_MASK 0xffffffffffffffff
+#define SQM_SQM_DMA_GNT_CNT_SHIFT 0
+
+#define SQM_SQM_MET_VLD_CNT_REG_ADDR 0x4900
+#define SQM_SQM_MET_CNT_MASK 0xffffffffffffffff
+#define SQM_SQM_MET_CNT_SHIFT 0
+
+#define SQM_SQM_CONFIG_CAP_CFG_EN_ADDR 0x4920
+#define SQM_CFG_CPU2SQM_CAP_EN_CLR_MASK 0x1
+#define SQM_CFG_CPU2SQM_CAP_EN_CLR_SHIFT 0
+#define SQM_CFG_CPU2SQM_CAP_QPID_EN_MASK 0x2
+#define SQM_CFG_CPU2SQM_CAP_QPID_EN_SHIFT 1
+#define SQM_CFG_CPU2SQM_CAP_OPCODE_EN_MASK 0x4
+#define SQM_CFG_CPU2SQM_CAP_OPCODE_EN_SHIFT 2
+#define SQM_CFG_CPU2SQM_CAP_QPID_MASK 0x38
+#define SQM_CFG_CPU2SQM_CAP_QPID_SHIFT 3
+#define SQM_CFG_CPU2SQM_CAP_OPCODE_MASK 0x3fc0
+#define SQM_CFG_CPU2SQM_CAP_OPCODE_SHIFT 6
+
+#define SQM_SQM_DEBUG_INFO_STATE_REG_0_ADDR 0x4940
+#define SQM_SQM2MET_DBG_OPCODE_MASK 0xff
+#define SQM_SQM2MET_DBG_OPCODE_SHIFT 0
+#define SQM_SQM2MET_DBG_TX0_RX1_MASK 0x100
+#define SQM_SQM2MET_DBG_TX0_RX1_SHIFT 8
+#define SQM_SQM2MET_DBG_CAP_MASK 0x200
+#define SQM_SQM2MET_DBG_CAP_SHIFT 9
+#define SQM_SQM2MET_DBG_L_QPID_MASK 0x1c00
+#define SQM_SQM2MET_DBG_L_QPID_SHIFT 10
+#define SQM_SQM2MET_DBG_SN_MASK 0x1fffffe000
+#define SQM_SQM2MET_DBG_SN_SHIFT 13
+
+#define SQM_SQM_DEBUG_INFO_STATE_REG_1_ADDR 0x4960
+#define SQM_SQM2MET_DBG_MOD_IF_BM_MASK 0xffffffffffffffff
+#define SQM_SQM2MET_DBG_MOD_IF_BM_SHIFT 0
+
+#define SQM_SQM_DMA_REQ_COUNTER_REG_ADDR 0x4980
+#define SQM_SQM_DMA_REQ_COUNTER_MASK 0xff
+#define SQM_SQM_DMA_REQ_COUNTER_SHIFT 0
+
+#define SQM_SQM_DMA_GNT_COUNTER_REG_ADDR 0x49a0
+#define SQM_SQM_DMA_GNT_COUNTER_MASK 0xff
+#define SQM_SQM_DMA_GNT_COUNTER_SHIFT 0
+
+#define SQM_SQM_SQM2MET_COUNTER_REG_ADDR 0x49c0
+#define SQM_SQM_SQM2MET_CNT_MASK 0xff
+#define SQM_SQM_SQM2MET_CNT_SHIFT 0
+
+#endif
diff --git a/providers/xscale/verbs.c b/providers/xscale/verbs.c
new file mode 100644
index 0000000..937bed1
--- /dev/null
+++ b/providers/xscale/verbs.c
@@ -0,0 +1,2816 @@
+/*
+ * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd.
+ * All rights reserved.
+ */
+
+#include <config.h>
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdatomic.h>
+#include <string.h>
+#include <pthread.h>
+#include <errno.h>
+#include <limits.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <ccan/array_size.h>
+
+#include <util/compiler.h>
+#include <util/mmio.h>
+#include <rdma/ib_user_ioctl_cmds.h>
+#include <rdma/xsc_user_ioctl_cmds.h>
+#include <infiniband/cmd_write.h>
+
+#include "xscale.h"
+#include "xsc-abi.h"
+#include "wqe.h"
+#include "xsc_hsi.h"
+
+int xsc_single_threaded = 0;
+
+static inline int is_xrc_tgt(int type)
+{
+ return type == IBV_QPT_XRC_RECV;
+}
+
+static void xsc_set_fw_version(struct ibv_device_attr *attr, union xsc_ib_fw_ver *fw_ver)
+{
+ uint8_t ver_major = fw_ver->s.ver_major;
+ uint8_t ver_minor = fw_ver->s.ver_minor;
+ uint16_t ver_patch = fw_ver->s.ver_patch;
+ uint32_t ver_tweak = fw_ver->s.ver_tweak;
+
+ if (ver_tweak == 0) {
+ snprintf(attr->fw_ver, sizeof(attr->fw_ver), "v%u.%u.%u",
+ ver_major, ver_minor, ver_patch);
+ } else {
+ snprintf(attr->fw_ver, sizeof(attr->fw_ver), "v%u.%u.%u+%u",
+ ver_major, ver_minor, ver_patch, ver_tweak);
+ }
+}
+
+static int xsc_read_clock(struct ibv_context *context, uint64_t *cycles)
+{
+ unsigned int clockhi, clocklo, clockhi1;
+ int i;
+ struct xsc_context *ctx = to_xctx(context);
+
+ if (!ctx->hca_core_clock)
+ return EOPNOTSUPP;
+
+ /* Handle wraparound: re-read the high word and retry once if it changed while the low word was read */
+ for (i = 0; i < 2; i++) {
+ clockhi = be32toh(mmio_read32_be(ctx->hca_core_clock));
+ clocklo = be32toh(mmio_read32_be(ctx->hca_core_clock + 4));
+ clockhi1 = be32toh(mmio_read32_be(ctx->hca_core_clock));
+ if (clockhi == clockhi1)
+ break;
+ }
+
+ *cycles = (uint64_t)clockhi << 32 | (uint64_t)clocklo;
+
+ return 0;
+}
+
+int xsc_query_rt_values(struct ibv_context *context,
+ struct ibv_values_ex *values)
+{
+ uint32_t comp_mask = 0;
+ int err = 0;
+
+ if (!check_comp_mask(values->comp_mask, IBV_VALUES_MASK_RAW_CLOCK))
+ return EINVAL;
+
+ if (values->comp_mask & IBV_VALUES_MASK_RAW_CLOCK) {
+ uint64_t cycles;
+
+ err = xsc_read_clock(context, &cycles);
+ if (!err) {
+ values->raw_clock.tv_sec = 0;
+ values->raw_clock.tv_nsec = cycles;
+ comp_mask |= IBV_VALUES_MASK_RAW_CLOCK;
+ }
+ }
+
+ values->comp_mask = comp_mask;
+
+ return err;
+}
+
+int xsc_query_port(struct ibv_context *context, uint8_t port,
+ struct ibv_port_attr *attr)
+{
+ struct ibv_query_port cmd;
+
+ return ibv_cmd_query_port(context, port, attr, &cmd, sizeof cmd);
+}
+
+struct ibv_pd *xsc_alloc_pd(struct ibv_context *context)
+{
+ struct ibv_alloc_pd cmd;
+ struct xsc_alloc_pd_resp resp;
+ struct xsc_pd *pd;
+
+ pd = calloc(1, sizeof *pd);
+ if (!pd)
+ return NULL;
+
+ if (ibv_cmd_alloc_pd(context, &pd->ibv_pd, &cmd, sizeof cmd,
+ &resp.ibv_resp, sizeof resp)) {
+ free(pd);
+ return NULL;
+ }
+
+ atomic_init(&pd->refcount, 1);
+ pd->pdn = resp.pdn;
+ xsc_dbg(to_xctx(context)->dbg_fp, XSC_DBG_PD, "pd number:%u\n", pd->pdn);
+
+ return &pd->ibv_pd;
+}
+
+struct ibv_pd *
+xsc_alloc_parent_domain(struct ibv_context *context,
+ struct ibv_parent_domain_init_attr *attr)
+{
+ struct xsc_parent_domain *xparent_domain;
+
+ if (ibv_check_alloc_parent_domain(attr))
+ return NULL;
+
+ if (attr->comp_mask) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ xparent_domain = calloc(1, sizeof(*xparent_domain));
+ if (!xparent_domain) {
+ errno = ENOMEM;
+ return NULL;
+ }
+
+ xparent_domain->xpd.xprotection_domain = to_xpd(attr->pd);
+ atomic_fetch_add(&xparent_domain->xpd.xprotection_domain->refcount, 1);
+ atomic_init(&xparent_domain->xpd.refcount, 1);
+
+ ibv_initialize_parent_domain(
+ &xparent_domain->xpd.ibv_pd,
+ &xparent_domain->xpd.xprotection_domain->ibv_pd);
+
+ return &xparent_domain->xpd.ibv_pd;
+}
+
+static int xsc_dealloc_parent_domain(struct xsc_parent_domain *xparent_domain)
+{
+ if (atomic_load(&xparent_domain->xpd.refcount) > 1)
+ return EBUSY;
+
+ atomic_fetch_sub(&xparent_domain->xpd.xprotection_domain->refcount, 1);
+
+ free(xparent_domain);
+ return 0;
+}
+
+int xsc_free_pd(struct ibv_pd *pd)
+{
+ int ret;
+ struct xsc_parent_domain *xparent_domain = to_xparent_domain(pd);
+ struct xsc_pd *xpd = to_xpd(pd);
+
+ if (xparent_domain)
+ return xsc_dealloc_parent_domain(xparent_domain);
+
+ if (atomic_load(&xpd->refcount) > 1)
+ return EBUSY;
+
+ ret = ibv_cmd_dealloc_pd(pd);
+ if (ret)
+ return ret;
+
+ xsc_dbg(to_xctx(pd->context)->dbg_fp, XSC_DBG_PD, "dealloc pd\n");
+ free(xpd);
+
+ return 0;
+}
+
+struct ibv_mr *xsc_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
+ uint64_t hca_va, int acc)
+{
+ struct xsc_mr *mr;
+ struct ibv_reg_mr cmd;
+ int ret;
+ enum ibv_access_flags access = (enum ibv_access_flags)acc;
+ struct ib_uverbs_reg_mr_resp resp;
+
+ mr = calloc(1, sizeof(*mr));
+ if (!mr)
+ return NULL;
+
+ ret = ibv_cmd_reg_mr(pd, addr, length, hca_va, access,
+ &mr->vmr, &cmd, sizeof(cmd), &resp,
+ sizeof resp);
+ if (ret) {
+ xsc_free_buf(&(mr->buf));
+ free(mr);
+ return NULL;
+ }
+ mr->alloc_flags = acc;
+
+ xsc_dbg(to_xctx(pd->context)->dbg_fp, XSC_DBG_MR, "lkey:%u, rkey:%u\n",
+ mr->vmr.ibv_mr.lkey, mr->vmr.ibv_mr.rkey);
+
+ return &mr->vmr.ibv_mr;
+}
+
+struct ibv_mr *xsc_alloc_null_mr(struct ibv_pd *pd)
+{
+ struct xsc_mr *mr;
+ struct xsc_context *ctx = to_xctx(pd->context);
+
+ if (ctx->dump_fill_mkey == XSC_INVALID_LKEY) {
+ errno = ENOTSUP;
+ return NULL;
+ }
+
+ mr = calloc(1, sizeof(*mr));
+ if (!mr) {
+ errno = ENOMEM;
+ return NULL;
+ }
+
+ mr->vmr.ibv_mr.lkey = ctx->dump_fill_mkey;
+
+ mr->vmr.ibv_mr.context = pd->context;
+ mr->vmr.ibv_mr.pd = pd;
+ mr->vmr.ibv_mr.addr = NULL;
+ mr->vmr.ibv_mr.length = SIZE_MAX;
+ mr->vmr.mr_type = IBV_MR_TYPE_NULL_MR;
+
+ return &mr->vmr.ibv_mr;
+}
+
+enum {
+ XSC_DM_ALLOWED_ACCESS = IBV_ACCESS_LOCAL_WRITE |
+ IBV_ACCESS_REMOTE_WRITE |
+ IBV_ACCESS_REMOTE_READ |
+ IBV_ACCESS_REMOTE_ATOMIC |
+ IBV_ACCESS_ZERO_BASED
+};
+
+struct ibv_mr *xsc_reg_dm_mr(struct ibv_pd *pd, struct ibv_dm *ibdm,
+ uint64_t dm_offset, size_t length,
+ unsigned int acc)
+{
+ struct xsc_dm *dm = to_xdm(ibdm);
+ struct xsc_mr *mr;
+ int ret;
+
+ if (acc & ~XSC_DM_ALLOWED_ACCESS) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ mr = calloc(1, sizeof(*mr));
+ if (!mr) {
+ errno = ENOMEM;
+ return NULL;
+ }
+
+ ret = ibv_cmd_reg_dm_mr(pd, &dm->verbs_dm, dm_offset, length, acc,
+ &mr->vmr, NULL);
+ if (ret) {
+ free(mr);
+ return NULL;
+ }
+
+ mr->alloc_flags = acc;
+
+ return &mr->vmr.ibv_mr;
+}
+
+int xsc_rereg_mr(struct verbs_mr *vmr, int flags, struct ibv_pd *pd,
+ void *addr, size_t length, int access)
+{
+ struct ibv_rereg_mr cmd;
+ struct ib_uverbs_rereg_mr_resp resp;
+
+ return ibv_cmd_rereg_mr(vmr, flags, addr, length, (uintptr_t)addr,
+ access, pd, &cmd, sizeof(cmd), &resp,
+ sizeof(resp));
+}
+
+int xsc_dereg_mr(struct verbs_mr *vmr)
+{
+ int ret;
+
+ if (vmr->mr_type == IBV_MR_TYPE_NULL_MR)
+ goto free;
+
+ ret = ibv_cmd_dereg_mr(vmr);
+ if (ret)
+ return ret;
+
+free:
+ free(vmr);
+ return 0;
+}
+
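+/*
+ * Round sz up to the next power of two; returns -ENOMEM if the result
+ * would not fit in an int.
+ */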
+int xsc_round_up_power_of_two(long long sz)
+{
+ long long ret;
+
+ for (ret = 1; ret < sz; ret <<= 1)
+ ; /* nothing */
+
+ if (ret > INT_MAX) {
+ fprintf(stderr, "%s: roundup overflow\n", __func__);
+ return -ENOMEM;
+ }
+
+ return (int)ret;
+}
+
+static int align_queue_size(long long req)
+{
+ return xsc_round_up_power_of_two(req);
+}
+
+enum {
+ CREATE_CQ_SUPPORTED_WC_FLAGS = IBV_WC_STANDARD_FLAGS |
+ IBV_WC_EX_WITH_COMPLETION_TIMESTAMP |
+ IBV_WC_EX_WITH_CVLAN |
+ IBV_WC_EX_WITH_FLOW_TAG |
+ IBV_WC_EX_WITH_TM_INFO |
+ IBV_WC_EX_WITH_COMPLETION_TIMESTAMP_WALLCLOCK
+};
+
+enum {
+ CREATE_CQ_SUPPORTED_COMP_MASK = IBV_CQ_INIT_ATTR_MASK_FLAGS
+};
+
+enum {
+ CREATE_CQ_SUPPORTED_FLAGS =
+ IBV_CREATE_CQ_ATTR_SINGLE_THREADED |
+ IBV_CREATE_CQ_ATTR_IGNORE_OVERRUN
+};
+
+enum {
+ XSC_DV_CREATE_CQ_SUP_COMP_MASK =
+ (XSCDV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE |
+ XSCDV_CQ_INIT_ATTR_MASK_FLAGS |
+ XSCDV_CQ_INIT_ATTR_MASK_CQE_SIZE),
+};
+
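+/*
+ * Setting the environment variable XSC_CQE_DEPTH_CHECK=n turns the hard
+ * failure for oversized CQE rings into a silent clamp to the maximum depth.
+ */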
+static int xsc_cqe_depth_check(void)
+{
+ char *e;
+
+ e = getenv("XSC_CQE_DEPTH_CHECK");
+ if (e && !strcmp(e, "n"))
+ return 0;
+
+ return 1;
+}
+
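+/*
+ * Common CQ creation path: validates the requested attributes, sizes the
+ * CQE ring (power of two, clamped to the supported range), allocates the
+ * CQE buffer, issues the create command to the kernel and maps the
+ * doorbell/armdb registers.
+ */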
+static struct ibv_cq_ex *create_cq(struct ibv_context *context,
+ const struct ibv_cq_init_attr_ex *cq_attr,
+ int cq_alloc_flags,
+ struct xscdv_cq_init_attr *xcq_attr)
+{
+ struct xsc_create_cq cmd = {};
+ struct xsc_create_cq_resp resp = {};
+ struct xsc_create_cq_ex cmd_ex = {};
+ struct xsc_create_cq_ex_resp resp_ex = {};
+ struct xsc_ib_create_cq *cmd_drv;
+ struct xsc_ib_create_cq_resp *resp_drv;
+ struct xsc_cq *cq;
+ int cqe_sz;
+ int ret;
+ int ncqe;
+ struct xsc_context *xctx = to_xctx(context);
+ bool use_ex = false;
+ char *env;
+ int i;
+
+ if (!cq_attr->cqe) {
+ xsc_err("CQE invalid\n");
+ errno = EINVAL;
+ return NULL;
+ }
+
+ xsc_dbg(xctx->dbg_fp, XSC_DBG_CQ, "CQE number:%u\n", cq_attr->cqe);
+
+ if (cq_attr->comp_mask & ~CREATE_CQ_SUPPORTED_COMP_MASK) {
+ xsc_err("Unsupported comp_mask for create cq\n");
+ errno = EINVAL;
+ return NULL;
+ }
+
+ if (cq_attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_FLAGS &&
+ cq_attr->flags & ~CREATE_CQ_SUPPORTED_FLAGS) {
+ xsc_err("Unsupported creation flags requested for create cq\n");
+ errno = EINVAL;
+ return NULL;
+ }
+
+ if (cq_attr->wc_flags & ~CREATE_CQ_SUPPORTED_WC_FLAGS) {
+ xsc_err("unsupported flgas:0x%lx\n", cq_attr->wc_flags);
+ errno = ENOTSUP;
+ return NULL;
+ }
+
+ cq = calloc(1, sizeof *cq);
+ if (!cq) {
+ xsc_err("Alloc CQ failed\n");
+ errno = ENOMEM;
+ return NULL;
+ }
+
+ if (cq_attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_FLAGS) {
+ if (cq_attr->flags & IBV_CREATE_CQ_ATTR_SINGLE_THREADED)
+ cq->flags |= XSC_CQ_FLAGS_SINGLE_THREADED;
+ if (cq_attr->flags & IBV_CREATE_CQ_ATTR_IGNORE_OVERRUN)
+ use_ex = true;
+ }
+
+ xsc_dbg(xctx->dbg_fp, XSC_DBG_CQ, "use_ex:%u\n", use_ex);
+
+ cmd_drv = use_ex ? &cmd_ex.drv_payload : &cmd.drv_payload;
+ resp_drv = use_ex ? &resp_ex.drv_payload : &resp.drv_payload;
+
+ cq->cons_index = 0;
+
+ if (xsc_spinlock_init(&cq->lock, !xsc_single_threaded))
+ goto err;
+
+ ncqe = align_queue_size(cq_attr->cqe);
+ if (ncqe < XSC_CQE_RING_DEPTH_MIN) {
+ xsc_dbg(xctx->dbg_fp, XSC_DBG_CQ, "CQE ring size %u is not enough, set it as %u\n",
+ ncqe, XSC_CQE_RING_DEPTH_MIN);
+ ncqe = XSC_CQE_RING_DEPTH_MIN;
+ }
+
+ if (ncqe > XSC_CQE_RING_DEPTH_MAX) {
+ if (xsc_cqe_depth_check()) {
+ xsc_err("CQE ring size %u exceeds CQE ring depth %u, abort!\n",
+ ncqe, XSC_CQE_RING_DEPTH_MAX);
+ errno = EINVAL;
+ goto err_spl;
+ } else {
+ xsc_dbg(xctx->dbg_fp, XSC_DBG_CQ, "CQE ring size %u exceeds the MAX ring szie, set it as %u\n",
+ ncqe, XSC_CQE_RING_DEPTH_MAX);
+ ncqe = XSC_CQE_RING_DEPTH_MAX;
+ }
+ }
+
+ cqe_sz = XSC_CQE_SIZE;
+ xsc_dbg(xctx->dbg_fp, XSC_DBG_CQ, "CQE number:%u, size:%u\n", ncqe, cqe_sz);
+
+ if (xsc_alloc_cq_buf(to_xctx(context), cq, &cq->buf_a, ncqe, cqe_sz)) {
+ xsc_err("Alloc cq buffer failed.\n");
+ errno = ENOMEM;
+ goto err_spl;
+ }
+
+ cq->arm_sn = 0;
+ cq->cqe_sz = cqe_sz;
+ cq->flags = cq_alloc_flags;
+
+ cmd_drv->buf_addr = (uintptr_t) cq->buf_a.buf;
+ cmd_drv->db_addr = (uintptr_t) cq->dbrec;
+ cmd_drv->cqe_size = cqe_sz;
+
+ xsc_dbg(xctx->dbg_fp, XSC_DBG_CQ, "buf_addr:%p\n", cq->buf_a.buf);
+
+ if (use_ex) {
+ struct ibv_cq_init_attr_ex cq_attr_ex = *cq_attr;
+
+ cq_attr_ex.cqe = ncqe;
+ ret = ibv_cmd_create_cq_ex(context, &cq_attr_ex, &cq->verbs_cq,
+ &cmd_ex.ibv_cmd, sizeof(cmd_ex),
+ &resp_ex.ibv_resp, sizeof(resp_ex),
+ 0);
+ } else {
+ ret = ibv_cmd_create_cq(context, ncqe, cq_attr->channel,
+ cq_attr->comp_vector,
+ ibv_cq_ex_to_cq(&cq->verbs_cq.cq_ex),
+ &cmd.ibv_cmd, sizeof(cmd),
+ &resp.ibv_resp, sizeof(resp));
+ }
+
+ if (ret) {
+ xsc_err("ibv_cmd_create_cq failed,ret %d\n", ret);
+ goto err_buf;
+ }
+
+ cq->active_buf = &cq->buf_a;
+ cq->resize_buf = NULL;
+ cq->cqn = resp_drv->cqn;
+ cq->stall_enable = to_xctx(context)->stall_enable;
+ cq->stall_adaptive_enable = to_xctx(context)->stall_adaptive_enable;
+ cq->stall_cycles = to_xctx(context)->stall_cycles;
+
+ cq->db = xctx->cqm_reg_va +
+ (xctx->cqm_next_cid_reg & (xctx->page_size - 1));
+ cq->armdb = xctx->cqm_armdb_va +
+ (xctx->cqm_armdb & (xctx->page_size - 1));
+ cq->cqe_cnt = ncqe;
+ cq->log2_cq_ring_sz = xsc_ilog2(ncqe);
+
+ for (i = 0; i < ncqe; i++) {
+ struct xsc_cqe *cqe = (struct xsc_cqe *)(cq->active_buf->buf + i * cq->cqe_sz);
+ cqe->owner = 1;
+ }
+
+ env = getenv("XSC_DISABLE_FLUSH_ERROR");
+ cq->disable_flush_error_cqe = env ? true : false;
+ xsc_dbg(xctx->dbg_fp, XSC_DBG_CQ, "cqe count:%u cqn:%u\n", cq->cqe_cnt, cq->cqn);
+ list_head_init(&cq->err_state_qp_list);
+ return &cq->verbs_cq.cq_ex;
+
+err_buf:
+ xsc_free_cq_buf(to_xctx(context), &cq->buf_a);
+
+err_spl:
+ xsc_spinlock_destroy(&cq->lock);
+
+err:
+ free(cq);
+
+ return NULL;
+}
+
+struct ibv_cq *xsc_create_cq(struct ibv_context *context, int cqe,
+ struct ibv_comp_channel *channel,
+ int comp_vector)
+{
+ struct ibv_cq_ex *cq;
+ struct ibv_cq_init_attr_ex cq_attr = {.cqe = cqe, .channel = channel,
+ .comp_vector = comp_vector,
+ .wc_flags = IBV_WC_STANDARD_FLAGS};
+
+ if (cqe <= 0) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ cq = create_cq(context, &cq_attr, 0, NULL);
+ return cq ? ibv_cq_ex_to_cq(cq) : NULL;
+}
+
+struct ibv_cq_ex *xsc_create_cq_ex(struct ibv_context *context,
+ struct ibv_cq_init_attr_ex *cq_attr)
+{
+ return create_cq(context, cq_attr, XSC_CQ_FLAGS_EXTENDED, NULL);
+}
+
+struct ibv_cq_ex *xscdv_create_cq(struct ibv_context *context,
+ struct ibv_cq_init_attr_ex *cq_attr,
+ struct xscdv_cq_init_attr *xcq_attr)
+{
+ struct ibv_cq_ex *cq;
+
+ cq = create_cq(context, cq_attr, XSC_CQ_FLAGS_EXTENDED, xcq_attr);
+ if (!cq)
+ return NULL;
+
+ verbs_init_cq(ibv_cq_ex_to_cq(cq), context,
+ cq_attr->channel, cq_attr->cq_context);
+ return cq;
+}
+
+int xsc_resize_cq(struct ibv_cq *ibcq, int cqe)
+{
+ struct xsc_cq *cq = to_xcq(ibcq);
+
+ if (cqe < 0) {
+ errno = EINVAL;
+ return errno;
+ }
+
+ xsc_spin_lock(&cq->lock);
+ cq->active_cqes = cq->verbs_cq.cq_ex.cqe;
+ /* currently we don't change cqe size */
+ cq->resize_cqe_sz = cq->cqe_sz;
+ cq->resize_cqes = cq->verbs_cq.cq_ex.cqe;
+ xsc_spin_unlock(&cq->lock);
+ cq->resize_buf = NULL;
+ return 0;
+}
+
+int xsc_destroy_cq(struct ibv_cq *cq)
+{
+ int ret;
+ struct xsc_err_state_qp_node *tmp, *err_qp_node;
+
+ xsc_dbg(to_xctx(cq->context)->dbg_fp, XSC_DBG_CQ, "\n");
+ ret = ibv_cmd_destroy_cq(cq);
+ if (ret)
+ return ret;
+
+ list_for_each_safe(&to_xcq(cq)->err_state_qp_list, err_qp_node, tmp, entry) {
+ list_del(&err_qp_node->entry);
+ free(err_qp_node);
+ }
+
+ xsc_free_cq_buf(to_xctx(cq->context), to_xcq(cq)->active_buf);
+ free(to_xcq(cq));
+
+ return 0;
+}
+
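+/*
+ * Size the send queue: each WQE occupies 1 << (XSC_BASE_WQE_SHIFT +
+ * send_ds_shift) bytes and the ring depth is max_send_wr rounded up to a
+ * power of two, clamped to the supported range. Returns the SQ buffer
+ * size in bytes, or a negative errno.
+ */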
+static int xsc_calc_sq_size(struct xsc_context *ctx,
+ struct ibv_qp_init_attr_ex *attr,
+ struct xsc_qp *qp)
+{
+ int wqe_size;
+ int wq_size;
+ int wq_size_min = 0;
+
+ if (!attr->cap.max_send_wr)
+ return 0;
+
+ wqe_size = 1 << (XSC_BASE_WQE_SHIFT + ctx->send_ds_shift);
+
+ wq_size = xsc_round_up_power_of_two(attr->cap.max_send_wr);
+
+ if (attr->qp_type != IBV_QPT_RAW_PACKET)
+ wq_size_min = XSC_SEND_WQE_RING_DEPTH_MIN;
+ if (wq_size < wq_size_min) {
+ xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, "WQE size %u is not enough, set it as %u\n",
+ wq_size, wq_size_min);
+ wq_size = wq_size_min;
+ }
+
+ if (wq_size > XSC_SEND_WQE_RING_DEPTH_MAX) {
+ xsc_dbg(ctx->dbg_fp, XSC_DBG_QP,
+ "WQE size %u exceeds WQE ring depth, set it as %u\n",
+ wq_size, XSC_SEND_WQE_RING_DEPTH_MAX);
+ wq_size = XSC_SEND_WQE_RING_DEPTH_MAX;
+ }
+
+ qp->max_inline_data = attr->cap.max_inline_data;
+ qp->sq.wqe_cnt = wq_size;
+ qp->sq.ds_cnt = wq_size << ctx->send_ds_shift;
+ qp->sq.seg_cnt = 1 << ctx->send_ds_shift;
+ qp->sq.wqe_shift = XSC_BASE_WQE_SHIFT + ctx->send_ds_shift;
+ qp->sq.max_gs = attr->cap.max_send_sge;
+ qp->sq.max_post = qp->sq.wqe_cnt;
+ if (attr->cap.max_inline_data >
+ (qp->sq.seg_cnt - 2) * sizeof(struct xsc_wqe_data_seg))
+ return -EINVAL;
+
+ xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, "Send WQE count:%u, max post:%u wqe shift:%u\n",
+ qp->sq.wqe_cnt, qp->sq.max_post, qp->sq.wqe_shift);
+
+ return wqe_size * qp->sq.wqe_cnt;
+}
+
+enum {
+ DV_CREATE_WQ_SUPPORTED_COMP_MASK = XSCDV_WQ_INIT_ATTR_MASK_STRIDING_RQ
+};
+
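+/*
+ * Size a receive work queue (ibv_wq): the per-WQE size is derived from
+ * max_sge (plus an optional signature segment and, for striding RQs, a
+ * next segment) and rounded up to a power of two. Returns the buffer
+ * size in bytes, or a negative errno.
+ */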
+static int xsc_calc_rwq_size(struct xsc_context *ctx,
+ struct xsc_rwq *rwq,
+ struct ibv_wq_init_attr *attr,
+ struct xscdv_wq_init_attr *xwq_attr)
+{
+ size_t wqe_size;
+ int wq_size;
+ uint32_t num_scatter;
+ int is_mprq = 0;
+ int scat_spc;
+
+ if (!attr->max_wr)
+ return -EINVAL;
+ if (xwq_attr) {
+ if (!check_comp_mask(xwq_attr->comp_mask,
+ DV_CREATE_WQ_SUPPORTED_COMP_MASK))
+ return -EINVAL;
+
+ is_mprq = !!(xwq_attr->comp_mask &
+ XSCDV_WQ_INIT_ATTR_MASK_STRIDING_RQ);
+ }
+
+ /* TBD: check caps for RQ */
+ num_scatter = max_t(uint32_t, attr->max_sge, 1);
+ wqe_size = sizeof(struct xsc_wqe_data_seg) * num_scatter +
+ sizeof(struct xsc_wqe_srq_next_seg) * is_mprq;
+
+ if (rwq->wq_sig)
+ wqe_size += sizeof(struct xsc_rwqe_sig);
+
+ if (wqe_size <= 0 || wqe_size > ctx->max_rq_desc_sz)
+ return -EINVAL;
+
+ wqe_size = xsc_round_up_power_of_two(wqe_size);
+ wq_size = xsc_round_up_power_of_two(attr->max_wr) * wqe_size;
+ wq_size = max(wq_size, XSC_SEND_WQE_BB);
+ rwq->rq.wqe_cnt = wq_size / wqe_size;
+ rwq->rq.wqe_shift = xsc_ilog2(wqe_size);
+ rwq->rq.max_post = 1 << xsc_ilog2(wq_size / wqe_size);
+ scat_spc = wqe_size -
+ ((rwq->wq_sig) ? sizeof(struct xsc_rwqe_sig) : 0) -
+ is_mprq * sizeof(struct xsc_wqe_srq_next_seg);
+ rwq->rq.max_gs = scat_spc / sizeof(struct xsc_wqe_data_seg);
+ return wq_size;
+}
+
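+/*
+ * Size the receive queue of a QP, mirroring xsc_calc_sq_size() but using
+ * the receive DS shift and receive ring-depth limits.
+ */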
+static int xsc_calc_rq_size(struct xsc_context *ctx,
+ struct ibv_qp_init_attr_ex *attr,
+ struct xsc_qp *qp)
+{
+ int wqe_size;
+ int wq_size;
+ int wq_size_min = 0;
+
+ if (!attr->cap.max_recv_wr)
+ return 0;
+
+ wqe_size = 1 << (XSC_BASE_WQE_SHIFT + ctx->recv_ds_shift);
+
+ wq_size = xsc_round_up_power_of_two(attr->cap.max_recv_wr);
+ /* Due to a hardware limit, the RDMA RQ depth must be at least the DS count of one send WQE */
+ if (attr->qp_type != IBV_QPT_RAW_PACKET)
+ wq_size_min = ctx->send_ds_num;
+ if (wq_size < wq_size_min) {
+ xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, "WQE size %u is not enough, set it as %u\n",
+ wq_size, wq_size_min);
+ wq_size = wq_size_min;
+ }
+
+ if (wq_size > XSC_RECV_WQE_RING_DEPTH_MAX) {
+ xsc_dbg(ctx->dbg_fp, XSC_DBG_QP,
+ "WQE size %u exceeds WQE ring depth, set it as %u\n",
+ wq_size, XSC_RECV_WQE_RING_DEPTH_MAX);
+ wq_size = XSC_RECV_WQE_RING_DEPTH_MAX;
+ }
+
+ qp->rq.wqe_cnt = wq_size;
+ qp->rq.ds_cnt = qp->rq.wqe_cnt << ctx->recv_ds_shift;
+ qp->rq.seg_cnt = 1 << ctx->recv_ds_shift;
+ qp->rq.wqe_shift = XSC_BASE_WQE_SHIFT + ctx->recv_ds_shift;
+ qp->rq.max_post = qp->rq.wqe_cnt;
+ qp->rq.max_gs = attr->cap.max_recv_sge;
+
+ xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, "Recv WQE count:%u, max post:%u wqe shift:%u\n",
+ qp->rq.wqe_cnt, qp->rq.max_post, qp->rq.wqe_shift);
+ return wqe_size * qp->rq.wqe_cnt;
+}
+
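+/*
+ * Compute the total work queue buffer size. The layout places the RQ at
+ * offset 0 and the SQ immediately after it.
+ */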
+static int xsc_calc_wq_size(struct xsc_context *ctx,
+ struct ibv_qp_init_attr_ex *attr,
+ struct xsc_qp *qp)
+{
+ int ret;
+ int result;
+
+ ret = xsc_calc_sq_size(ctx, attr, qp);
+ if (ret < 0)
+ return ret;
+
+ result = ret;
+
+ ret = xsc_calc_rq_size(ctx, attr, qp);
+ if (ret < 0)
+ return ret;
+
+ result += ret;
+
+ qp->sq.offset = ret;
+ qp->rq.offset = 0;
+
+ return result;
+}
+
+static const char *qptype2key(enum ibv_qp_type type)
+{
+ switch (type) {
+ case IBV_QPT_RC: return "HUGE_RC";
+ case IBV_QPT_UC: return "HUGE_UC";
+ case IBV_QPT_UD: return "HUGE_UD";
+ case IBV_QPT_RAW_PACKET: return "HUGE_RAW_ETH";
+ default: return "HUGE_NA";
+ }
+}
+
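+/*
+ * Allocate the per-WQE bookkeeping arrays for the SQ/RQ and the WQE
+ * buffer itself; huge-page allocation is selected when xsc_use_huge()
+ * matches the per-QP-type HUGE_* key. Raw packet QPs get a separate
+ * SQ buffer.
+ */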
+static int xsc_alloc_qp_buf(struct ibv_context *context,
+ struct ibv_qp_init_attr_ex *attr,
+ struct xsc_qp *qp,
+ int size)
+{
+ int err;
+ enum xsc_alloc_type alloc_type;
+ enum xsc_alloc_type default_alloc_type = XSC_ALLOC_TYPE_ANON;
+ const char *qp_huge_key;
+
+ if (qp->sq.wqe_cnt) {
+ qp->sq.wrid = malloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wrid));
+ if (!qp->sq.wrid) {
+ errno = ENOMEM;
+ err = -1;
+ return err;
+ }
+
+ qp->sq.wr_data = malloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wr_data));
+ if (!qp->sq.wr_data) {
+ errno = ENOMEM;
+ err = -1;
+ goto ex_wrid;
+ }
+
+ qp->sq.wqe_head = malloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wqe_head));
+ if (!qp->sq.wqe_head) {
+ errno = ENOMEM;
+ err = -1;
+ goto ex_wrid;
+ }
+
+ qp->sq.need_flush = malloc(qp->sq.wqe_cnt * sizeof(*qp->sq.need_flush));
+ if (!qp->sq.need_flush) {
+ errno = ENOMEM;
+ err = -1;
+ goto ex_wrid;
+ }
+ memset(qp->sq.need_flush, 0, qp->sq.wqe_cnt);
+
+ qp->sq.wr_opcode = malloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wr_opcode));
+ if (!qp->sq.wr_opcode) {
+ errno = ENOMEM;
+ err = -1;
+ goto ex_wrid;
+ }
+ }
+
+ if (qp->rq.wqe_cnt) {
+ qp->rq.wrid = malloc(qp->rq.wqe_cnt * sizeof(uint64_t));
+ if (!qp->rq.wrid) {
+ errno = ENOMEM;
+ err = -1;
+ goto ex_wrid;
+ }
+ }
+
+ /* compatibility support: legacy HUGE_* keys select huge-page allocation for this QP type */
+ qp_huge_key = qptype2key(qp->ibv_qp->qp_type);
+ if (xsc_use_huge(qp_huge_key))
+ default_alloc_type = XSC_ALLOC_TYPE_HUGE;
+
+ xsc_get_alloc_type(to_xctx(context), XSC_QP_PREFIX, &alloc_type,
+ default_alloc_type);
+
+ err = xsc_alloc_prefered_buf(to_xctx(context), &qp->buf,
+ align(qp->buf_size, to_xdev
+ (context->device)->page_size),
+ to_xdev(context->device)->page_size,
+ alloc_type,
+ XSC_QP_PREFIX);
+
+ if (err) {
+ err = -ENOMEM;
+ goto ex_wrid;
+ }
+
+ memset(qp->buf.buf, 0, qp->buf_size);
+
+ if (attr->qp_type == IBV_QPT_RAW_PACKET ||
+ qp->flags & XSC_QP_FLAGS_USE_UNDERLAY) {
+ size_t aligned_sq_buf_size = align(qp->sq_buf_size,
+ to_xdev(context->device)->page_size);
+ /* For Raw Packet QP, allocate a separate buffer for the SQ */
+ err = xsc_alloc_prefered_buf(to_xctx(context), &qp->sq_buf,
+ aligned_sq_buf_size,
+ to_xdev(context->device)->page_size,
+ alloc_type,
+ XSC_QP_PREFIX);
+ if (err) {
+ err = -ENOMEM;
+ goto rq_buf;
+ }
+
+ memset(qp->sq_buf.buf, 0, aligned_sq_buf_size);
+ }
+
+ return 0;
+rq_buf:
+ xsc_free_actual_buf(to_xctx(context), &qp->buf);
+ex_wrid:
+ if (qp->rq.wrid)
+ free(qp->rq.wrid);
+
+ if (qp->sq.wqe_head)
+ free(qp->sq.wqe_head);
+
+ if (qp->sq.wr_data)
+ free(qp->sq.wr_data);
+ if (qp->sq.wrid)
+ free(qp->sq.wrid);
+
+ if (qp->sq.need_flush)
+ free(qp->sq.need_flush);
+
+ if (qp->sq.wr_opcode)
+ free(qp->sq.wr_opcode);
+
+ return err;
+}
+
+static void xsc_free_qp_buf(struct xsc_context *ctx, struct xsc_qp *qp)
+{
+ xsc_free_actual_buf(ctx, &qp->buf);
+
+ if (qp->sq_buf.buf)
+ xsc_free_actual_buf(ctx, &qp->sq_buf);
+
+ if (qp->rq.wrid)
+ free(qp->rq.wrid);
+
+ if (qp->sq.wqe_head)
+ free(qp->sq.wqe_head);
+
+ if (qp->sq.wrid)
+ free(qp->sq.wrid);
+
+ if (qp->sq.wr_data)
+ free(qp->sq.wr_data);
+
+ if (qp->sq.need_flush)
+ free(qp->sq.need_flush);
+
+ if (qp->sq.wr_opcode)
+ free(qp->sq.wr_opcode);
+}
+
+enum {
+ XSC_CREATE_QP_SUP_COMP_MASK = IBV_QP_INIT_ATTR_PD |
+ IBV_QP_INIT_ATTR_CREATE_FLAGS
+};
+
+enum {
+ XSC_DV_CREATE_QP_SUP_COMP_MASK = XSCDV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS |
+ XSCDV_QP_INIT_ATTR_MASK_DC
+};
+
+enum {
+ XSC_CREATE_QP_EX2_COMP_MASK = (IBV_QP_INIT_ATTR_CREATE_FLAGS |
+ IBV_QP_INIT_ATTR_MAX_TSO_HEADER |
+ IBV_QP_INIT_ATTR_IND_TABLE |
+ IBV_QP_INIT_ATTR_RX_HASH),
+};
+
+enum {
+ XSCDV_QP_CREATE_SUP_FLAGS =
+ (XSCDV_QP_CREATE_TUNNEL_OFFLOADS |
+ XSCDV_QP_CREATE_TIR_ALLOW_SELF_LOOPBACK_UC |
+ XSCDV_QP_CREATE_TIR_ALLOW_SELF_LOOPBACK_MC |
+ XSCDV_QP_CREATE_DISABLE_SCATTER_TO_CQE |
+ XSCDV_QP_CREATE_ALLOW_SCATTER_TO_CQE),
+};
+
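+/*
+ * Common QP creation path: calculates the WQ layout, allocates the WQE
+ * buffers and spinlocks, issues the create command, registers the QP in
+ * the context table and maps the SQ/RQ doorbells.
+ */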
+static struct ibv_qp *create_qp(struct ibv_context *context,
+ struct ibv_qp_init_attr_ex *attr,
+ struct xscdv_qp_init_attr *xqp_attr)
+{
+ struct xsc_create_qp cmd;
+ struct xsc_create_qp_resp resp;
+ struct xsc_create_qp_ex_resp resp_ex;
+ struct xsc_qp *qp;
+ int ret;
+ struct xsc_context *ctx = to_xctx(context);
+ struct ibv_qp *ibqp;
+ struct xsc_parent_domain *xparent_domain;
+ struct xsc_device *xdev = to_xdev(context->device);
+
+ xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, "comp_mask=0x%x.\n", attr->comp_mask);
+
+ if (attr->comp_mask & ~XSC_CREATE_QP_SUP_COMP_MASK) {
+ xsc_err("Not supported comp_mask:0x%x\n", attr->comp_mask);
+ return NULL;
+ }
+
+ qp = calloc(1, sizeof(*qp));
+ if (!qp) {
+ xsc_err("QP calloc failed\n");
+ return NULL;
+ }
+
+ ibqp = (struct ibv_qp *)&qp->verbs_qp;
+ qp->ibv_qp = ibqp;
+
+ memset(&cmd, 0, sizeof(cmd));
+ memset(&resp, 0, sizeof(resp));
+ memset(&resp_ex, 0, sizeof(resp_ex));
+
+ ret = xsc_calc_wq_size(ctx, attr, qp);
+ if (ret < 0) {
+ xsc_err("Calculate WQ size failed\n");
+ errno = EINVAL;
+ goto err;
+ }
+
+ qp->buf_size = ret;
+ qp->sq_buf_size = 0;
+
+ if (xsc_alloc_qp_buf(context, attr, qp, ret)) {
+ xsc_err("Alloc QP buffer failed\n");
+ errno = ENOMEM;
+ goto err;
+ }
+
+ qp->sq_start = qp->buf.buf + qp->sq.offset;
+ qp->rq_start = qp->buf.buf + qp->rq.offset;
+ qp->sq.qend = qp->buf.buf + qp->sq.offset +
+ (qp->sq.wqe_cnt << qp->sq.wqe_shift);
+
+ xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, "sq start:%p, sq qend:%p, buffer size:%u\n",
+ qp->sq_start, qp->sq.qend, qp->buf_size);
+
+ xsc_init_qp_indices(qp);
+
+ if (xsc_spinlock_init_pd(&qp->sq.lock, attr->pd) ||
+ xsc_spinlock_init_pd(&qp->rq.lock, attr->pd))
+ goto err_free_qp_buf;
+
+ cmd.buf_addr = (uintptr_t) qp->buf.buf;
+ cmd.db_addr = (uintptr_t) qp->db;
+ cmd.sq_wqe_count = qp->sq.ds_cnt;
+ cmd.rq_wqe_count = qp->rq.ds_cnt;
+ cmd.rq_wqe_shift = qp->rq.wqe_shift;
+
+ if (attr->qp_type == IBV_QPT_RAW_PACKET) {
+ if (attr->comp_mask & IBV_QP_INIT_ATTR_CREATE_FLAGS) {
+ if (attr->create_flags & XSC_QP_CREATE_RAWPACKET_TSO) {
+ cmd.flags |= XSC_QP_FLAG_RAWPACKET_TSO; /* translate to command flags */
+ xsc_dbg(ctx->dbg_fp, XSC_DBG_QP,
+ "translate create_flags(0x%x) to cmd_flags(0x%x)\n",
+ attr->create_flags, cmd.flags);
+ }
+
+ if (attr->create_flags & XSC_QP_CREATE_RAWPACKET_TX) {
+ cmd.flags |= XSC_QP_FLAG_RAWPACKET_TX; /* translate to command flags */
+ xsc_dbg(ctx->dbg_fp, XSC_DBG_QP,
+ "translate create_flags(0x%x) to cmd_flags(0x%x)\n",
+ attr->create_flags, cmd.flags);
+ }
+ attr->comp_mask &= ~IBV_QP_INIT_ATTR_CREATE_FLAGS;
+ }
+ }
+
+ pthread_mutex_lock(&ctx->qp_table_mutex);
+
+ xparent_domain = to_xparent_domain(attr->pd);
+
+ ret = ibv_cmd_create_qp_ex(context, &qp->verbs_qp, attr,
+ &cmd.ibv_cmd, sizeof(cmd),
+ &resp.ibv_resp, sizeof(resp));
+ if (ret) {
+ xsc_err("ibv_cmd_create_qp_ex failed,ret %d\n", ret);
+ errno = ret;
+ goto err_free_uidx;
+ }
+
+ if (qp->sq.wqe_cnt || qp->rq.wqe_cnt) {
+ ret = xsc_store_qp(ctx, ibqp->qp_num, qp);
+ if (ret) {
+ xsc_err("xsc_store_qp failed,ret %d\n", ret);
+ errno = EINVAL;
+ goto err_destroy;
+ }
+ }
+
+ pthread_mutex_unlock(&ctx->qp_table_mutex);
+
+ qp->rq.max_post = qp->rq.wqe_cnt;
+
+ if (attr->sq_sig_all)
+ qp->sq_signal_bits = 1;
+ else
+ qp->sq_signal_bits = 0;
+
+ attr->cap.max_send_wr = qp->sq.max_post;
+ attr->cap.max_recv_wr = qp->rq.max_post;
+ attr->cap.max_recv_sge = qp->rq.max_gs;
+
+ qp->rsc.type = XSC_RSC_TYPE_QP;
+ qp->rsc.rsn = ibqp->qp_num;
+
+ if (xparent_domain)
+ atomic_fetch_add(&xparent_domain->xpd.refcount, 1);
+
+ qp->rqn = ibqp->qp_num;
+ qp->sqn = ibqp->qp_num;
+
+ xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, "qp rqn:%u, sqn:%u\n", qp->rqn, qp->sqn);
+ qp->sq.db = ctx->sqm_reg_va + (ctx->qpm_tx_db & (xdev->page_size - 1));
+ qp->rq.db = ctx->rqm_reg_va + (ctx->qpm_rx_db & (xdev->page_size - 1));
+
+ return ibqp;
+
+err_destroy:
+ ibv_cmd_destroy_qp(ibqp);
+
+err_free_uidx:
+ pthread_mutex_unlock(&to_xctx(context)->qp_table_mutex);
+
+err_free_qp_buf:
+ xsc_free_qp_buf(ctx, qp);
+
+err:
+ free(qp);
+
+ return NULL;
+}
+
+struct ibv_qp *xsc_create_qp(struct ibv_pd *pd,
+ struct ibv_qp_init_attr *attr)
+{
+ struct ibv_qp *qp;
+ struct ibv_qp_init_attr_ex attrx;
+
+ memset(&attrx, 0, sizeof(attrx));
+ memcpy(&attrx, attr, sizeof(*attr));
+ attrx.comp_mask = IBV_QP_INIT_ATTR_PD;
+ attrx.pd = pd;
+ qp = create_qp(pd->context, &attrx, NULL);
+ if (qp)
+ memcpy(attr, &attrx, sizeof(*attr));
+
+ return qp;
+}
+
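+/*
+ * Take the send and receive CQ locks in a fixed order (lower CQN first)
+ * so that concurrent teardown of QPs sharing CQs cannot deadlock;
+ * xsc_unlock_cqs() releases them in the reverse order.
+ */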
+static void xsc_lock_cqs(struct ibv_qp *qp)
+{
+ struct xsc_cq *send_cq = to_xcq(qp->send_cq);
+ struct xsc_cq *recv_cq = to_xcq(qp->recv_cq);
+
+ if (send_cq && recv_cq) {
+ if (send_cq == recv_cq) {
+ xsc_spin_lock(&send_cq->lock);
+ } else if (send_cq->cqn < recv_cq->cqn) {
+ xsc_spin_lock(&send_cq->lock);
+ xsc_spin_lock(&recv_cq->lock);
+ } else {
+ xsc_spin_lock(&recv_cq->lock);
+ xsc_spin_lock(&send_cq->lock);
+ }
+ } else if (send_cq) {
+ xsc_spin_lock(&send_cq->lock);
+ } else if (recv_cq) {
+ xsc_spin_lock(&recv_cq->lock);
+ }
+}
+
+static void xsc_unlock_cqs(struct ibv_qp *qp)
+{
+ struct xsc_cq *send_cq = to_xcq(qp->send_cq);
+ struct xsc_cq *recv_cq = to_xcq(qp->recv_cq);
+
+ if (send_cq && recv_cq) {
+ if (send_cq == recv_cq) {
+ xsc_spin_unlock(&send_cq->lock);
+ } else if (send_cq->cqn < recv_cq->cqn) {
+ xsc_spin_unlock(&recv_cq->lock);
+ xsc_spin_unlock(&send_cq->lock);
+ } else {
+ xsc_spin_unlock(&send_cq->lock);
+ xsc_spin_unlock(&recv_cq->lock);
+ }
+ } else if (send_cq) {
+ xsc_spin_unlock(&send_cq->lock);
+ } else if (recv_cq) {
+ xsc_spin_unlock(&recv_cq->lock);
+ }
+}
+
+int xsc_destroy_qp(struct ibv_qp *ibqp)
+{
+ struct xsc_qp *qp = to_xqp(ibqp);
+ struct xsc_context *ctx = to_xctx(ibqp->context);
+ int ret;
+ struct xsc_parent_domain *xparent_domain = to_xparent_domain(ibqp->pd);
+ struct xsc_err_state_qp_node *tmp, *err_rq_node, *err_sq_node;
+
+ xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, "\n");
+
+ pthread_mutex_lock(&ctx->qp_table_mutex);
+
+ ret = ibv_cmd_destroy_qp(ibqp);
+ if (ret) {
+ pthread_mutex_unlock(&ctx->qp_table_mutex);
+ return ret;
+ }
+
+ xsc_lock_cqs(ibqp);
+
+ list_for_each_safe(&to_xcq(ibqp->recv_cq)->err_state_qp_list, err_rq_node, tmp, entry) {
+ if (err_rq_node->qp_id == qp->rsc.rsn) {
+ list_del(&err_rq_node->entry);
+ free(err_rq_node);
+ }
+ }
+
+ list_for_each_safe(&to_xcq(ibqp->send_cq)->err_state_qp_list, err_sq_node, tmp, entry) {
+ if (err_sq_node->qp_id == qp->rsc.rsn) {
+ list_del(&err_sq_node->entry);
+ free(err_sq_node);
+ }
+ }
+
+ __xsc_cq_clean(to_xcq(ibqp->recv_cq), qp->rsc.rsn);
+ if (ibqp->send_cq != ibqp->recv_cq)
+ __xsc_cq_clean(to_xcq(ibqp->send_cq), qp->rsc.rsn);
+
+ if (qp->sq.wqe_cnt || qp->rq.wqe_cnt)
+ xsc_clear_qp(ctx, ibqp->qp_num);
+
+ xsc_unlock_cqs(ibqp);
+ pthread_mutex_unlock(&ctx->qp_table_mutex);
+
+ xsc_free_qp_buf(ctx, qp);
+
+ if (xparent_domain)
+ atomic_fetch_sub(&xparent_domain->xpd.refcount, 1);
+
+ free(qp);
+
+ return 0;
+}
+
+int xsc_query_qp(struct ibv_qp *ibqp, struct ibv_qp_attr *attr,
+ int attr_mask, struct ibv_qp_init_attr *init_attr)
+{
+ struct ibv_query_qp cmd;
+ struct xsc_qp *qp = to_xqp(ibqp);
+ int ret;
+
+ xsc_dbg(to_xctx(ibqp->context)->dbg_fp, XSC_DBG_QP, "\n");
+
+ if (qp->rss_qp)
+ return ENOSYS;
+
+ ret = ibv_cmd_query_qp(ibqp, attr, attr_mask, init_attr, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
+
+ init_attr->cap.max_send_wr = qp->sq.max_post;
+ init_attr->cap.max_send_sge = qp->sq.max_gs;
+ init_attr->cap.max_inline_data = qp->max_inline_data;
+
+ attr->cap = init_attr->cap;
+
+ return 0;
+}
+
+enum {
+ XSC_MODIFY_QP_EX_ATTR_MASK = IBV_QP_RATE_LIMIT,
+};
+
+int xsc_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
+ int attr_mask)
+{
+ struct ibv_modify_qp cmd = {};
+ struct xsc_qp *xqp = to_xqp(qp);
+ int ret;
+
+ xsc_dbg(to_xctx(qp->context)->dbg_fp, XSC_DBG_QP, "\n");
+ ret = ibv_cmd_modify_qp(qp, attr, attr_mask,
+ &cmd, sizeof(cmd));
+
+ if (!ret && (attr_mask & IBV_QP_STATE) &&
+ attr->qp_state == IBV_QPS_RESET) {
+ if (qp->recv_cq) {
+ xsc_cq_clean(to_xcq(qp->recv_cq), xqp->rsc.rsn);
+ }
+ if (qp->send_cq != qp->recv_cq && qp->send_cq)
+ xsc_cq_clean(to_xcq(qp->send_cq),
+ to_xqp(qp)->rsc.rsn);
+
+ xsc_init_qp_indices(xqp);
+ }
+
+ if (!ret && (attr_mask & IBV_QP_STATE))
+ qp->state = attr->qp_state;
+
+ /* workaround: generate flush error CQEs if the QP state turns to ERR */
+ if (!ret && (attr_mask & IBV_QP_STATE))
+ ret = xsc_err_state_qp(qp, attr->cur_qp_state, attr->qp_state);
+
+ return ret;
+}
+
+int xsc_modify_qp_rate_limit(struct ibv_qp *qp,
+ struct ibv_qp_rate_limit_attr *attr)
+{
+ struct ibv_qp_attr qp_attr = {};
+ struct ib_uverbs_ex_modify_qp_resp resp = {};
+ struct xsc_modify_qp cmd = {};
+ struct xsc_context *xctx = to_xctx(qp->context);
+ int ret;
+
+ if (attr->comp_mask)
+ return EINVAL;
+
+ if ((attr->max_burst_sz ||
+ attr->typical_pkt_sz) &&
+ (!attr->rate_limit ||
+ !(xctx->packet_pacing_caps.cap_flags &
+ XSC_IB_PP_SUPPORT_BURST)))
+ return EINVAL;
+
+ cmd.burst_info.max_burst_sz = attr->max_burst_sz;
+ cmd.burst_info.typical_pkt_sz = attr->typical_pkt_sz;
+ qp_attr.rate_limit = attr->rate_limit;
+
+ ret = ibv_cmd_modify_qp_ex(qp, &qp_attr, IBV_QP_RATE_LIMIT,
+ &cmd.ibv_cmd,
+ sizeof(cmd),
+ &resp,
+ sizeof(resp));
+
+ return ret;
+}
+
+/*
+ * IB spec version 1.3. Table 224 Rate to xsc rate
+ * conversion table on best effort basis.
+ */
+static const uint8_t ib_to_xsc_rate_table[] = {
+ 0, /* Invalid to unlimited */
+ 0, /* Invalid to unlimited */
+ 7, /* 2.5 Gbps */
+ 8, /* 10Gbps */
+ 9, /* 30Gbps */
+ 10, /* 5 Gbps */
+ 11, /* 20 Gbps */
+ 12, /* 40 Gbps */
+ 13, /* 60 Gbps */
+ 14, /* 80 Gbps */
+ 15, /* 120 Gbps */
+ 11, /* 14 Gbps to 20 Gbps */
+ 13, /* 56 Gbps to 60 Gbps */
+ 15, /* 112 Gbps to 120 Gbps */
+ 0, /* 168 Gbps to unlimited */
+ 9, /* 25 Gbps to 30 Gbps */
+ 15, /* 100 Gbps to 120 Gbps */
+ 0, /* 200 Gbps to unlimited */
+ 0, /* 300 Gbps to unlimited */
+};
+
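+/* Map an IB static rate enum to the device rate code; out-of-range values fall back to 0 (unlimited). */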
+static uint8_t ah_attr_to_xsc_rate(enum ibv_rate ah_static_rate)
+{
+ if (ah_static_rate >= ARRAY_SIZE(ib_to_xsc_rate_table))
+ return 0;
+ return ib_to_xsc_rate_table[ah_static_rate];
+}
+
+#define RROCE_UDP_SPORT_MIN 0xC000
+#define RROCE_UDP_SPORT_MAX 0xFFFF
+struct ibv_ah *xsc_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr)
+{
+ struct xsc_context *ctx = to_xctx(pd->context);
+ struct ibv_port_attr port_attr;
+ struct xsc_ah *ah;
+ uint8_t static_rate;
+ uint32_t gid_type;
+ __be32 tmp;
+ uint8_t grh;
+ bool is_eth;
+ bool grh_req;
+
+ if (attr->port_num < 1 || attr->port_num > ctx->num_ports)
+ return NULL;
+
+ if (ctx->cached_link_layer[attr->port_num - 1]) {
+ is_eth = ctx->cached_link_layer[attr->port_num - 1] ==
+ IBV_LINK_LAYER_ETHERNET;
+ grh_req = ctx->cached_port_flags[attr->port_num - 1] &
+ IBV_QPF_GRH_REQUIRED;
+ } else {
+ if (ibv_query_port(pd->context, attr->port_num, &port_attr))
+ return NULL;
+
+ is_eth = port_attr.link_layer == IBV_LINK_LAYER_ETHERNET;
+ grh_req = port_attr.flags & IBV_QPF_GRH_REQUIRED;
+ }
+
+ if (unlikely((!attr->is_global) && (is_eth || grh_req))) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ ah = calloc(1, sizeof *ah);
+ if (!ah)
+ return NULL;
+
+ static_rate = ah_attr_to_xsc_rate(attr->static_rate);
+ if (is_eth) {
+ if (ibv_query_gid_type(pd->context, attr->port_num,
+ attr->grh.sgid_index, &gid_type))
+ goto err;
+
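+ /*
+ * RoCE v2 is UDP encapsulated: the rlid field carries a randomly
+ * chosen UDP source port from the RROCE_UDP_SPORT_MIN..MAX range.
+ */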
+ if (gid_type == IBV_GID_TYPE_ROCE_V2)
+ ah->av.rlid = htobe16(rand() % (RROCE_UDP_SPORT_MAX + 1
+ - RROCE_UDP_SPORT_MIN)
+ + RROCE_UDP_SPORT_MIN);
+ /* Since RoCE packets must contain GRH, this bit is reserved
+ * for RoCE and shouldn't be set.
+ */
+ grh = 0;
+ ah->av.stat_rate_sl = (static_rate << 4) | ((attr->sl & 0x7) << 1);
+ } else {
+ ah->av.fl_mlid = attr->src_path_bits & 0x7f;
+ ah->av.rlid = htobe16(attr->dlid);
+ grh = 1;
+ ah->av.stat_rate_sl = (static_rate << 4) | (attr->sl & 0xf);
+ }
+ if (attr->is_global) {
+ ah->av.tclass = attr->grh.traffic_class;
+ ah->av.hop_limit = attr->grh.hop_limit;
+ tmp = htobe32((grh << 30) |
+ ((attr->grh.sgid_index & 0xff) << 20) |
+ (attr->grh.flow_label & 0xfffff));
+ ah->av.grh_gid_fl = tmp;
+ memcpy(ah->av.rgid, attr->grh.dgid.raw, 16);
+ }
+
+ if (is_eth) {
+ if (ctx->cmds_supp_uhw & XSC_USER_CMDS_SUPP_UHW_CREATE_AH) {
+ struct xsc_create_ah_resp resp = {};
+
+ if (ibv_cmd_create_ah(pd, &ah->ibv_ah, attr, &resp.ibv_resp, sizeof(resp)))
+ goto err;
+
+ ah->kern_ah = true;
+ memcpy(ah->av.rmac, resp.dmac, ETHERNET_LL_SIZE);
+ } else {
+ uint16_t vid;
+
+ if (ibv_resolve_eth_l2_from_gid(pd->context, attr,
+ ah->av.rmac, &vid))
+ goto err;
+ }
+ }
+
+ return &ah->ibv_ah;
+err:
+ free(ah);
+ return NULL;
+}
+
+int xsc_destroy_ah(struct ibv_ah *ah)
+{
+ struct xsc_ah *xah = to_xah(ah);
+ int err;
+
+ if (xah->kern_ah) {
+ err = ibv_cmd_destroy_ah(ah);
+ if (err)
+ return err;
+ }
+
+ free(xah);
+ return 0;
+}
+
+int xsc_attach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid)
+{
+ return ibv_cmd_attach_mcast(qp, gid, lid);
+}
+
+int xsc_detach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid)
+{
+ return ibv_cmd_detach_mcast(qp, gid, lid);
+}
+
+struct ibv_qp *xsc_create_qp_ex(struct ibv_context *context,
+ struct ibv_qp_init_attr_ex *attr)
+{
+ return create_qp(context, attr, NULL);
+}
+
+struct ibv_qp *xscdv_create_qp(struct ibv_context *context,
+ struct ibv_qp_init_attr_ex *qp_attr,
+ struct xscdv_qp_init_attr *xqp_attr)
+{
+ return create_qp(context, qp_attr, xqp_attr);
+}
+
+struct ibv_xrcd *
+xsc_open_xrcd(struct ibv_context *context,
+ struct ibv_xrcd_init_attr *xrcd_init_attr)
+{
+ int err;
+ struct verbs_xrcd *xrcd;
+ struct ibv_open_xrcd cmd = {};
+ struct ib_uverbs_open_xrcd_resp resp = {};
+
+ xrcd = calloc(1, sizeof(*xrcd));
+ if (!xrcd)
+ return NULL;
+
+ err = ibv_cmd_open_xrcd(context, xrcd, sizeof(*xrcd), xrcd_init_attr,
+ &cmd, sizeof(cmd), &resp, sizeof(resp));
+ if (err) {
+ free(xrcd);
+ return NULL;
+ }
+
+ return &xrcd->xrcd;
+}
+
+int xsc_close_xrcd(struct ibv_xrcd *ib_xrcd)
+{
+ struct verbs_xrcd *xrcd = container_of(ib_xrcd, struct verbs_xrcd, xrcd);
+ int ret;
+
+ ret = ibv_cmd_close_xrcd(xrcd);
+ if (!ret)
+ free(xrcd);
+
+ return ret;
+}
+
+int xsc_query_device_ex(struct ibv_context *context,
+ const struct ibv_query_device_ex_input *input,
+ struct ibv_device_attr_ex *attr,
+ size_t attr_size)
+{
+ struct xsc_context *xctx = to_xctx(context);
+ struct xsc_query_device_ex_resp resp = {};
+ size_t resp_size =
+ (xctx->cmds_supp_uhw & XSC_USER_CMDS_SUPP_UHW_QUERY_DEVICE) ?
+ sizeof(resp) :
+ sizeof(resp.ibv_resp);
+ struct ibv_device_attr *a;
+ union xsc_ib_fw_ver raw_fw_ver;
+ int err;
+
+ raw_fw_ver.data = 0;
+ err = ibv_cmd_query_device_any(context, input, attr, attr_size,
+ &resp.ibv_resp, &resp_size);
+ if (err)
+ return err;
+
+ if (attr_size >= offsetofend(struct ibv_device_attr_ex, tso_caps)) {
+ attr->tso_caps.max_tso = resp.tso_caps.max_tso;
+ attr->tso_caps.supported_qpts = resp.tso_caps.supported_qpts;
+ }
+ if (attr_size >= offsetofend(struct ibv_device_attr_ex, rss_caps)) {
+ attr->rss_caps.rx_hash_fields_mask =
+ resp.rss_caps.rx_hash_fields_mask;
+ attr->rss_caps.rx_hash_function =
+ resp.rss_caps.rx_hash_function;
+ }
+ if (attr_size >=
+ offsetofend(struct ibv_device_attr_ex, packet_pacing_caps)) {
+ attr->packet_pacing_caps.qp_rate_limit_min =
+ resp.packet_pacing_caps.qp_rate_limit_min;
+ attr->packet_pacing_caps.qp_rate_limit_max =
+ resp.packet_pacing_caps.qp_rate_limit_max;
+ attr->packet_pacing_caps.supported_qpts =
+ resp.packet_pacing_caps.supported_qpts;
+ }
+
+ if (resp.xsc_ib_support_multi_pkt_send_wqes & XSC_IB_ALLOW_MPW)
+ xctx->vendor_cap_flags |= XSC_VENDOR_CAP_FLAGS_MPW_ALLOWED;
+
+ if (resp.xsc_ib_support_multi_pkt_send_wqes & XSC_IB_SUPPORT_EMPW)
+ xctx->vendor_cap_flags |= XSC_VENDOR_CAP_FLAGS_ENHANCED_MPW;
+
+ xctx->cqe_comp_caps.max_num = resp.cqe_comp_caps.max_num;
+ xctx->cqe_comp_caps.supported_format = resp.cqe_comp_caps.supported_format;
+ xctx->sw_parsing_caps.sw_parsing_offloads =
+ resp.sw_parsing_caps.sw_parsing_offloads;
+ xctx->sw_parsing_caps.supported_qpts =
+ resp.sw_parsing_caps.supported_qpts;
+ xctx->striding_rq_caps.min_single_stride_log_num_of_bytes =
+ resp.striding_rq_caps.min_single_stride_log_num_of_bytes;
+ xctx->striding_rq_caps.max_single_stride_log_num_of_bytes =
+ resp.striding_rq_caps.max_single_stride_log_num_of_bytes;
+ xctx->striding_rq_caps.min_single_wqe_log_num_of_strides =
+ resp.striding_rq_caps.min_single_wqe_log_num_of_strides;
+ xctx->striding_rq_caps.max_single_wqe_log_num_of_strides =
+ resp.striding_rq_caps.max_single_wqe_log_num_of_strides;
+ xctx->striding_rq_caps.supported_qpts =
+ resp.striding_rq_caps.supported_qpts;
+ xctx->tunnel_offloads_caps = resp.tunnel_offloads_caps;
+ xctx->packet_pacing_caps = resp.packet_pacing_caps;
+
+ if (resp.flags & XSC_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_COMP)
+ xctx->vendor_cap_flags |= XSC_VENDOR_CAP_FLAGS_CQE_128B_COMP;
+
+ if (resp.flags & XSC_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_PAD)
+ xctx->vendor_cap_flags |= XSC_VENDOR_CAP_FLAGS_CQE_128B_PAD;
+
+ raw_fw_ver.data = resp.ibv_resp.base.fw_ver;
+ a = &attr->orig_attr;
+ xsc_set_fw_version(a, &raw_fw_ver);
+
+ return 0;
+}
+
+static int rwq_sig_enabled(struct ibv_context *context)
+{
+ char *env;
+
+ env = getenv("XSC_RWQ_SIGNATURE");
+ if (env)
+ return 1;
+
+ return 0;
+}
+
+static void xsc_free_rwq_buf(struct xsc_rwq *rwq, struct ibv_context *context)
+{
+ struct xsc_context *ctx = to_xctx(context);
+
+ xsc_free_actual_buf(ctx, &rwq->buf);
+ free(rwq->rq.wrid);
+}
+
+static int xsc_alloc_rwq_buf(struct ibv_context *context,
+ struct xsc_rwq *rwq,
+ int size)
+{
+ int err;
+ enum xsc_alloc_type alloc_type;
+
+ xsc_get_alloc_type(to_xctx(context), XSC_RWQ_PREFIX,
+ &alloc_type, XSC_ALLOC_TYPE_ANON);
+
+ rwq->rq.wrid = malloc(rwq->rq.wqe_cnt * sizeof(uint64_t));
+ if (!rwq->rq.wrid) {
+ errno = ENOMEM;
+ return -1;
+ }
+
+ err = xsc_alloc_prefered_buf(to_xctx(context), &rwq->buf,
+ align(rwq->buf_size, to_xdev
+ (context->device)->page_size),
+ to_xdev(context->device)->page_size,
+ alloc_type,
+ XSC_RWQ_PREFIX);
+
+ if (err) {
+ free(rwq->rq.wrid);
+ errno = ENOMEM;
+ return -1;
+ }
+
+ return 0;
+}
+
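+/*
+ * Common receive WQ creation path: sizes and allocates the WQE buffer and
+ * doorbell record, reserves a user index for the WQ, forwards any
+ * striding-RQ attributes and issues the create command.
+ */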
+static struct ibv_wq *create_wq(struct ibv_context *context,
+ struct ibv_wq_init_attr *attr,
+ struct xscdv_wq_init_attr *xwq_attr)
+{
+ struct xsc_create_wq cmd;
+ struct xsc_create_wq_resp resp;
+ int err;
+ struct xsc_rwq *rwq;
+ struct xsc_context *ctx = to_xctx(context);
+ int ret;
+ int32_t usr_idx = 0;
+
+ if (attr->wq_type != IBV_WQT_RQ)
+ return NULL;
+
+ memset(&cmd, 0, sizeof(cmd));
+ memset(&resp, 0, sizeof(resp));
+
+ rwq = calloc(1, sizeof(*rwq));
+ if (!rwq)
+ return NULL;
+
+ rwq->wq_sig = rwq_sig_enabled(context);
+ if (rwq->wq_sig)
+ cmd.flags = XSC_WQ_FLAG_SIGNATURE;
+
+ ret = xsc_calc_rwq_size(ctx, rwq, attr, xwq_attr);
+ if (ret < 0) {
+ errno = -ret;
+ goto err;
+ }
+
+ rwq->buf_size = ret;
+ if (xsc_alloc_rwq_buf(context, rwq, ret))
+ goto err;
+
+ xsc_init_rwq_indices(rwq);
+
+ if (xsc_spinlock_init_pd(&rwq->rq.lock, attr->pd))
+ goto err_free_rwq_buf;
+
+ rwq->db = xsc_alloc_dbrec(ctx);
+ if (!rwq->db)
+ goto err_free_rwq_buf;
+
+ rwq->db[XSC_RCV_DBR] = 0;
+ rwq->db[XSC_SND_DBR] = 0;
+ rwq->pbuff = rwq->buf.buf + rwq->rq.offset;
+ rwq->recv_db = &rwq->db[XSC_RCV_DBR];
+ cmd.buf_addr = (uintptr_t)rwq->buf.buf;
+ cmd.db_addr = (uintptr_t)rwq->db;
+ cmd.rq_wqe_count = rwq->rq.wqe_cnt;
+ cmd.rq_wqe_shift = rwq->rq.wqe_shift;
+ usr_idx = xsc_store_uidx(ctx, rwq);
+ if (usr_idx < 0) {
+ xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, "Couldn't find free user index\n");
+ goto err_free_db_rec;
+ }
+
+ cmd.user_index = usr_idx;
+
+ if (xwq_attr) {
+ if (xwq_attr->comp_mask & XSCDV_WQ_INIT_ATTR_MASK_STRIDING_RQ) {
+ if ((xwq_attr->striding_rq_attrs.single_stride_log_num_of_bytes <
+ ctx->striding_rq_caps.min_single_stride_log_num_of_bytes) ||
+ (xwq_attr->striding_rq_attrs.single_stride_log_num_of_bytes >
+ ctx->striding_rq_caps.max_single_stride_log_num_of_bytes)) {
+ errno = EINVAL;
+ goto err_create;
+ }
+
+ if ((xwq_attr->striding_rq_attrs.single_wqe_log_num_of_strides <
+ ctx->striding_rq_caps.min_single_wqe_log_num_of_strides) ||
+ (xwq_attr->striding_rq_attrs.single_wqe_log_num_of_strides >
+ ctx->striding_rq_caps.max_single_wqe_log_num_of_strides)) {
+ errno = EINVAL;
+ goto err_create;
+ }
+
+ cmd.single_stride_log_num_of_bytes =
+ xwq_attr->striding_rq_attrs.single_stride_log_num_of_bytes;
+ cmd.single_wqe_log_num_of_strides =
+ xwq_attr->striding_rq_attrs.single_wqe_log_num_of_strides;
+ cmd.two_byte_shift_en =
+ xwq_attr->striding_rq_attrs.two_byte_shift_en;
+ cmd.comp_mask |= XSC_IB_CREATE_WQ_STRIDING_RQ;
+ }
+ }
+
+ err = ibv_cmd_create_wq(context, attr, &rwq->wq, &cmd.ibv_cmd,
+ sizeof(cmd), &resp.ibv_resp, sizeof(resp));
+ if (err)
+ goto err_create;
+
+ rwq->rsc.type = XSC_RSC_TYPE_RWQ;
+ rwq->rsc.rsn = cmd.user_index;
+
+ rwq->wq.post_recv = xsc_post_wq_recv;
+ return &rwq->wq;
+
+err_create:
+ xsc_clear_uidx(ctx, cmd.user_index);
+err_free_db_rec:
+ xsc_free_db(to_xctx(context), rwq->db);
+err_free_rwq_buf:
+ xsc_free_rwq_buf(rwq, context);
+err:
+ free(rwq);
+ return NULL;
+}
+
+struct ibv_wq *xsc_create_wq(struct ibv_context *context,
+ struct ibv_wq_init_attr *attr)
+{
+ return create_wq(context, attr, NULL);
+}
+
+struct ibv_wq *xscdv_create_wq(struct ibv_context *context,
+ struct ibv_wq_init_attr *attr,
+ struct xscdv_wq_init_attr *xwq_attr)
+{
+ return create_wq(context, attr, xwq_attr);
+}
+
+int xsc_modify_wq(struct ibv_wq *wq, struct ibv_wq_attr *attr)
+{
+ struct xsc_modify_wq cmd = {};
+ struct xsc_rwq *rwq = to_xrwq(wq);
+
+ if ((attr->attr_mask & IBV_WQ_ATTR_STATE) &&
+ attr->wq_state == IBV_WQS_RDY) {
+ if ((attr->attr_mask & IBV_WQ_ATTR_CURR_STATE) &&
+ attr->curr_wq_state != wq->state)
+ return -EINVAL;
+
+ if (wq->state == IBV_WQS_RESET) {
+ xsc_spin_lock(&to_xcq(wq->cq)->lock);
+ __xsc_cq_clean(to_xcq(wq->cq),
+ rwq->rsc.rsn);
+ xsc_spin_unlock(&to_xcq(wq->cq)->lock);
+ xsc_init_rwq_indices(rwq);
+ rwq->db[XSC_RCV_DBR] = 0;
+ rwq->db[XSC_SND_DBR] = 0;
+ }
+ }
+
+ return ibv_cmd_modify_wq(wq, attr, &cmd.ibv_cmd, sizeof(cmd));
+}
+
+int xsc_destroy_wq(struct ibv_wq *wq)
+{
+ struct xsc_rwq *rwq = to_xrwq(wq);
+ int ret;
+
+ ret = ibv_cmd_destroy_wq(wq);
+ if (ret)
+ return ret;
+
+ xsc_spin_lock(&to_xcq(wq->cq)->lock);
+ __xsc_cq_clean(to_xcq(wq->cq), rwq->rsc.rsn);
+ xsc_spin_unlock(&to_xcq(wq->cq)->lock);
+ xsc_clear_uidx(to_xctx(wq->context), rwq->rsc.rsn);
+ xsc_free_db(to_xctx(wq->context), rwq->db);
+ xsc_free_rwq_buf(rwq, wq->context);
+ free(rwq);
+
+ return 0;
+}
+
+static void free_flow_counters_descriptions(struct xsc_ib_create_flow *cmd)
+{
+ int i;
+
+ for (i = 0; i < cmd->ncounters_data; i++)
+ free(cmd->data[i].counters_data);
+}
+
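+/*
+ * Walk the flow specs looking for a count action; at most one counters
+ * object per flow is supported, and the returned data_size reflects the
+ * counters-data descriptors that must accompany the create command.
+ */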
+static int get_flow_mcounters(struct xsc_flow *mflow,
+ struct ibv_flow_attr *flow_attr,
+ struct xsc_counters **mcounters,
+ uint32_t *data_size)
+{
+ struct ibv_flow_spec *ib_spec;
+ uint32_t ncounters_used = 0;
+ int i;
+
+ ib_spec = (struct ibv_flow_spec *)(flow_attr + 1);
+ for (i = 0; i < flow_attr->num_of_specs; i++, ib_spec = (void *)ib_spec + ib_spec->hdr.size) {
+ if (ib_spec->hdr.type != IBV_FLOW_SPEC_ACTION_COUNT)
+ continue;
+
+ /* only one counters object per flow is currently supported */
+ if (ncounters_used > 0)
+ return EINVAL;
+
+ *mcounters = to_mcounters(ib_spec->flow_count.counters);
+ ncounters_used++;
+ }
+
+ *data_size = ncounters_used * sizeof(struct xsc_ib_flow_counters_data);
+ return 0;
+}
+
+static int allocate_flow_counters_descriptions(struct xsc_counters *mcounters,
+ struct xsc_ib_create_flow *cmd)
+{
+ struct xsc_ib_flow_counters_data *mcntrs_data;
+ struct xsc_ib_flow_counters_desc *cntrs_data;
+ struct xsc_counter_node *cntr_node;
+ uint32_t ncounters;
+ int j = 0;
+
+ mcntrs_data = cmd->data;
+ ncounters = mcounters->ncounters;
+
+ /* xsc_attach_counters_point_flow was never called */
+ if (!ncounters)
+ return EINVAL;
+
+ /* each counter has both index and description */
+ cntrs_data = calloc(ncounters, sizeof(*cntrs_data));
+ if (!cntrs_data)
+ return ENOMEM;
+
+ list_for_each(&mcounters->counters_list, cntr_node, entry) {
+ cntrs_data[j].description = cntr_node->desc;
+ cntrs_data[j].index = cntr_node->index;
+ j++;
+ }
+
+ scrub_ptr_attr(cntrs_data);
+ mcntrs_data[cmd->ncounters_data].counters_data = cntrs_data;
+ mcntrs_data[cmd->ncounters_data].ncounters = ncounters;
+ cmd->ncounters_data++;
+
+ return 0;
+}
+
+struct ibv_flow *xsc_create_flow(struct ibv_qp *qp, struct ibv_flow_attr *flow_attr)
+{
+ struct xsc_ib_create_flow *cmd;
+ uint32_t required_cmd_size = 0;
+ struct ibv_flow *flow_id;
+ struct xsc_flow *mflow;
+ int ret;
+
+ mflow = calloc(1, sizeof(*mflow));
+ if (!mflow) {
+ errno = ENOMEM;
+ return NULL;
+ }
+
+ ret = get_flow_mcounters(mflow, flow_attr, &mflow->mcounters, &required_cmd_size);
+ if (ret) {
+ errno = ret;
+ goto err_get_mcounters;
+ }
+
+ required_cmd_size += sizeof(*cmd);
+ cmd = calloc(1, required_cmd_size);
+ if (!cmd) {
+ errno = ENOMEM;
+ goto err_get_mcounters;
+ }
+
+ if (mflow->mcounters) {
+ pthread_mutex_lock(&mflow->mcounters->lock);
+ /* if the counters are already bound, there is no need to pass their descriptions */
+ if (!mflow->mcounters->refcount) {
+ ret = allocate_flow_counters_descriptions(mflow->mcounters, cmd);
+ if (ret) {
+ errno = ret;
+ goto err_desc_alloc;
+ }
+ }
+ }
+
+ flow_id = &mflow->flow_id;
+ ret = ibv_cmd_create_flow(qp, flow_id, flow_attr,
+ cmd, required_cmd_size);
+ if (ret)
+ goto err_create_flow;
+
+ if (mflow->mcounters) {
+ free_flow_counters_descriptions(cmd);
+ mflow->mcounters->refcount++;
+ pthread_mutex_unlock(&mflow->mcounters->lock);
+ }
+
+ free(cmd);
+
+ return flow_id;
+
+err_create_flow:
+ if (mflow->mcounters) {
+ free_flow_counters_descriptions(cmd);
+ pthread_mutex_unlock(&mflow->mcounters->lock);
+ }
+err_desc_alloc:
+ free(cmd);
+err_get_mcounters:
+ free(mflow);
+ return NULL;
+}
+
+int xsc_destroy_flow(struct ibv_flow *flow_id)
+{
+ struct xsc_flow *mflow = to_mflow(flow_id);
+ int ret;
+
+ ret = ibv_cmd_destroy_flow(flow_id);
+ if (ret)
+ return ret;
+
+ if (mflow->mcounters) {
+ pthread_mutex_lock(&mflow->mcounters->lock);
+ mflow->mcounters->refcount--;
+ pthread_mutex_unlock(&mflow->mcounters->lock);
+ }
+
+ free(mflow);
+ return 0;
+}
+
+struct ibv_rwq_ind_table *xsc_create_rwq_ind_table(struct ibv_context *context,
+ struct ibv_rwq_ind_table_init_attr *init_attr)
+{
+ struct ibv_create_rwq_ind_table *cmd;
+ struct xsc_create_rwq_ind_table_resp resp;
+ struct ibv_rwq_ind_table *ind_table;
+ uint32_t required_tbl_size;
+ int num_tbl_entries;
+ int cmd_size;
+ int err;
+
+ num_tbl_entries = 1 << init_attr->log_ind_tbl_size;
+ /* Data must be u64 aligned */
+ required_tbl_size = (num_tbl_entries * sizeof(uint32_t)) < sizeof(uint64_t) ?
+ sizeof(uint64_t) : (num_tbl_entries * sizeof(uint32_t));
+
+ cmd_size = required_tbl_size + sizeof(*cmd);
+ cmd = calloc(1, cmd_size);
+ if (!cmd)
+ return NULL;
+
+ memset(&resp, 0, sizeof(resp));
+ ind_table = calloc(1, sizeof(*ind_table));
+ if (!ind_table)
+ goto free_cmd;
+
+ err = ibv_cmd_create_rwq_ind_table(context, init_attr, ind_table,
+ &resp.ibv_resp, sizeof(resp));
+ if (err)
+ goto err;
+
+ free(cmd);
+ return ind_table;
+
+err:
+ free(ind_table);
+free_cmd:
+ free(cmd);
+ return NULL;
+}
+
+int xsc_destroy_rwq_ind_table(struct ibv_rwq_ind_table *rwq_ind_table)
+{
+ int ret;
+
+ ret = ibv_cmd_destroy_rwq_ind_table(rwq_ind_table);
+
+ if (ret)
+ return ret;
+
+ free(rwq_ind_table);
+ return 0;
+}
+
+int xsc_modify_cq(struct ibv_cq *cq, struct ibv_modify_cq_attr *attr)
+{
+ struct ibv_modify_cq cmd = {};
+
+ return ibv_cmd_modify_cq(cq, attr, &cmd, sizeof(cmd));
+}
+
+static struct ibv_flow_action *_xsc_create_flow_action_esp(struct ibv_context *ctx,
+ struct ibv_flow_action_esp_attr *attr,
+ struct ibv_command_buffer *driver_attr)
+{
+ struct verbs_flow_action *action;
+ int ret;
+
+ if (!check_comp_mask(attr->comp_mask, IBV_FLOW_ACTION_ESP_MASK_ESN)) {
+ errno = EOPNOTSUPP;
+ return NULL;
+ }
+
+ action = calloc(1, sizeof(*action));
+ if (!action) {
+ errno = ENOMEM;
+ return NULL;
+ }
+
+ ret = ibv_cmd_create_flow_action_esp(ctx, attr, action, driver_attr);
+ if (ret) {
+ free(action);
+ return NULL;
+ }
+
+ return &action->action;
+}
+
+struct ibv_flow_action *xsc_create_flow_action_esp(struct ibv_context *ctx,
+ struct ibv_flow_action_esp_attr *attr)
+{
+ return _xsc_create_flow_action_esp(ctx, attr, NULL);
+}
+
+struct ibv_flow_action *xscdv_create_flow_action_esp(struct ibv_context *ctx,
+ struct ibv_flow_action_esp_attr *esp,
+ struct xscdv_flow_action_esp *xattr)
+{
+ DECLARE_COMMAND_BUFFER_LINK(driver_attr, UVERBS_OBJECT_FLOW_ACTION,
+ UVERBS_METHOD_FLOW_ACTION_ESP_CREATE, 1,
+ NULL);
+
+ if (!check_comp_mask(xattr->comp_mask,
+ XSCDV_FLOW_ACTION_ESP_MASK_FLAGS)) {
+ errno = EOPNOTSUPP;
+ return NULL;
+ }
+
+ if (xattr->comp_mask & XSCDV_FLOW_ACTION_ESP_MASK_FLAGS) {
+ if (!check_comp_mask(xattr->action_flags,
+ XSC_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA)) {
+ errno = EOPNOTSUPP;
+ return NULL;
+ }
+ fill_attr_in_uint64(driver_attr, XSC_IB_ATTR_CREATE_FLOW_ACTION_FLAGS,
+ xattr->action_flags);
+ }
+
+ return _xsc_create_flow_action_esp(ctx, esp, driver_attr);
+}
+
+int xsc_modify_flow_action_esp(struct ibv_flow_action *action,
+ struct ibv_flow_action_esp_attr *attr)
+{
+ struct verbs_flow_action *vaction =
+ container_of(action, struct verbs_flow_action, action);
+
+ if (!check_comp_mask(attr->comp_mask, IBV_FLOW_ACTION_ESP_MASK_ESN))
+ return EOPNOTSUPP;
+
+ return ibv_cmd_modify_flow_action_esp(vaction, attr, NULL);
+}
+
+struct ibv_flow_action *xscdv_create_flow_action_modify_header(struct ibv_context *ctx,
+ size_t actions_sz,
+ uint64_t actions[],
+ enum xscdv_flow_table_type ft_type)
+{
+ DECLARE_COMMAND_BUFFER(cmd, UVERBS_OBJECT_FLOW_ACTION,
+ XSC_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER,
+ 3);
+ struct ib_uverbs_attr *handle = fill_attr_out_obj(cmd,
+ XSC_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE);
+ struct verbs_flow_action *action;
+ int ret;
+
+ fill_attr_in(cmd, XSC_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM,
+ actions, actions_sz);
+ fill_attr_const_in(cmd, XSC_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE,
+ ft_type);
+
+ action = calloc(1, sizeof(*action));
+ if (!action) {
+ errno = ENOMEM;
+ return NULL;
+ }
+
+ ret = execute_ioctl(ctx, cmd);
+ if (ret) {
+ free(action);
+ return NULL;
+ }
+
+ action->action.context = ctx;
+ action->type = IBV_FLOW_ACTION_UNSPECIFIED;
+ action->handle = read_attr_obj(XSC_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE,
+ handle);
+
+ return &action->action;
+}
+
+struct ibv_flow_action *
+xscdv_create_flow_action_packet_reformat(struct ibv_context *ctx,
+ size_t data_sz,
+ void *data,
+ enum xscdv_flow_action_packet_reformat_type reformat_type,
+ enum xscdv_flow_table_type ft_type)
+{
+ DECLARE_COMMAND_BUFFER(cmd, UVERBS_OBJECT_FLOW_ACTION,
+ XSC_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT, 4);
+ struct ib_uverbs_attr *handle = fill_attr_out_obj(cmd,
+ XSC_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE);
+ struct verbs_flow_action *action;
+ int ret;
+
+ if ((!data && data_sz) || (data && !data_sz)) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ if (data && data_sz)
+ fill_attr_in(cmd,
+ XSC_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF,
+ data, data_sz);
+
+ fill_attr_const_in(cmd, XSC_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE,
+ reformat_type);
+
+ fill_attr_const_in(cmd, XSC_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE,
+ ft_type);
+
+ action = calloc(1, sizeof(*action));
+ if (!action) {
+ errno = ENOMEM;
+ return NULL;
+ }
+
+ ret = execute_ioctl(ctx, cmd);
+ if (ret) {
+ free(action);
+ return NULL;
+ }
+
+ action->action.context = ctx;
+ action->type = IBV_FLOW_ACTION_UNSPECIFIED;
+ action->handle = read_attr_obj(XSC_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE,
+ handle);
+
+ return &action->action;
+}
+
+int xsc_destroy_flow_action(struct ibv_flow_action *action)
+{
+ struct verbs_flow_action *vaction =
+ container_of(action, struct verbs_flow_action, action);
+ int ret = ibv_cmd_destroy_flow_action(vaction);
+
+ if (!ret)
+ free(action);
+
+ return ret;
+}
+
+static inline int xsc_access_dm(struct ibv_dm *ibdm, uint64_t dm_offset,
+ void *host_addr, size_t length,
+ uint32_t read)
+{
+ struct xsc_dm *dm = to_xdm(ibdm);
+ atomic_uint32_t *dm_ptr =
+ (atomic_uint32_t *)dm->start_va + dm_offset / 4;
+ uint32_t *host_ptr = host_addr;
+ const uint32_t *host_end = host_ptr + length / 4;
+
+ if (dm_offset + length > dm->length)
+ return EFAULT;
+
+ /* Due to HW limitation, DM access address and length must be aligned
+ * to 4 bytes.
+ */
+ if ((length & 3) || (dm_offset & 3))
+ return EINVAL;
+
+	/* Copy granularity is 4 bytes, since the copy size is enforced to
+	 * be a multiple of 4 bytes.
+	 */
+ if (read) {
+ while (host_ptr != host_end) {
+ *host_ptr = atomic_load_explicit(dm_ptr,
+ memory_order_relaxed);
+ host_ptr++;
+ dm_ptr++;
+ }
+ } else {
+ while (host_ptr != host_end) {
+ atomic_store_explicit(dm_ptr, *host_ptr,
+ memory_order_relaxed);
+ host_ptr++;
+ dm_ptr++;
+ }
+ }
+
+ return 0;
+}
+
+static inline int xsc_memcpy_to_dm(struct ibv_dm *ibdm, uint64_t dm_offset,
+ const void *host_addr, size_t length)
+{
+ return xsc_access_dm(ibdm, dm_offset, (void *)host_addr, length, 0);
+}
+
+static inline int xsc_memcpy_from_dm(void *host_addr, struct ibv_dm *ibdm,
+ uint64_t dm_offset, size_t length)
+{
+ return xsc_access_dm(ibdm, dm_offset, host_addr, length, 1);
+}
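+
+/*
+ * Illustrative use of the device-memory copy callbacks installed below
+ * (a sketch, not code exercised by the provider; 'ctx' stands for the
+ * application's ibv_context): both the DM offset and the length must be
+ * 4-byte aligned, as enforced in xsc_access_dm() above.
+ *
+ *	uint32_t pattern[4] = { 1, 2, 3, 4 };
+ *	struct ibv_alloc_dm_attr attr = { .length = sizeof(pattern) };
+ *	struct ibv_dm *dm = ibv_alloc_dm(ctx, &attr);
+ *
+ *	if (dm && !ibv_memcpy_to_dm(dm, 0, pattern, sizeof(pattern))) {
+ *		uint32_t readback[4];
+ *
+ *		ibv_memcpy_from_dm(readback, dm, 0, sizeof(readback));
+ *	}
+ */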
+
+struct ibv_dm *xsc_alloc_dm(struct ibv_context *context,
+ struct ibv_alloc_dm_attr *dm_attr)
+{
+ DECLARE_COMMAND_BUFFER(cmdb, UVERBS_OBJECT_DM, UVERBS_METHOD_DM_ALLOC,
+ 2);
+ int page_size = to_xdev(context->device)->page_size;
+ struct xsc_context *xctx = to_xctx(context);
+ uint64_t act_size, start_offset;
+ struct xsc_dm *dm;
+ uint16_t page_idx;
+ off_t offset = 0;
+ void *va;
+
+ if (!check_comp_mask(dm_attr->comp_mask, 0)) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ if (dm_attr->length > xctx->max_dm_size) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ dm = calloc(1, sizeof(*dm));
+ if (!dm) {
+ errno = ENOMEM;
+ return NULL;
+ }
+
+ fill_attr_out(cmdb, XSC_IB_ATTR_ALLOC_DM_RESP_START_OFFSET,
+ &start_offset, sizeof(start_offset));
+ fill_attr_out(cmdb, XSC_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX,
+ &page_idx, sizeof(page_idx));
+
+ if (ibv_cmd_alloc_dm(context, dm_attr, &dm->verbs_dm, cmdb))
+ goto err_free_mem;
+
+ act_size = align(dm_attr->length, page_size);
+ set_command(XSC_IB_MMAP_DEVICE_MEM, &offset);
+ set_extended_index(page_idx, &offset);
+ va = mmap(NULL, act_size, PROT_READ | PROT_WRITE,
+ MAP_SHARED, context->cmd_fd,
+ page_size * offset);
+ if (va == MAP_FAILED)
+ goto err_free_dm;
+
+ dm->mmap_va = va;
+ dm->length = dm_attr->length;
+ dm->start_va = va + (start_offset & (page_size - 1));
+ dm->verbs_dm.dm.memcpy_to_dm = xsc_memcpy_to_dm;
+ dm->verbs_dm.dm.memcpy_from_dm = xsc_memcpy_from_dm;
+
+ return &dm->verbs_dm.dm;
+
+err_free_dm:
+ ibv_cmd_free_dm(&dm->verbs_dm);
+
+err_free_mem:
+ free(dm);
+
+ return NULL;
+}
+
+int xsc_free_dm(struct ibv_dm *ibdm)
+{
+ struct xsc_device *xdev = to_xdev(ibdm->context->device);
+ struct xsc_dm *dm = to_xdm(ibdm);
+ size_t act_size = align(dm->length, xdev->page_size);
+ int ret;
+
+ ret = ibv_cmd_free_dm(&dm->verbs_dm);
+
+ if (ret)
+ return ret;
+
+ munmap(dm->mmap_va, act_size);
+ free(dm);
+ return 0;
+}
+
+struct ibv_counters *xsc_create_counters(struct ibv_context *context,
+ struct ibv_counters_init_attr *init_attr)
+{
+ struct xsc_counters *mcntrs;
+ int ret;
+
+ if (!check_comp_mask(init_attr->comp_mask, 0)) {
+ errno = EOPNOTSUPP;
+ return NULL;
+ }
+
+ mcntrs = calloc(1, sizeof(*mcntrs));
+ if (!mcntrs) {
+ errno = ENOMEM;
+ return NULL;
+ }
+
+ pthread_mutex_init(&mcntrs->lock, NULL);
+ ret = ibv_cmd_create_counters(context,
+ init_attr,
+ &mcntrs->vcounters,
+ NULL);
+ if (ret)
+ goto err_create;
+
+ list_head_init(&mcntrs->counters_list);
+
+ return &mcntrs->vcounters.counters;
+
+err_create:
+ free(mcntrs);
+ return NULL;
+}
+
+int xsc_destroy_counters(struct ibv_counters *counters)
+{
+ struct xsc_counters *mcntrs = to_mcounters(counters);
+ struct xsc_counter_node *tmp, *cntrs_node;
+ int ret;
+
+ ret = ibv_cmd_destroy_counters(&mcntrs->vcounters);
+ if (ret)
+ return ret;
+
+ list_for_each_safe(&mcntrs->counters_list, cntrs_node, tmp, entry) {
+ list_del(&cntrs_node->entry);
+ free(cntrs_node);
+ }
+
+ free(mcntrs);
+ return 0;
+}
+
+int xsc_attach_counters_point_flow(struct ibv_counters *counters,
+ struct ibv_counter_attach_attr *attr,
+ struct ibv_flow *flow)
+{
+ struct xsc_counters *mcntrs = to_mcounters(counters);
+ struct xsc_counter_node *cntrs_node;
+ int ret;
+
+ /* The driver supports only the static binding mode as part of ibv_create_flow */
+ if (flow)
+ return ENOTSUP;
+
+ if (!check_comp_mask(attr->comp_mask, 0))
+ return EOPNOTSUPP;
+
+ /* Check whether the attached counter is supported */
+ if (attr->counter_desc < IBV_COUNTER_PACKETS ||
+ attr->counter_desc > IBV_COUNTER_BYTES)
+ return ENOTSUP;
+
+ cntrs_node = calloc(1, sizeof(*cntrs_node));
+ if (!cntrs_node)
+ return ENOMEM;
+
+ pthread_mutex_lock(&mcntrs->lock);
+ /* The counter is bound to a flow, attach is not allowed */
+ if (mcntrs->refcount) {
+ ret = EBUSY;
+ goto err_already_bound;
+ }
+
+ cntrs_node->index = attr->index;
+ cntrs_node->desc = attr->counter_desc;
+ list_add(&mcntrs->counters_list, &cntrs_node->entry);
+ mcntrs->ncounters++;
+ pthread_mutex_unlock(&mcntrs->lock);
+
+ return 0;
+
+err_already_bound:
+ pthread_mutex_unlock(&mcntrs->lock);
+ free(cntrs_node);
+ return ret;
+}
+
+int xsc_read_counters(struct ibv_counters *counters,
+ uint64_t *counters_value,
+ uint32_t ncounters,
+ uint32_t flags)
+{
+ struct xsc_counters *mcntrs = to_mcounters(counters);
+
+ return ibv_cmd_read_counters(&mcntrs->vcounters,
+ counters_value,
+ ncounters,
+ flags,
+ NULL);
+}
+
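+/*
+ * Typical counters flow with this provider (an illustrative sketch; the
+ * variable names and the flow attributes are application-side assumptions):
+ * counters are bound statically, i.e. attached with a NULL flow before the
+ * flow is created, then read back at any time.
+ *
+ *	struct ibv_counters_init_attr cinit = {};
+ *	struct ibv_counters *cnt = ibv_create_counters(ctx, &cinit);
+ *	struct ibv_counter_attach_attr cattr = {
+ *		.counter_desc = IBV_COUNTER_PACKETS,
+ *		.index = 0,
+ *	};
+ *
+ *	ibv_attach_counters_point_flow(cnt, &cattr, NULL);
+ *	// flow_attr carries an IBV_FLOW_SPEC_ACTION_COUNT spec referencing
+ *	// cnt, which get_flow_mcounters() resolves at ibv_create_flow() time
+ *	flow = ibv_create_flow(qp, flow_attr);
+ *
+ *	uint64_t value;
+ *	ibv_read_counters(cnt, &value, 1, IBV_READ_COUNTERS_ATTR_PREFER_CACHED);
+ */
+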
+struct xscdv_flow_matcher *
+xscdv_create_flow_matcher(struct ibv_context *context,
+ struct xscdv_flow_matcher_attr *attr)
+{
+ DECLARE_COMMAND_BUFFER(cmd, XSC_IB_OBJECT_FLOW_MATCHER,
+ XSC_IB_METHOD_FLOW_MATCHER_CREATE,
+ 5);
+ struct xscdv_flow_matcher *flow_matcher;
+ struct ib_uverbs_attr *handle;
+ int ret;
+
+ if (!check_comp_mask(attr->comp_mask, 0)) {
+ errno = EOPNOTSUPP;
+ return NULL;
+ }
+
+ flow_matcher = calloc(1, sizeof(*flow_matcher));
+ if (!flow_matcher) {
+ errno = ENOMEM;
+ return NULL;
+ }
+
+ if (attr->type != IBV_FLOW_ATTR_NORMAL) {
+ errno = EOPNOTSUPP;
+ goto err;
+ }
+
+ handle = fill_attr_out_obj(cmd, XSC_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE);
+ fill_attr_in(cmd, XSC_IB_ATTR_FLOW_MATCHER_MATCH_MASK,
+ attr->match_mask->match_buf,
+ attr->match_mask->match_sz);
+ fill_attr_in(cmd, XSC_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA,
+ &attr->match_criteria_enable, sizeof(attr->match_criteria_enable));
+ fill_attr_in_enum(cmd, XSC_IB_ATTR_FLOW_MATCHER_FLOW_TYPE,
+ IBV_FLOW_ATTR_NORMAL, &attr->priority,
+ sizeof(attr->priority));
+ if (attr->flags)
+ fill_attr_const_in(cmd, XSC_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS,
+ attr->flags);
+
+ ret = execute_ioctl(context, cmd);
+ if (ret)
+ goto err;
+
+ flow_matcher->context = context;
+ flow_matcher->handle = read_attr_obj(XSC_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE, handle);
+
+ return flow_matcher;
+
+err:
+ free(flow_matcher);
+ return NULL;
+}
+
+int xscdv_destroy_flow_matcher(struct xscdv_flow_matcher *flow_matcher)
+{
+ DECLARE_COMMAND_BUFFER(cmd, XSC_IB_OBJECT_FLOW_MATCHER,
+ XSC_IB_METHOD_FLOW_MATCHER_DESTROY,
+ 1);
+ int ret;
+
+ fill_attr_in_obj(cmd, XSC_IB_ATTR_FLOW_MATCHER_DESTROY_HANDLE, flow_matcher->handle);
+ ret = execute_ioctl(flow_matcher->context, cmd);
+ verbs_is_destroy_err(&ret);
+
+ if (ret)
+ return ret;
+
+ free(flow_matcher);
+ return 0;
+}
+
+#define CREATE_FLOW_MAX_FLOW_ACTIONS_SUPPORTED 8
+struct ibv_flow *
+xscdv_create_flow(struct xscdv_flow_matcher *flow_matcher,
+ struct xscdv_flow_match_parameters *match_value,
+ size_t num_actions,
+ struct xscdv_flow_action_attr actions_attr[])
+{
+ uint32_t flow_actions[CREATE_FLOW_MAX_FLOW_ACTIONS_SUPPORTED];
+ struct verbs_flow_action *vaction;
+ int num_flow_actions = 0;
+ struct xsc_flow *mflow;
+ bool have_qp = false;
+ bool have_dest_devx = false;
+ bool have_flow_tag = false;
+ int ret;
+ int i;
+ DECLARE_COMMAND_BUFFER(cmd, UVERBS_OBJECT_FLOW,
+ XSC_IB_METHOD_CREATE_FLOW,
+ 6);
+ struct ib_uverbs_attr *handle;
+ enum xscdv_flow_action_type type;
+
+ mflow = calloc(1, sizeof(*mflow));
+ if (!mflow) {
+ errno = ENOMEM;
+ return NULL;
+ }
+
+ handle = fill_attr_out_obj(cmd, XSC_IB_ATTR_CREATE_FLOW_HANDLE);
+ fill_attr_in(cmd, XSC_IB_ATTR_CREATE_FLOW_MATCH_VALUE,
+ match_value->match_buf,
+ match_value->match_sz);
+ fill_attr_in_obj(cmd, XSC_IB_ATTR_CREATE_FLOW_MATCHER, flow_matcher->handle);
+
+ for (i = 0; i < num_actions; i++) {
+ type = actions_attr[i].type;
+ switch (type) {
+ case XSCDV_FLOW_ACTION_DEST_IBV_QP:
+ if (have_qp || have_dest_devx) {
+ errno = EOPNOTSUPP;
+ goto err;
+ }
+ fill_attr_in_obj(cmd, XSC_IB_ATTR_CREATE_FLOW_DEST_QP,
+ actions_attr[i].qp->handle);
+ have_qp = true;
+ break;
+ case XSCDV_FLOW_ACTION_IBV_FLOW_ACTION:
+ if (num_flow_actions ==
+ CREATE_FLOW_MAX_FLOW_ACTIONS_SUPPORTED) {
+ errno = EOPNOTSUPP;
+ goto err;
+ }
+ vaction = container_of(actions_attr[i].action,
+ struct verbs_flow_action,
+ action);
+
+ flow_actions[num_flow_actions] = vaction->handle;
+ num_flow_actions++;
+ break;
+ case XSCDV_FLOW_ACTION_DEST_DEVX:
+ if (have_dest_devx || have_qp) {
+ errno = EOPNOTSUPP;
+ goto err;
+ }
+ fill_attr_in_obj(cmd, XSC_IB_ATTR_CREATE_FLOW_DEST_DEVX,
+ actions_attr[i].obj->handle);
+ have_dest_devx = true;
+ break;
+ case XSCDV_FLOW_ACTION_TAG:
+ if (have_flow_tag) {
+ errno = EINVAL;
+ goto err;
+ }
+ fill_attr_in_uint32(cmd,
+ XSC_IB_ATTR_CREATE_FLOW_TAG,
+ actions_attr[i].tag_value);
+ have_flow_tag = true;
+ break;
+ default:
+ errno = EOPNOTSUPP;
+ goto err;
+ }
+ }
+
+ if (num_flow_actions)
+ fill_attr_in_objs_arr(cmd,
+ XSC_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS,
+ flow_actions,
+ num_flow_actions);
+ ret = execute_ioctl(flow_matcher->context, cmd);
+ if (ret)
+ goto err;
+
+ mflow->flow_id.handle = read_attr_obj(XSC_IB_ATTR_CREATE_FLOW_HANDLE, handle);
+ mflow->flow_id.context = flow_matcher->context;
+ return &mflow->flow_id;
+err:
+ free(mflow);
+ return NULL;
+}
+
+struct xscdv_devx_umem *
+xscdv_devx_umem_reg(struct ibv_context *context, void *addr, size_t size, uint32_t access)
+{
+ DECLARE_COMMAND_BUFFER(cmd,
+ XSC_IB_OBJECT_DEVX_UMEM,
+ XSC_IB_METHOD_DEVX_UMEM_REG,
+ 5);
+ struct ib_uverbs_attr *handle;
+ struct xsc_devx_umem *umem;
+ int ret;
+
+ umem = calloc(1, sizeof(*umem));
+ if (!umem) {
+ errno = ENOMEM;
+ return NULL;
+ }
+
+ if (ibv_dontfork_range(addr, size))
+ goto err;
+
+ fill_attr_in_uint64(cmd, XSC_IB_ATTR_DEVX_UMEM_REG_ADDR, (intptr_t)addr);
+ fill_attr_in_uint64(cmd, XSC_IB_ATTR_DEVX_UMEM_REG_LEN, size);
+ fill_attr_in_uint32(cmd, XSC_IB_ATTR_DEVX_UMEM_REG_ACCESS, access);
+ fill_attr_out(cmd, XSC_IB_ATTR_DEVX_UMEM_REG_OUT_ID,
+ &umem->dv_devx_umem.umem_id,
+ sizeof(umem->dv_devx_umem.umem_id));
+ handle = fill_attr_out_obj(cmd, XSC_IB_ATTR_DEVX_UMEM_REG_HANDLE);
+
+ ret = execute_ioctl(context, cmd);
+ if (ret)
+ goto err_umem_reg_cmd;
+
+ umem->handle = read_attr_obj(XSC_IB_ATTR_DEVX_UMEM_REG_HANDLE, handle);
+ umem->context = context;
+ umem->addr = addr;
+ umem->size = size;
+
+ return &umem->dv_devx_umem;
+
+err_umem_reg_cmd:
+ ibv_dofork_range(addr, size);
+err:
+ free(umem);
+ return NULL;
+}
+
+int xscdv_devx_umem_dereg(struct xscdv_devx_umem *dv_devx_umem)
+{
+ DECLARE_COMMAND_BUFFER(cmd,
+ XSC_IB_OBJECT_DEVX_UMEM,
+ XSC_IB_METHOD_DEVX_UMEM_DEREG,
+ 1);
+ int ret;
+ struct xsc_devx_umem *umem = container_of(dv_devx_umem, struct xsc_devx_umem,
+ dv_devx_umem);
+
+ fill_attr_in_obj(cmd, XSC_IB_ATTR_DEVX_UMEM_DEREG_HANDLE, umem->handle);
+ ret = execute_ioctl(umem->context, cmd);
+ if (ret)
+ return ret;
+
+ ibv_dofork_range(umem->addr, umem->size);
+ free(umem);
+ return 0;
+}
+
+struct xscdv_devx_obj *
+xscdv_devx_obj_create(struct ibv_context *context, const void *in, size_t inlen,
+ void *out, size_t outlen)
+{
+ DECLARE_COMMAND_BUFFER(cmd,
+ XSC_IB_OBJECT_DEVX_OBJ,
+ XSC_IB_METHOD_DEVX_OBJ_CREATE,
+ 3);
+ struct ib_uverbs_attr *handle;
+ struct xscdv_devx_obj *obj;
+ int ret;
+
+ obj = calloc(1, sizeof(*obj));
+ if (!obj) {
+ errno = ENOMEM;
+ return NULL;
+ }
+
+ handle = fill_attr_out_obj(cmd, XSC_IB_ATTR_DEVX_OBJ_CREATE_HANDLE);
+ fill_attr_in(cmd, XSC_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN, in, inlen);
+ fill_attr_out(cmd, XSC_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, out, outlen);
+
+ ret = execute_ioctl(context, cmd);
+ if (ret)
+ goto err;
+
+ obj->handle = read_attr_obj(XSC_IB_ATTR_DEVX_OBJ_CREATE_HANDLE, handle);
+ obj->context = context;
+ return obj;
+err:
+ free(obj);
+ return NULL;
+}
+
+int xscdv_devx_obj_query(struct xscdv_devx_obj *obj, const void *in, size_t inlen,
+ void *out, size_t outlen)
+{
+ DECLARE_COMMAND_BUFFER(cmd,
+ XSC_IB_OBJECT_DEVX_OBJ,
+ XSC_IB_METHOD_DEVX_OBJ_QUERY,
+ 3);
+
+ fill_attr_in_obj(cmd, XSC_IB_ATTR_DEVX_OBJ_QUERY_HANDLE, obj->handle);
+ fill_attr_in(cmd, XSC_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN, in, inlen);
+ fill_attr_out(cmd, XSC_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT, out, outlen);
+
+ return execute_ioctl(obj->context, cmd);
+}
+
+int xscdv_devx_obj_modify(struct xscdv_devx_obj *obj, const void *in, size_t inlen,
+ void *out, size_t outlen)
+{
+ DECLARE_COMMAND_BUFFER(cmd,
+ XSC_IB_OBJECT_DEVX_OBJ,
+ XSC_IB_METHOD_DEVX_OBJ_MODIFY,
+ 3);
+
+ fill_attr_in_obj(cmd, XSC_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE, obj->handle);
+ fill_attr_in(cmd, XSC_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN, in, inlen);
+ fill_attr_out(cmd, XSC_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT, out, outlen);
+
+ return execute_ioctl(obj->context, cmd);
+}
+
+int xscdv_devx_obj_destroy(struct xscdv_devx_obj *obj)
+{
+ DECLARE_COMMAND_BUFFER(cmd,
+ XSC_IB_OBJECT_DEVX_OBJ,
+ XSC_IB_METHOD_DEVX_OBJ_DESTROY,
+ 1);
+ int ret;
+
+ fill_attr_in_obj(cmd, XSC_IB_ATTR_DEVX_OBJ_DESTROY_HANDLE, obj->handle);
+ ret = execute_ioctl(obj->context, cmd);
+
+ if (ret)
+ return ret;
+ free(obj);
+ return 0;
+}
+
+int xscdv_devx_general_cmd(struct ibv_context *context, const void *in, size_t inlen,
+ void *out, size_t outlen)
+{
+ DECLARE_COMMAND_BUFFER(cmd,
+ XSC_IB_OBJECT_DEVX,
+ XSC_IB_METHOD_DEVX_OTHER,
+ 2);
+
+ fill_attr_in(cmd, XSC_IB_ATTR_DEVX_OTHER_CMD_IN, in, inlen);
+ fill_attr_out(cmd, XSC_IB_ATTR_DEVX_OTHER_CMD_OUT, out, outlen);
+
+ return execute_ioctl(context, cmd);
+}
+
+int xscdv_devx_query_eqn(struct ibv_context *context, uint32_t vector,
+ uint32_t *eqn)
+{
+ DECLARE_COMMAND_BUFFER(cmd,
+ XSC_IB_OBJECT_DEVX,
+ XSC_IB_METHOD_DEVX_QUERY_EQN,
+ 2);
+
+ fill_attr_in_uint32(cmd, XSC_IB_ATTR_DEVX_QUERY_EQN_USER_VEC, vector);
+ fill_attr_out_ptr(cmd, XSC_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN, eqn);
+
+ return execute_ioctl(context, cmd);
+}
diff --git a/providers/xscale/wqe.h b/providers/xscale/wqe.h
new file mode 100644
index 0000000..4b7f327
--- /dev/null
+++ b/providers/xscale/wqe.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd.
+ * All rights reserved.
+ */
+
+#ifndef WQE_H
+#define WQE_H
+
+#include <stdint.h>
+
+struct xsc_wqe_eth_pad {
+ uint8_t rsvd0[16];
+};
+
+struct xsc_wqe_xrc_seg {
+ __be32 xrc_srqn;
+ uint8_t rsvd[12];
+};
+
+enum {
+ XSC_IPOIB_INLINE_MIN_HEADER_SIZE = 4,
+ XSC_SOURCE_QPN_INLINE_MAX_HEADER_SIZE = 18,
+ XSC_ETH_L2_INLINE_HEADER_SIZE = 18,
+ XSC_ETH_L2_MIN_HEADER_SIZE = 14,
+};
+
+struct xsc_wqe_umr_ctrl_seg {
+ uint8_t flags;
+ uint8_t rsvd0[3];
+ __be16 klm_octowords;
+ __be16 translation_offset;
+ __be64 mkey_mask;
+ uint8_t rsvd1[32];
+};
+
+struct xsc_wqe_umr_klm_seg {
+ /* up to 2GB */
+ __be32 byte_count;
+ __be32 mkey;
+ __be64 address;
+};
+
+union xsc_wqe_umr_inline_seg {
+ struct xsc_wqe_umr_klm_seg klm;
+};
+
+struct xsc_wqe_mkey_context_seg {
+ uint8_t free;
+ uint8_t reserved1;
+ uint8_t access_flags;
+ uint8_t sf;
+ __be32 qpn_mkey;
+ __be32 reserved2;
+ __be32 flags_pd;
+ __be64 start_addr;
+ __be64 len;
+ __be32 bsf_octword_size;
+ __be32 reserved3[4];
+ __be32 translations_octword_size;
+ uint8_t reserved4[3];
+ uint8_t log_page_size;
+ __be32 reserved;
+ union xsc_wqe_umr_inline_seg inseg[0];
+};
+
+struct xsc_rwqe_sig {
+ uint8_t rsvd0[4];
+ uint8_t signature;
+ uint8_t rsvd1[11];
+};
+
+#endif /* WQE_H */
diff --git a/providers/xscale/xsc-abi.h b/providers/xscale/xsc-abi.h
new file mode 100644
index 0000000..7eab95c
--- /dev/null
+++ b/providers/xscale/xsc-abi.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd.
+ * All rights reserved.
+ */
+
+#ifndef XSC_ABI_H
+#define XSC_ABI_H
+
+#include <infiniband/kern-abi.h>
+#include <infiniband/verbs.h>
+#include <rdma/xsc-abi.h>
+#include <kernel-abi/xsc-abi.h>
+#include "xscdv.h"
+
+#define XSC_UVERBS_MIN_ABI_VERSION 1
+#define XSC_UVERBS_MAX_ABI_VERSION 1
+
+DECLARE_DRV_CMD(xsc_alloc_ucontext, IB_USER_VERBS_CMD_GET_CONTEXT,
+ xsc_ib_alloc_ucontext_req, xsc_ib_alloc_ucontext_resp);
+DECLARE_DRV_CMD(xsc_create_ah, IB_USER_VERBS_CMD_CREATE_AH,
+ empty, xsc_ib_create_ah_resp);
+DECLARE_DRV_CMD(xsc_alloc_pd, IB_USER_VERBS_CMD_ALLOC_PD,
+ empty, xsc_ib_alloc_pd_resp);
+DECLARE_DRV_CMD(xsc_create_cq, IB_USER_VERBS_CMD_CREATE_CQ,
+ xsc_ib_create_cq, xsc_ib_create_cq_resp);
+DECLARE_DRV_CMD(xsc_create_cq_ex, IB_USER_VERBS_EX_CMD_CREATE_CQ,
+ xsc_ib_create_cq, xsc_ib_create_cq_resp);
+DECLARE_DRV_CMD(xsc_create_qp_ex, IB_USER_VERBS_EX_CMD_CREATE_QP,
+ xsc_ib_create_qp, xsc_ib_create_qp_resp);
+DECLARE_DRV_CMD(xsc_create_qp_ex_rss, IB_USER_VERBS_EX_CMD_CREATE_QP,
+ xsc_ib_create_qp_rss, xsc_ib_create_qp_resp);
+DECLARE_DRV_CMD(xsc_create_qp, IB_USER_VERBS_CMD_CREATE_QP,
+ xsc_ib_create_qp, xsc_ib_create_qp_resp);
+DECLARE_DRV_CMD(xsc_create_wq, IB_USER_VERBS_EX_CMD_CREATE_WQ,
+ xsc_ib_create_wq, xsc_ib_create_wq_resp);
+DECLARE_DRV_CMD(xsc_modify_wq, IB_USER_VERBS_EX_CMD_MODIFY_WQ,
+ xsc_ib_modify_wq, empty);
+DECLARE_DRV_CMD(xsc_create_rwq_ind_table, IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL,
+ empty, empty);
+DECLARE_DRV_CMD(xsc_destroy_rwq_ind_table, IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL,
+ empty, empty);
+DECLARE_DRV_CMD(xsc_resize_cq, IB_USER_VERBS_CMD_RESIZE_CQ,
+ xsc_ib_resize_cq, empty);
+DECLARE_DRV_CMD(xsc_query_device_ex, IB_USER_VERBS_EX_CMD_QUERY_DEVICE,
+ empty, xsc_ib_query_device_resp);
+DECLARE_DRV_CMD(xsc_modify_qp_ex, IB_USER_VERBS_EX_CMD_MODIFY_QP,
+ empty, xsc_ib_modify_qp_resp);
+
+struct xsc_modify_qp {
+ struct ibv_modify_qp_ex ibv_cmd;
+ __u32 comp_mask;
+ struct xsc_ib_burst_info burst_info;
+ __u32 reserved;
+};
+
+#endif /* XSC_ABI_H */
diff --git a/providers/xscale/xsc_api.h b/providers/xscale/xsc_api.h
new file mode 100644
index 0000000..c533019
--- /dev/null
+++ b/providers/xscale/xsc_api.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd.
+ * All rights reserved.
+ */
+
+#ifndef XSC_API_H
+#define XSC_API_H
+
+#include <infiniband/xsc_user_ioctl_verbs.h>
+
+#define xscdv_flow_action_flags xsc_ib_uapi_flow_action_flags
+#define XSCDV_FLOW_ACTION_FLAGS_REQUIRE_METADATA XSC_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA
+#define xscdv_flow_table_type xsc_ib_uapi_flow_table_type
+#define XSCDV_FLOW_TABLE_TYPE_NIC_RX XSC_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX
+#define XSCDV_FLOW_TABLE_TYPE_NIC_TX XSC_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX
+#define xscdv_flow_action_packet_reformat_type xsc_ib_uapi_flow_action_packet_reformat_type
+#define XSCDV_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2 XSC_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2
+#define XSCDV_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL XSC_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL
+#define XSCDV_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2 XSC_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2
+#define XSCDV_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL XSC_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL
+
+enum xsc_qp_create_flags {
+	/* misspelled alias of XSC_QP_CREATE_RAWPACKET_TSO below; same value */
+	XSC_QP_CREATE_RAWPACKE_TSO = 1 << 0,
+	XSC_QP_CREATE_RAWPACKET_TSO = 1 << 0,
+ XSC_QP_CREATE_RAWPACKET_TX = 1 << 1,
+};
+
+#endif /* XSC_API_H */
diff --git a/providers/xscale/xsc_hsi.h b/providers/xscale/xsc_hsi.h
new file mode 100644
index 0000000..53fe552
--- /dev/null
+++ b/providers/xscale/xsc_hsi.h
@@ -0,0 +1,252 @@
+/*
+ * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd.
+ * All rights reserved.
+ */
+
+#ifndef __XSC_HSI_H__
+#define __XSC_HSI_H__
+
+#include <linux/types.h>
+#include <asm/byteorder.h>
+#include "sqm_csr_defines.h"
+#include "rqm_csr_defines.h"
+#include "cqm_csr_defines.h"
+
+#define upper_32_bits(n) ((uint32_t)(((n) >> 16) >> 16))
+#define lower_32_bits(n) ((uint32_t)(n))
+
+#define DMA_LO_LE(x) __cpu_to_le32(lower_32_bits(x))
+#define DMA_HI_LE(x) __cpu_to_le32(upper_32_bits(x))
+#define DMA_REGPAIR_LE(x, val) do { \
+ (x).hi = DMA_HI_LE((val)); \
+ (x).lo = DMA_LO_LE((val)); \
+ } while (0)
+
+#define WR_LE_16(x, val) x = __cpu_to_le16(val)
+#define WR_LE_32(x, val) x = __cpu_to_le32(val)
+#define WR_LE_64(x, val) x = __cpu_to_le64(val)
+#define WR_LE_R64(x, val) DMA_REGPAIR_LE(x, val)
+#define WR_BE_32(x, val) x = __cpu_to_be32(val)
+
+#define RD_LE_16(x) __le16_to_cpu(x)
+#define RD_LE_32(x) __le32_to_cpu(x)
+#define RD_BE_32(x) __be32_to_cpu(x)
+
+#define WR_REG(addr, val) mmio_write32_le(addr, val)
+#define RD_REG(addr) mmio_read32_le(addr)
+
+/* message opcode */
+enum {
+ XSC_MSG_OPCODE_SEND = 0,
+ XSC_MSG_OPCODE_RDMA_WRITE = 1,
+ XSC_MSG_OPCODE_RDMA_READ = 2,
+ XSC_MSG_OPCODE_MAD = 3,
+ XSC_MSG_OPCODE_RDMA_ACK = 4,
+ XSC_MSG_OPCODE_RDMA_ACK_READ = 5,
+ XSC_MSG_OPCODE_RDMA_CNP = 6,
+ XSC_MSG_OPCODE_RAW = 7,
+ XSC_MSG_OPCODE_VIRTIO_NET = 8,
+ XSC_MSG_OPCODE_VIRTIO_BLK = 9,
+ XSC_MSG_OPCODE_RAW_TPE = 10,
+ XSC_MSG_OPCODE_INIT_QP_REQ = 11,
+ XSC_MSG_OPCODE_INIT_QP_RSP = 12,
+ XSC_MSG_OPCODE_INIT_PATH_REQ = 13,
+ XSC_MSG_OPCODE_INIT_PATH_RSP = 14,
+};
+
+enum {
+ XSC_REQ = 0,
+ XSC_RSP = 1,
+};
+
+enum {
+ XSC_WITHOUT_IMMDT = 0,
+ XSC_WITH_IMMDT = 1,
+};
+
+enum {
+ XSC_ERR_CODE_NAK_RETRY = 0x40,
+ XSC_ERR_CODE_NAK_OPCODE = 0x41,
+ XSC_ERR_CODE_NAK_MR = 0x42,
+ XSC_ERR_CODE_NAK_OPERATION = 0x43,
+ XSC_ERR_CODE_NAK_RNR = 0x44,
+ XSC_ERR_CODE_LOCAL_MR = 0x45,
+ XSC_ERR_CODE_LOCAL_LEN = 0x46,
+ XSC_ERR_CODE_LOCAL_OPCODE = 0x47,
+ XSC_ERR_CODE_CQ_OVER_FLOW = 0x48,
+ XSC_ERR_CODE_STRG_ACC_GEN_CQE = 0x4c,
+ XSC_ERR_CODE_CQE_ACC = 0x4d,
+ XSC_ERR_CODE_FLUSH = 0x4e,
+ XSC_ERR_CODE_MALF_WQE_HOST = 0x50,
+ XSC_ERR_CODE_MALF_WQE_INFO = 0x51,
+ XSC_ERR_CODE_MR_NON_NAK = 0x52,
+ XSC_ERR_CODE_OPCODE_GEN_CQE = 0x61,
+ XSC_ERR_CODE_MANY_READ = 0x62,
+ XSC_ERR_CODE_LEN_GEN_CQE = 0x63,
+ XSC_ERR_CODE_MR = 0x65,
+ XSC_ERR_CODE_MR_GEN_CQE = 0x66,
+ XSC_ERR_CODE_OPERATION = 0x67,
+ XSC_ERR_CODE_MALF_WQE_INFO_GEN_NAK = 0x68,
+};
+
+/* TODO: sw cqe opcode */
+enum {
+ XSC_OPCODE_RDMA_REQ_SEND = 0,
+ XSC_OPCODE_RDMA_REQ_SEND_IMMDT = 1,
+ XSC_OPCODE_RDMA_RSP_RECV = 2,
+ XSC_OPCODE_RDMA_RSP_RECV_IMMDT = 3,
+ XSC_OPCODE_RDMA_REQ_WRITE = 4,
+ XSC_OPCODE_RDMA_REQ_WRITE_IMMDT = 5,
+ XSC_OPCODE_RDMA_RSP_WRITE_IMMDT = 6,
+ XSC_OPCODE_RDMA_REQ_READ = 7,
+ XSC_OPCODE_RDMA_REQ_ERROR = 8,
+ XSC_OPCODE_RDMA_RSP_ERROR = 9,
+ XSC_OPCODE_RDMA_CQE_ERROR = 10,
+};
+
+enum {
+ XSC_BASE_WQE_SHIFT = 4,
+};
+
+/*
+ * Descriptors that are allocated by SW and accessed by HW, 32-byte aligned
+ */
+/* this is to keep descriptor structures packed */
+struct regpair {
+ __le32 lo;
+ __le32 hi;
+};
+
+struct xsc_send_wqe_ctrl_seg {
+ uint8_t msg_opcode;
+ uint8_t with_immdt:1;
+ uint8_t :2;
+ uint8_t ds_data_num:5;
+ __le16 wqe_id;
+ __le32 msg_len;
+ __le32 opcode_data;
+ uint8_t se:1;
+ uint8_t ce:1;
+ uint8_t in_line:1;
+ __le32 :29;
+};
+
+struct xsc_wqe_data_seg {
+ union {
+ struct {
+ uint8_t :1;
+ __le32 seg_len:31;
+ __le32 mkey;
+ __le64 va;
+ };
+ struct {
+ uint8_t in_line_data[16];
+ };
+ };
+};
+
+struct xsc_cqe {
+ union {
+ uint8_t msg_opcode;
+ struct {
+ uint8_t error_code:7;
+ uint8_t is_error:1;
+ };
+ };
+ __le32 qp_id:15;
+ uint8_t :1;
+ uint8_t se:1;
+ uint8_t has_pph:1;
+ uint8_t type:1;
+ uint8_t with_immdt:1;
+ uint8_t csum_err:4;
+ __le32 imm_data;
+ __le32 msg_len;
+ __le32 vni;
+ __le64 ts:48;
+ __le16 wqe_id;
+ __le16 rsv[3];
+ __le16 rsv1:15;
+ uint8_t owner:1;
+};
+
+/* Size of CQE */
+#define XSC_CQE_SIZE sizeof(struct xsc_cqe)
+
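+/*
+ * Layout of the 32-bit doorbell value written by the provider: it carries
+ * either the SQ next producer index plus SQ number, the RQ next producer
+ * index plus RQ number, or the CQ next consumer index plus CQ number and
+ * the solicited-arm bit, depending on which ring is being rung.
+ */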
+union xsc_db_data {
+ struct {
+ __le32 sq_next_pid:16;
+ __le32 sqn:15;
+ __le32 :1;
+ };
+ struct {
+ __le32 rq_next_pid:13;
+ __le32 rqn:15;
+ __le32 :4;
+ };
+ struct {
+ __le32 cq_next_cid:16;
+ __le32 cqn:15;
+ __le32 solicited:1;
+ };
+ __le32 raw_data;
+};
+
+#define CQM_DB_NEXT_CID_OFFSET(n) (4 * (n))
+
+#define XSC_SEND_WQE_RING_DEPTH_MIN 16
+#define XSC_CQE_RING_DEPTH_MIN 2
+#define XSC_SEND_WQE_RING_DEPTH_MAX 1024
+#define XSC_RECV_WQE_RING_DEPTH_MAX 1024
+#define XSC_CQE_RING_DEPTH_MAX (1024 * 32)
+
+/*
+ * Registers that are allocated by HW and accessed by SW in 4-byte granularity
+ */
+/* MMT table (32 bytes) */
+struct xsc_mmt_tbl {
+ struct regpair pa;
+ struct regpair va;
+ __le32 size;
+#define XSC_MMT_TBL_PD_MASK 0x00FFFFFF
+#define XSC_MMT_TBL_KEY_MASK 0xFF000000
+ __le32 key_pd;
+#define XSC_MMT_TBL_ACC_MASK 0x0000000F
+ __le32 acc;
+ uint8_t padding[4];
+};
+
+/* QP Context (16 bytes) */
+struct xsc_qp_context {
+#define XSC_QP_CONTEXT_STATE_MASK 0x00000007
+#define XSC_QP_CONTEXT_FUNC_MASK 0x00000018
+#define XSC_QP_CONTEXT_DSTID_MASK 0x000000E0
+#define XSC_QP_CONTEXT_PD_MASK 0xFFFFFF00
+ __le32 pd_dstid_func_state;
+#define XSC_QP_CONTEXT_DSTQP_MASK 0x00FFFFFF
+#define XSC_QP_CONTEXT_RCQIDL_MASK 0xFF000000
+ __le32 rcqidl_dstqp;
+#define XSC_QP_CONTEXT_RCQIDH_MASK 0x0000FFFF
+#define XSC_QP_CONTEXT_SCQIDL_MASK 0xFFFF0000
+ __le32 scqidl_rcqidh;
+#define XSC_QP_CONTEXT_SCQIDH_MASK 0x000000FF
+ __le32 scqidh;
+};
+
+/* TODO: EPP Table and others */
+
+static inline bool xsc_get_cqe_sw_own(struct xsc_cqe *cqe, int cid, int ring_sz) ALWAYS_INLINE;
+
+static inline void xsc_set_cqe_sw_own(struct xsc_cqe *cqe, int pid, int ring_sz) ALWAYS_INLINE;
+
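+/*
+ * Ownership (phase) bit scheme, assuming ring_sz is the log2 of the ring
+ * depth: the producer/consumer indices grow monotonically and bit ring_sz
+ * of the index toggles on every wrap of the ring. A CQE is valid for
+ * software when its owner bit matches that phase. For example, with
+ * ring_sz = 4 (16 entries), indices 0..15 expect owner == 0, indices
+ * 16..31 expect owner == 1, and so on.
+ */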
+static inline bool xsc_get_cqe_sw_own(struct xsc_cqe *cqe, int cid, int ring_sz)
+{
+ return cqe->owner == ((cid >> ring_sz) & 1);
+}
+
+static inline void xsc_set_cqe_sw_own(struct xsc_cqe *cqe, int pid, int ring_sz)
+{
+ cqe->owner = ((pid >> ring_sz) & 1);
+}
+#endif /* __XSC_HSI_H__ */
diff --git a/providers/xscale/xsc_hw.h b/providers/xscale/xsc_hw.h
new file mode 100755
index 0000000..f2b0ce3
--- /dev/null
+++ b/providers/xscale/xsc_hw.h
@@ -0,0 +1,584 @@
+#ifndef _XSC_HW_H_
+#define _XSC_HW_H_
+
+#include <util/mmio.h>
+
+#include "xscale.h"
+
+struct xsc_andes_cqe {
+ union {
+ uint8_t msg_opcode;
+ struct {
+ uint8_t error_code:7;
+ uint8_t is_error:1;
+ };
+ };
+ __le32 qp_id:15;
+ uint8_t :1;
+ uint8_t se:1;
+ uint8_t has_pph:1;
+ uint8_t type:1;
+ uint8_t with_immdt:1;
+ uint8_t csum_err:4;
+ __le32 imm_data;
+ __le32 msg_len;
+ __le32 vni;
+ __le64 ts:48;
+ __le16 wqe_id;
+ __le16 rsv[3];
+ __le16 rsv1:15;
+ uint8_t owner:1;
+};
+
+union xsc_andes_cq_doorbell {
+ struct {
+ uint32_t cq_next_cid:16;
+ uint32_t cq_id:15;
+ uint32_t arm:1;
+ };
+ uint32_t val;
+};
+
+union xsc_andes_send_doorbell {
+ struct {
+ uint32_t next_pid:16;
+ uint32_t qp_id:15;
+ };
+ uint32_t raw;
+};
+
+union xsc_andes_recv_doorbell {
+ struct {
+ uint32_t next_pid:13;
+ uint32_t qp_id:15;
+ };
+ uint32_t raw;
+};
+
+struct xsc_andes_data_seg {
+ uint32_t :1;
+ uint32_t length:31;
+ uint32_t key;
+ uint64_t addr;
+};
+
+struct xsc_diamond_cqe {
+ uint8_t error_code;
+ __le32 qp_id:15;
+ uint8_t :1;
+ uint8_t se:1;
+ uint8_t has_pph:1;
+ uint8_t type:1;
+ uint8_t with_immdt:1;
+ uint8_t csum_err:4;
+ __le32 imm_data;
+ __le32 msg_len;
+ __le32 vni;
+ __le64 ts:48;
+ __le16 wqe_id;
+ uint8_t msg_opcode;
+ uint8_t rsv;
+ __le16 rsv1[2];
+ __le16 rsv2:15;
+ uint8_t owner:1;
+};
+
+union xsc_diamond_cq_doorbell {
+ struct {
+ uint64_t cq_next_cid:23;
+ uint64_t cq_id:14;
+ uint64_t cq_sta:2;
+ };
+ uint64_t raw;
+};
+
+union xsc_diamond_recv_doorbell {
+ struct {
+ uint64_t next_pid:14;
+ uint64_t qp_id:14;
+ };
+ uint64_t raw;
+};
+
+union xsc_diamond_send_doorbell {
+ struct {
+ uint64_t next_pid:17;
+ uint64_t qp_id:14;
+ };
+ uint64_t raw;
+};
+
+struct xsc_diamond_data_seg {
+ uint32_t length;
+ uint32_t key;
+ uint64_t addr;
+};
+
+union xsc_diamond_next_cq_doorbell {
+ struct {
+ uint64_t cq_next_cid:23;
+ uint64_t cq_id:10;
+ uint64_t cq_sta:2;
+ };
+ uint64_t raw;
+};
+
+union xsc_diamond_next_send_doorbell {
+ struct {
+ uint64_t next_pid:17;
+ uint64_t qp_id:10;
+ };
+ uint64_t raw;
+};
+
+union xsc_diamond_next_recv_doorbell {
+ struct {
+ uint64_t next_pid:14;
+ uint64_t qp_id:10;
+ };
+ uint64_t raw;
+};
+
+enum {
+ XSC_CQ_STAT_FIRED,
+ XSC_CQ_STAT_KEEP,
+ XSC_CQ_STAT_ARM_NEXT,
+ XSC_CQ_STAT_ARM_SOLICITED,
+};
+
+#define XSC_HW_ALWAYS_INLINE inline __attribute__((always_inline))
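+
+/*
+ * The helpers below come in per-ASIC variants (andes vs. diamond vs.
+ * diamond-next) that differ in CQE layout, doorbell layout and doorbell
+ * register width (32-bit vs. 64-bit writes). The xsc_hw_*() wrappers pick
+ * the variant at run time from the PCI device id and fall back to the
+ * andes layout for unknown ids.
+ */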
+
+static XSC_HW_ALWAYS_INLINE uint8_t xsc_diamond_get_cqe_msg_opcode(void *cqe)
+{
+ return ((struct xsc_diamond_cqe *)cqe)->msg_opcode;
+}
+
+static XSC_HW_ALWAYS_INLINE uint8_t xsc_andes_get_cqe_msg_opcode(void *cqe)
+{
+ return ((struct xsc_andes_cqe *)cqe)->msg_opcode;
+}
+
+static XSC_HW_ALWAYS_INLINE uint8_t xsc_hw_get_cqe_msg_opcode(uint16_t device_id, void *cqe)
+{
+ switch (device_id) {
+ case XSC_MS_PF_DEV_ID:
+ case XSC_MS_VF_DEV_ID:
+ return xsc_andes_get_cqe_msg_opcode(cqe);
+ case XSC_MC_PF_DEV_ID_DIAMOND:
+ case XSC_MC_PF_DEV_ID_DIAMOND_NEXT:
+ return xsc_diamond_get_cqe_msg_opcode(cqe);
+ default:
+ return xsc_andes_get_cqe_msg_opcode(cqe);
+ }
+}
+
+static XSC_HW_ALWAYS_INLINE bool xsc_diamond_is_err_cqe(void *cqe)
+{
+ return !!((struct xsc_diamond_cqe *)cqe)->error_code;
+}
+
+static XSC_HW_ALWAYS_INLINE bool xsc_andes_is_err_cqe(void *cqe)
+{
+ return ((struct xsc_andes_cqe *)cqe)->is_error;
+}
+
+static XSC_HW_ALWAYS_INLINE bool xsc_hw_is_err_cqe(uint16_t device_id, void *cqe)
+{
+ switch (device_id) {
+ case XSC_MS_PF_DEV_ID:
+ case XSC_MS_VF_DEV_ID:
+ return xsc_andes_is_err_cqe(cqe);
+ case XSC_MC_PF_DEV_ID_DIAMOND:
+ case XSC_MC_PF_DEV_ID_DIAMOND_NEXT:
+ return xsc_diamond_is_err_cqe(cqe);
+ default:
+ return xsc_andes_is_err_cqe(cqe);
+ }
+}
+
+static XSC_HW_ALWAYS_INLINE uint8_t xsc_diamond_get_cqe_err_code(void *cqe)
+{
+ return ((struct xsc_diamond_cqe *)cqe)->error_code;
+}
+
+static XSC_HW_ALWAYS_INLINE uint8_t xsc_andes_get_cqe_err_code(void *cqe)
+{
+ return ((struct xsc_andes_cqe *)cqe)->error_code;
+}
+
+static XSC_HW_ALWAYS_INLINE uint8_t xsc_hw_get_cqe_err_code(uint16_t device_id, void *cqe)
+{
+ switch (device_id) {
+ case XSC_MS_PF_DEV_ID:
+ case XSC_MS_VF_DEV_ID:
+ return xsc_andes_get_cqe_err_code(cqe);
+ case XSC_MC_PF_DEV_ID_DIAMOND:
+ case XSC_MC_PF_DEV_ID_DIAMOND_NEXT:
+ return xsc_diamond_get_cqe_err_code(cqe);
+ default:
+ return xsc_andes_get_cqe_err_code(cqe);
+ }
+}
+
+static inline enum ibv_wc_status xsc_andes_cqe_err_code(uint8_t error_code)
+{
+ switch (error_code) {
+ case XSC_ANDES_ERR_CODE_NAK_RETRY:
+ return IBV_WC_RETRY_EXC_ERR;
+ case XSC_ANDES_ERR_CODE_NAK_OPCODE:
+ return IBV_WC_REM_INV_REQ_ERR;
+ case XSC_ANDES_ERR_CODE_NAK_MR:
+ return IBV_WC_REM_ACCESS_ERR;
+ case XSC_ANDES_ERR_CODE_NAK_OPERATION:
+ return IBV_WC_REM_OP_ERR;
+ case XSC_ANDES_ERR_CODE_NAK_RNR:
+ return IBV_WC_RNR_RETRY_EXC_ERR;
+ case XSC_ANDES_ERR_CODE_LOCAL_MR:
+ return IBV_WC_LOC_PROT_ERR;
+ case XSC_ANDES_ERR_CODE_LOCAL_LEN:
+ return IBV_WC_LOC_LEN_ERR;
+ case XSC_ANDES_ERR_CODE_LEN_GEN_CQE:
+ return IBV_WC_LOC_LEN_ERR;
+ case XSC_ANDES_ERR_CODE_OPERATION:
+ return IBV_WC_LOC_ACCESS_ERR;
+ case XSC_ANDES_ERR_CODE_FLUSH:
+ return IBV_WC_WR_FLUSH_ERR;
+ case XSC_ANDES_ERR_CODE_MALF_WQE_HOST:
+ case XSC_ANDES_ERR_CODE_STRG_ACC_GEN_CQE:
+ case XSC_ANDES_ERR_CODE_STRG_ACC:
+ return IBV_WC_FATAL_ERR;
+ case XSC_ANDES_ERR_CODE_MR_GEN_CQE:
+ return IBV_WC_LOC_PROT_ERR;
+ case XSC_ANDES_ERR_CODE_LOCAL_OPERATION_WQE:
+ return IBV_WC_LOC_QP_OP_ERR;
+ case XSC_ANDES_ERR_CODE_OPCODE_GEN_CQE:
+ case XSC_ANDES_ERR_CODE_LOCAL_OPCODE:
+ default:
+ return IBV_WC_GENERAL_ERR;
+ }
+}
+
+static inline enum ibv_wc_status xsc_diamond_cqe_err_code(uint8_t error_code)
+{
+ switch (error_code) {
+ case XSC_DIAMOND_ERR_CODE_NAK_SEQ_ERR:
+ case XSC_DIAMOND_ERR_CODE_RTO_REQ:
+ return IBV_WC_RETRY_EXC_ERR;
+ case XSC_DIAMOND_ERR_CODE_NAK_INV_REQ:
+ return IBV_WC_REM_INV_REQ_ERR;
+ case XSC_DIAMOND_ERR_CODE_NAK_MR:
+ return IBV_WC_REM_ACCESS_ERR;
+ case XSC_DIAMOND_ERR_CODE_NAK_REMOTE_OPER_ERR:
+ return IBV_WC_REM_OP_ERR;
+ case XSC_DIAMOND_ERR_CODE_LOCAL_MR_REQ:
+ case XSC_DIAMOND_ERR_CODE_REMOTE_MR:
+ case XSC_DIAMOND_ERR_CODE_REMOTE_MR_GEN_CQE:
+ case XSC_DIAMOND_ERR_CODE_LOCAL_MR_RSP:
+ return IBV_WC_LOC_PROT_ERR;
+ case XSC_DIAMOND_ERR_CODE_LEN:
+ case XSC_DIAMOND_ERR_CODE_LEN_GEN_CQE:
+ return IBV_WC_LOC_LEN_ERR;
+ case XSC_DIAMOND_ERR_CODE_FLUSH:
+ return IBV_WC_WR_FLUSH_ERR;
+ case XSC_DIAMOND_ERR_CODE_RCV_WQE_DMA:
+ case XSC_DIAMOND_ERR_CODE_DATA_DMA_RD_REQ:
+ case XSC_DIAMOND_ERR_CODE_DATA_DMA_WR_RSP_GEN_CQE:
+ case XSC_DIAMOND_ERR_CODE_DATA_DMA_WR_RSP:
+ return IBV_WC_FATAL_ERR;
+ case XSC_DIAMOND_ERR_CODE_SND_WQE_FORMAT:
+ return IBV_WC_LOC_QP_OP_ERR;
+ default:
+ return IBV_WC_GENERAL_ERR;
+ }
+}
+
+static XSC_HW_ALWAYS_INLINE enum ibv_wc_status xsc_hw_cqe_err_status(uint16_t device_id,
+ void *cqe)
+{
+ switch (device_id) {
+ case XSC_MS_PF_DEV_ID:
+ case XSC_MS_VF_DEV_ID:
+ return xsc_andes_cqe_err_code(xsc_andes_get_cqe_err_code(cqe));
+ case XSC_MC_PF_DEV_ID_DIAMOND:
+ case XSC_MC_PF_DEV_ID_DIAMOND_NEXT:
+ return xsc_diamond_cqe_err_code(xsc_diamond_get_cqe_err_code(cqe));
+ default:
+ return xsc_andes_cqe_err_code(xsc_andes_get_cqe_err_code(cqe));
+ }
+}
+
+static XSC_HW_ALWAYS_INLINE void xsc_diamond_set_data_seg(void *data_seg,
+ uint64_t addr, uint32_t key,
+ uint32_t length)
+{
+ struct xsc_diamond_data_seg *seg = data_seg;
+
+ seg->length = length;
+ seg->key = key;
+ seg->addr = addr;
+}
+
+static XSC_HW_ALWAYS_INLINE void xsc_andes_set_data_seg(void *data_seg,
+ uint64_t addr, uint32_t key,
+ uint32_t length)
+{
+ struct xsc_andes_data_seg *seg = data_seg;
+
+ seg->length = length;
+ seg->key = key;
+ seg->addr = addr;
+}
+
+static XSC_HW_ALWAYS_INLINE void xsc_hw_set_data_seg(uint16_t device_id, void *data_seg,
+ uint64_t addr, uint32_t key, uint32_t length)
+{
+ switch (device_id) {
+ case XSC_MS_PF_DEV_ID:
+ case XSC_MS_VF_DEV_ID:
+ xsc_andes_set_data_seg(data_seg, addr, key, length);
+ break;
+ case XSC_MC_PF_DEV_ID_DIAMOND:
+ case XSC_MC_PF_DEV_ID_DIAMOND_NEXT:
+ xsc_diamond_set_data_seg(data_seg, addr, key, length);
+ break;
+ default:
+ xsc_andes_set_data_seg(data_seg, addr, key, length);
+ }
+}
+
+static XSC_HW_ALWAYS_INLINE void xsc_diamond_set_cq_ci(void *db_addr,
+ uint32_t cqn, uint32_t next_cid)
+{
+ union xsc_diamond_cq_doorbell db;
+
+ db.cq_id = cqn;
+ db.cq_next_cid = next_cid;
+ db.cq_sta = XSC_CQ_STAT_FIRED;
+ udma_to_device_barrier();
+ mmio_write64_le(db_addr, db.raw);
+}
+
+static XSC_HW_ALWAYS_INLINE void xsc_diamond_next_set_cq_ci(void *db_addr,
+ uint32_t cqn, uint32_t next_cid)
+{
+ union xsc_diamond_next_cq_doorbell db;
+
+ db.cq_id = cqn;
+ db.cq_next_cid = next_cid;
+ db.cq_sta = XSC_CQ_STAT_FIRED;
+ udma_to_device_barrier();
+ mmio_write64_le(db_addr, db.raw);
+}
+
+static XSC_HW_ALWAYS_INLINE void xsc_andes_set_cq_ci(void *db_addr,
+ uint32_t cqn, uint32_t next_cid)
+{
+ union xsc_andes_cq_doorbell db;
+
+ db.cq_id = cqn;
+ db.cq_next_cid = next_cid;
+ db.arm = XSC_CQ_STAT_FIRED;
+ udma_to_device_barrier();
+ mmio_write32_le(db_addr, db.val);
+}
+
+static XSC_HW_ALWAYS_INLINE void xsc_hw_set_cq_ci(uint16_t device_id, void *db_addr,
+ uint32_t cqn, uint32_t next_cid)
+{
+ switch (device_id) {
+ case XSC_MS_PF_DEV_ID:
+ case XSC_MS_VF_DEV_ID:
+ xsc_andes_set_cq_ci(db_addr, cqn, next_cid);
+ break;
+ case XSC_MC_PF_DEV_ID_DIAMOND:
+ xsc_diamond_set_cq_ci(db_addr, cqn, next_cid);
+ break;
+ case XSC_MC_PF_DEV_ID_DIAMOND_NEXT:
+ xsc_diamond_next_set_cq_ci(db_addr, cqn, next_cid);
+ break;
+ default:
+ xsc_andes_set_cq_ci(db_addr, cqn, next_cid);
+ }
+}
+
+static XSC_HW_ALWAYS_INLINE void xsc_diamond_update_cq_db(void *db_addr,
+ uint32_t cqn, uint32_t next_cid,
+ uint8_t solicited)
+{
+ union xsc_diamond_cq_doorbell db;
+
+ db.cq_id = cqn;
+ db.cq_next_cid = next_cid;
+ db.cq_sta = solicited ? XSC_CQ_STAT_ARM_SOLICITED : XSC_CQ_STAT_ARM_NEXT;
+ udma_to_device_barrier();
+ mmio_wc_start();
+ mmio_write64_le(db_addr, db.raw);
+ mmio_flush_writes();
+}
+
+static XSC_HW_ALWAYS_INLINE void xsc_diamond_next_update_cq_db(void *db_addr,
+ uint32_t cqn, uint32_t next_cid,
+ uint8_t solicited)
+{
+ union xsc_diamond_next_cq_doorbell db;
+
+ db.cq_id = cqn;
+ db.cq_next_cid = next_cid;
+ db.cq_sta = solicited ? XSC_CQ_STAT_ARM_SOLICITED : XSC_CQ_STAT_ARM_NEXT;
+ udma_to_device_barrier();
+ mmio_wc_start();
+ mmio_write64_le(db_addr, db.raw);
+ mmio_flush_writes();
+}
+
+static XSC_HW_ALWAYS_INLINE void xsc_andes_update_cq_db(void *db_addr,
+ uint32_t cqn, uint32_t next_cid,
+ uint8_t solicited)
+{
+ union xsc_andes_cq_doorbell db;
+
+ db.cq_id = cqn;
+ db.cq_next_cid = next_cid;
+ db.arm = solicited;
+ udma_to_device_barrier();
+ mmio_wc_start();
+ mmio_write32_le(db_addr, db.val);
+ mmio_flush_writes();
+}
+
+static XSC_HW_ALWAYS_INLINE void xsc_hw_update_cq_db(uint16_t device_id, void *db_addr,
+ uint32_t cqn, uint32_t next_cid,
+ uint8_t solicited)
+{
+ switch (device_id) {
+ case XSC_MS_PF_DEV_ID:
+ case XSC_MS_VF_DEV_ID:
+ xsc_andes_update_cq_db(db_addr, cqn, next_cid, solicited);
+ break;
+ case XSC_MC_PF_DEV_ID_DIAMOND:
+ xsc_diamond_update_cq_db(db_addr, cqn, next_cid, solicited);
+ break;
+ case XSC_MC_PF_DEV_ID_DIAMOND_NEXT:
+ xsc_diamond_next_update_cq_db(db_addr, cqn, next_cid, solicited);
+ break;
+ default:
+ xsc_andes_update_cq_db(db_addr, cqn, next_cid, solicited);
+ }
+}
+
+static XSC_HW_ALWAYS_INLINE void xsc_diamond_ring_rx_doorbell(void *db_addr,
+ uint32_t rqn, uint32_t next_pid)
+{
+ union xsc_diamond_recv_doorbell db;
+
+ db.qp_id = rqn;
+ db.next_pid = next_pid;
+
+ udma_to_device_barrier();
+ mmio_write64_le(db_addr, db.raw);
+}
+
+static XSC_HW_ALWAYS_INLINE void xsc_diamond_next_ring_rx_doorbell(void *db_addr,
+ uint32_t rqn, uint32_t next_pid)
+{
+ union xsc_diamond_next_recv_doorbell db;
+
+ db.qp_id = rqn;
+ db.next_pid = next_pid;
+
+ udma_to_device_barrier();
+ mmio_write64_le(db_addr, db.raw);
+}
+
+static XSC_HW_ALWAYS_INLINE void xsc_andes_ring_rx_doorbell(void *db_addr,
+ uint32_t rqn, uint32_t next_pid)
+{
+ union xsc_andes_recv_doorbell db;
+
+ db.qp_id = rqn;
+ db.next_pid = next_pid;
+
+ udma_to_device_barrier();
+ mmio_write32_le(db_addr, db.raw);
+}
+
+static XSC_HW_ALWAYS_INLINE void xsc_hw_ring_rx_doorbell(uint16_t device_id,
+ void *db_addr,
+ uint32_t rqn, uint32_t next_pid)
+{
+ switch (device_id) {
+ case XSC_MS_PF_DEV_ID:
+ case XSC_MS_VF_DEV_ID:
+ xsc_andes_ring_rx_doorbell(db_addr, rqn, next_pid);
+ break;
+ case XSC_MC_PF_DEV_ID_DIAMOND:
+ xsc_diamond_ring_rx_doorbell(db_addr, rqn, next_pid);
+ break;
+ case XSC_MC_PF_DEV_ID_DIAMOND_NEXT:
+ xsc_diamond_next_ring_rx_doorbell(db_addr, rqn, next_pid);
+ break;
+ default:
+ xsc_andes_ring_rx_doorbell(db_addr, rqn, next_pid);
+ }
+}
+
+static XSC_HW_ALWAYS_INLINE void xsc_diamond_ring_tx_doorbell(void *db_addr,
+ uint32_t rqn, uint32_t next_pid)
+{
+ union xsc_diamond_send_doorbell db;
+
+ db.qp_id = rqn;
+ db.next_pid = next_pid;
+
+ udma_to_device_barrier();
+ mmio_write64_le(db_addr, db.raw);
+}
+
+static XSC_HW_ALWAYS_INLINE void xsc_diamond_next_ring_tx_doorbell(void *db_addr,
+ uint32_t rqn, uint32_t next_pid)
+{
+ union xsc_diamond_next_send_doorbell db;
+
+ db.qp_id = rqn;
+ db.next_pid = next_pid;
+
+ udma_to_device_barrier();
+ mmio_write64_le(db_addr, db.raw);
+}
+
+static XSC_HW_ALWAYS_INLINE void xsc_andes_ring_tx_doorbell(void *db_addr,
+ uint32_t rqn, uint32_t next_pid)
+{
+ union xsc_andes_send_doorbell db;
+
+ db.qp_id = rqn;
+ db.next_pid = next_pid;
+
+ udma_to_device_barrier();
+ mmio_write32_le(db_addr, db.raw);
+}
+
+static XSC_HW_ALWAYS_INLINE void xsc_hw_ring_tx_doorbell(uint16_t device_id,
+ void *db_addr,
+ uint32_t sqn, uint32_t next_pid)
+{
+ switch (device_id) {
+ case XSC_MS_PF_DEV_ID:
+ case XSC_MS_VF_DEV_ID:
+ xsc_andes_ring_tx_doorbell(db_addr, sqn, next_pid);
+ break;
+ case XSC_MC_PF_DEV_ID_DIAMOND:
+ xsc_diamond_ring_tx_doorbell(db_addr, sqn, next_pid);
+ break;
+ case XSC_MC_PF_DEV_ID_DIAMOND_NEXT:
+ xsc_diamond_next_ring_tx_doorbell(db_addr, sqn, next_pid);
+ break;
+ default:
+ xsc_andes_ring_tx_doorbell(db_addr, sqn, next_pid);
+ }
+}
+
+#endif /* _XSC_HW_H_ */
diff --git a/providers/xscale/xscale.c b/providers/xscale/xscale.c
new file mode 100644
index 0000000..e6792b9
--- /dev/null
+++ b/providers/xscale/xscale.c
@@ -0,0 +1,948 @@
+/*
+ * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd.
+ * All rights reserved.
+ */
+
+#define _GNU_SOURCE
+#include <config.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/mman.h>
+#include <pthread.h>
+#include <string.h>
+#include <sched.h>
+#include <sys/param.h>
+
+#include <util/symver.h>
+
+#include "xscale.h"
+#include "xsc-abi.h"
+#include "wqe.h"
+#include "xsc_hsi.h"
+
+#ifndef PCI_VENDOR_ID_MELLANOX
+#define PCI_VENDOR_ID_MELLANOX 0x15b3
+#endif
+
+#ifndef CPU_OR
+#define CPU_OR(x, y, z) do {} while (0)
+#endif
+
+#ifndef CPU_EQUAL
+#define CPU_EQUAL(x, y) 1
+#endif
+
+#define HCA(v, d) VERBS_PCI_MATCH(PCI_VENDOR_ID_##v, d, NULL)
+static const struct verbs_match_ent hca_table[] = {
+ VERBS_MODALIAS_MATCH("*xscale*", NULL),
+ {}
+};
+
+uint32_t xsc_debug_mask = 0;
+int xsc_freeze_on_error_cqe;
+static void xsc_free_context(struct ibv_context *ibctx);
+
+static const struct verbs_context_ops xsc_ctx_common_ops = {
+ .query_port = xsc_query_port,
+ .alloc_pd = xsc_alloc_pd,
+ .dealloc_pd = xsc_free_pd,
+ .reg_mr = xsc_reg_mr,
+ .rereg_mr = xsc_rereg_mr,
+ .dereg_mr = xsc_dereg_mr,
+ .alloc_mw = NULL,
+ .dealloc_mw = NULL,
+ .bind_mw = NULL,
+ .create_cq = xsc_create_cq,
+ .poll_cq = xsc_poll_cq,
+ .req_notify_cq = xsc_arm_cq,
+ .cq_event = xsc_cq_event,
+ .resize_cq = xsc_resize_cq,
+ .destroy_cq = xsc_destroy_cq,
+ .create_srq = NULL,
+ .modify_srq = NULL,
+ .query_srq = NULL,
+ .destroy_srq = NULL,
+ .post_srq_recv = NULL,
+ .create_qp = xsc_create_qp,
+ .query_qp = xsc_query_qp,
+ .modify_qp = xsc_modify_qp,
+ .destroy_qp = xsc_destroy_qp,
+ .post_send = xsc_post_send,
+ .post_recv = xsc_post_recv,
+ .create_ah = xsc_create_ah,
+ .destroy_ah = xsc_destroy_ah,
+ .attach_mcast = xsc_attach_mcast,
+ .detach_mcast = xsc_detach_mcast,
+
+ .alloc_dm = xsc_alloc_dm,
+ .alloc_parent_domain = xsc_alloc_parent_domain,
+ .alloc_td = NULL,
+ .attach_counters_point_flow = xsc_attach_counters_point_flow,
+ .close_xrcd = xsc_close_xrcd,
+ .create_counters = xsc_create_counters,
+ .create_cq_ex = xsc_create_cq_ex,
+ .create_flow = xsc_create_flow,
+ .create_flow_action_esp = xsc_create_flow_action_esp,
+ .create_qp_ex = xsc_create_qp_ex,
+ .create_rwq_ind_table = xsc_create_rwq_ind_table,
+ .create_srq_ex = NULL,
+ .create_wq = xsc_create_wq,
+ .dealloc_td = NULL,
+ .destroy_counters = xsc_destroy_counters,
+ .destroy_flow = xsc_destroy_flow,
+ .destroy_flow_action = xsc_destroy_flow_action,
+ .destroy_rwq_ind_table = xsc_destroy_rwq_ind_table,
+ .destroy_wq = xsc_destroy_wq,
+ .free_dm = xsc_free_dm,
+ .get_srq_num = NULL,
+ .modify_cq = xsc_modify_cq,
+ .modify_flow_action_esp = xsc_modify_flow_action_esp,
+ .modify_qp_rate_limit = xsc_modify_qp_rate_limit,
+ .modify_wq = xsc_modify_wq,
+ .open_xrcd = xsc_open_xrcd,
+ .post_srq_ops = NULL,
+ .query_device_ex = xsc_query_device_ex,
+ .query_rt_values = xsc_query_rt_values,
+ .read_counters = xsc_read_counters,
+ .reg_dm_mr = xsc_reg_dm_mr,
+ .alloc_null_mr = xsc_alloc_null_mr,
+ .free_context = xsc_free_context,
+};
+
+static int read_number_from_line(const char *line, int *value)
+{
+ const char *ptr;
+
+ ptr = strchr(line, ':');
+ if (!ptr)
+ return 1;
+
+ ++ptr;
+
+ *value = atoi(ptr);
+ return 0;
+}
+
+/**
+ * Look for the first free user-index in all the user-index tables.
+ * If all are in use, return -1, otherwise return a valid user-index.
+ * When the reference count of a table is zero, the table is not in use
+ * and has not been allocated yet; in that case xsc_store_uidx allocates
+ * the table and increments its reference count.
+ */
+static int32_t get_free_uidx(struct xsc_context *ctx)
+{
+ int32_t tind;
+ int32_t i;
+
+ for (tind = 0; tind < XSC_UIDX_TABLE_SIZE; tind++) {
+ if (ctx->uidx_table[tind].refcnt < XSC_UIDX_TABLE_MASK)
+ break;
+ }
+
+ if (tind == XSC_UIDX_TABLE_SIZE)
+ return -1;
+
+ if (!ctx->uidx_table[tind].refcnt)
+ return tind << XSC_UIDX_TABLE_SHIFT;
+
+ for (i = 0; i < XSC_UIDX_TABLE_MASK + 1; i++) {
+ if (!ctx->uidx_table[tind].table[i])
+ break;
+ }
+
+ return (tind << XSC_UIDX_TABLE_SHIFT) | i;
+}
+
+int32_t xsc_store_uidx(struct xsc_context *ctx, void *rsc)
+{
+ int32_t tind;
+ int32_t ret = -1;
+ int32_t uidx;
+
+ pthread_mutex_lock(&ctx->uidx_table_mutex);
+ uidx = get_free_uidx(ctx);
+ if (uidx < 0)
+ goto out;
+
+ tind = uidx >> XSC_UIDX_TABLE_SHIFT;
+
+ if (!ctx->uidx_table[tind].refcnt) {
+ ctx->uidx_table[tind].table = calloc(XSC_UIDX_TABLE_MASK + 1,
+ sizeof(struct xsc_resource *));
+ if (!ctx->uidx_table[tind].table)
+ goto out;
+ }
+
+ ++ctx->uidx_table[tind].refcnt;
+ ctx->uidx_table[tind].table[uidx & XSC_UIDX_TABLE_MASK] = rsc;
+ ret = uidx;
+
+out:
+ pthread_mutex_unlock(&ctx->uidx_table_mutex);
+ return ret;
+}
+
+void xsc_clear_uidx(struct xsc_context *ctx, uint32_t uidx)
+{
+ int tind = uidx >> XSC_UIDX_TABLE_SHIFT;
+
+ pthread_mutex_lock(&ctx->uidx_table_mutex);
+
+ if (!--ctx->uidx_table[tind].refcnt)
+ free(ctx->uidx_table[tind].table);
+ else
+ ctx->uidx_table[tind].table[uidx & XSC_UIDX_TABLE_MASK] = NULL;
+
+ pthread_mutex_unlock(&ctx->uidx_table_mutex);
+}
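+
+/*
+ * Illustrative pairing of the two helpers above (a sketch; 'ctx' and 'rsc'
+ * stand for the caller's xsc_context and resource object):
+ *
+ *	int32_t uidx = xsc_store_uidx(ctx, rsc);
+ *
+ *	if (uidx < 0)
+ *		return ENOMEM;	// all user-index tables are full
+ *	...
+ *	xsc_clear_uidx(ctx, uidx);	// on destroy, release the slot
+ */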
+
+static int xsc_is_sandy_bridge(int *num_cores)
+{
+ char line[128];
+ FILE *fd;
+ int rc = 0;
+ int cur_cpu_family = -1;
+ int cur_cpu_model = -1;
+
+ fd = fopen("/proc/cpuinfo", "r");
+ if (!fd)
+ return 0;
+
+ *num_cores = 0;
+
+ while (fgets(line, 128, fd)) {
+ int value;
+
+ /* if this is information on new processor */
+ if (!strncmp(line, "processor", 9)) {
+ ++*num_cores;
+
+ cur_cpu_family = -1;
+ cur_cpu_model = -1;
+ } else if (!strncmp(line, "cpu family", 10)) {
+ if ((cur_cpu_family < 0) && (!read_number_from_line(line, &value)))
+ cur_cpu_family = value;
+ } else if (!strncmp(line, "model", 5)) {
+ if ((cur_cpu_model < 0) && (!read_number_from_line(line, &value)))
+ cur_cpu_model = value;
+ }
+
+ /* if this is a Sandy Bridge CPU */
+ if ((cur_cpu_family == 6) &&
+		    (cur_cpu_model == 0x2A || cur_cpu_model == 0x2D))
+ rc = 1;
+ }
+
+ fclose(fd);
+ return rc;
+}
+
+/*
+man cpuset
+
+ This format displays each 32-bit word in hexadecimal (using ASCII characters "0" - "9" and "a" - "f"); words
+ are filled with leading zeros, if required. For masks longer than one word, a comma separator is used between
+ words. Words are displayed in big-endian order, which has the most significant bit first. The hex digits
+ within a word are also in big-endian order.
+
+ The number of 32-bit words displayed is the minimum number needed to display all bits of the bitmask, based on
+ the size of the bitmask.
+
+ Examples of the Mask Format:
+
+ 00000001 # just bit 0 set
+ 40000000,00000000,00000000 # just bit 94 set
+ 000000ff,00000000 # bits 32-39 set
+ 00000000,000E3862 # 1,5,6,11-13,17-19 set
+
+ A mask with bits 0, 1, 2, 4, 8, 16, 32, and 64 set displays as:
+
+ 00000001,00000001,00010117
+
+ The first "1" is for bit 64, the second for bit 32, the third for bit 16, the fourth for bit 8, the fifth for
+ bit 4, and the "7" is for bits 2, 1, and 0.
+*/
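+/*
+ * xsc_local_cpu_set() parses a mask in the format quoted above: it walks
+ * the comma-separated 32-bit words starting from the rightmost (least
+ * significant) one, so e.g. "000000ff,00000000" sets CPUs 32-39 in
+ * *cpu_set. The mask is taken from the XSC_LOCAL_CPUS environment
+ * variable when set, otherwise from the device's local_cpus sysfs
+ * attribute.
+ */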
+static void xsc_local_cpu_set(struct ibv_device *ibdev, cpu_set_t *cpu_set)
+{
+ char *p, buf[1024] = {};
+ char *env_value;
+ uint32_t word;
+ int i, k;
+
+ env_value = getenv("XSC_LOCAL_CPUS");
+ if (env_value)
+ strncpy(buf, env_value, sizeof(buf) - 1);
+ else {
+ char fname[MAXPATHLEN];
+ FILE *fp;
+
+ snprintf(fname, MAXPATHLEN, "/sys/class/infiniband/%s/device/local_cpus",
+ ibv_get_device_name(ibdev));
+
+ fp = fopen(fname, "r");
+ if (!fp) {
+			fprintf(stderr, PFX "Warning: cannot get local cpu set: failed to open %s\n", fname);
+ return;
+ }
+ if (!fgets(buf, sizeof(buf), fp)) {
+			fprintf(stderr, PFX "Warning: cannot get local cpu set: failed to read cpu mask\n");
+ fclose(fp);
+ return;
+ }
+ fclose(fp);
+ }
+
+ p = strrchr(buf, ',');
+ if (!p)
+ p = buf;
+
+ i = 0;
+ do {
+ if (*p == ',') {
+ *p = 0;
+			p++;
+ }
+
+ word = strtoul(p, NULL, 16);
+
+ for (k = 0; word; ++k, word >>= 1)
+ if (word & 1)
+				CPU_SET(k + i, cpu_set);
+
+ if (p == buf)
+ break;
+
+ p = strrchr(buf, ',');
+ if (!p)
+ p = buf;
+
+ i += 32;
+ } while (i < CPU_SETSIZE);
+}
+
+static int xsc_enable_sandy_bridge_fix(struct ibv_device *ibdev)
+{
+ cpu_set_t my_cpus, dev_local_cpus, result_set;
+ int stall_enable;
+ int ret;
+ int num_cores;
+
+ if (!xsc_is_sandy_bridge(&num_cores))
+ return 0;
+
+ /* by default enable stall on sandy bridge arch */
+ stall_enable = 1;
+
+	/*
+	 * Check if the app is bound to a cpu set that is inside the
+	 * device's local cpu set. Disable stalling if so.
+	 */
+
+ /* use static cpu set - up to CPU_SETSIZE (1024) cpus/node */
+ CPU_ZERO(&my_cpus);
+ CPU_ZERO(&dev_local_cpus);
+ CPU_ZERO(&result_set);
+ ret = sched_getaffinity(0, sizeof(my_cpus), &my_cpus);
+ if (ret == -1) {
+ if (errno == EINVAL)
+ fprintf(stderr, PFX "Warning: my cpu set is too small\n");
+ else
+ fprintf(stderr, PFX "Warning: failed to get my cpu set\n");
+ goto out;
+ }
+
+ /* get device local cpu set */
+ xsc_local_cpu_set(ibdev, &dev_local_cpus);
+
+ /* check if my cpu set is in dev cpu */
+ CPU_OR(&result_set, &my_cpus, &dev_local_cpus);
+ stall_enable = CPU_EQUAL(&result_set, &dev_local_cpus) ? 0 : 1;
+
+out:
+ return stall_enable;
+}
+
+static void xsc_read_env(struct ibv_device *ibdev, struct xsc_context *ctx)
+{
+ char *env_value;
+
+ env_value = getenv("XSC_STALL_CQ_POLL");
+ if (env_value)
+ /* check if cq stall is enforced by user */
+ ctx->stall_enable = (strcmp(env_value, "0")) ? 1 : 0;
+ else
+ /* autodetect if we need to do cq polling */
+ ctx->stall_enable = xsc_enable_sandy_bridge_fix(ibdev);
+
+ env_value = getenv("XSC_STALL_NUM_LOOP");
+ if (env_value)
+ xsc_stall_num_loop = atoi(env_value);
+
+ env_value = getenv("XSC_STALL_CQ_POLL_MIN");
+ if (env_value)
+ xsc_stall_cq_poll_min = atoi(env_value);
+
+ env_value = getenv("XSC_STALL_CQ_POLL_MAX");
+ if (env_value)
+ xsc_stall_cq_poll_max = atoi(env_value);
+
+ env_value = getenv("XSC_STALL_CQ_INC_STEP");
+ if (env_value)
+ xsc_stall_cq_inc_step = atoi(env_value);
+
+ env_value = getenv("XSC_STALL_CQ_DEC_STEP");
+ if (env_value)
+ xsc_stall_cq_dec_step = atoi(env_value);
+
+ ctx->stall_adaptive_enable = 0;
+ ctx->stall_cycles = 0;
+
+ if (xsc_stall_num_loop < 0) {
+ ctx->stall_adaptive_enable = 1;
+ ctx->stall_cycles = xsc_stall_cq_poll_min;
+ }
+}
+
+static void open_debug_file(struct xsc_context *ctx)
+{
+ char *env;
+
+ env = getenv("XSC_DEBUG_FILE");
+ if (!env) {
+ ctx->dbg_fp = stderr;
+ return;
+ }
+
+ ctx->dbg_fp = fopen(env, "aw+");
+ if (!ctx->dbg_fp) {
+ fprintf(stderr, "Failed opening debug file %s, using stderr\n", env);
+ ctx->dbg_fp = stderr;
+ return;
+ }
+}
+
+static void close_debug_file(struct xsc_context *ctx)
+{
+ if (ctx->dbg_fp && ctx->dbg_fp != stderr)
+ fclose(ctx->dbg_fp);
+}
+
+static void set_debug_mask(void)
+{
+ char *env;
+
+ env = getenv("XSC_DEBUG_MASK");
+ if (env)
+ xsc_debug_mask = strtol(env, NULL, 0);
+}
+
+static void set_freeze_on_error(void)
+{
+ char *env;
+
+ env = getenv("XSC_FREEZE_ON_ERROR_CQE");
+ if (env)
+ xsc_freeze_on_error_cqe = strtol(env, NULL, 0);
+}
+
+static int single_threaded_app(void)
+{
+ char *env;
+
+ env = getenv("XSC_SINGLE_THREADED");
+ if (env)
+ return strcmp(env, "1") ? 0 : 1;
+
+ return 0;
+}
+
+static int xsc_cmd_get_context(struct xsc_context *context,
+ struct xsc_alloc_ucontext *req,
+ size_t req_len,
+ struct xsc_alloc_ucontext_resp *resp,
+ size_t resp_len)
+{
+ struct verbs_context *verbs_ctx = &context->ibv_ctx;
+
+ return ibv_cmd_get_context(verbs_ctx, &req->ibv_cmd,
+ req_len, &resp->ibv_resp, resp_len);
+}
+
+int xscdv_query_device(struct ibv_context *ctx_in,
+ struct xscdv_context *attrs_out)
+{
+ struct xsc_context *xctx = to_xctx(ctx_in);
+ uint64_t comp_mask_out = 0;
+
+ attrs_out->version = 0;
+ attrs_out->flags = 0;
+
+ if (xctx->cqe_version == XSC_CQE_VERSION_V1)
+ attrs_out->flags |= XSCDV_CONTEXT_FLAGS_CQE_V1;
+
+ if (xctx->vendor_cap_flags & XSC_VENDOR_CAP_FLAGS_MPW_ALLOWED)
+ attrs_out->flags |= XSCDV_CONTEXT_FLAGS_MPW_ALLOWED;
+
+ if (xctx->vendor_cap_flags & XSC_VENDOR_CAP_FLAGS_CQE_128B_COMP)
+ attrs_out->flags |= XSCDV_CONTEXT_FLAGS_CQE_128B_COMP;
+
+ if (xctx->vendor_cap_flags & XSC_VENDOR_CAP_FLAGS_CQE_128B_PAD)
+ attrs_out->flags |= XSCDV_CONTEXT_FLAGS_CQE_128B_PAD;
+
+ if (attrs_out->comp_mask & XSCDV_CONTEXT_MASK_CQE_COMPRESION) {
+ attrs_out->cqe_comp_caps = xctx->cqe_comp_caps;
+ comp_mask_out |= XSCDV_CONTEXT_MASK_CQE_COMPRESION;
+ }
+
+ if (xctx->vendor_cap_flags & XSC_VENDOR_CAP_FLAGS_ENHANCED_MPW)
+ attrs_out->flags |= XSCDV_CONTEXT_FLAGS_ENHANCED_MPW;
+
+ if (attrs_out->comp_mask & XSCDV_CONTEXT_MASK_SWP) {
+ attrs_out->sw_parsing_caps = xctx->sw_parsing_caps;
+ comp_mask_out |= XSCDV_CONTEXT_MASK_SWP;
+ }
+
+ if (attrs_out->comp_mask & XSCDV_CONTEXT_MASK_STRIDING_RQ) {
+ attrs_out->striding_rq_caps = xctx->striding_rq_caps;
+ comp_mask_out |= XSCDV_CONTEXT_MASK_STRIDING_RQ;
+ }
+
+ if (attrs_out->comp_mask & XSCDV_CONTEXT_MASK_TUNNEL_OFFLOADS) {
+ attrs_out->tunnel_offloads_caps = xctx->tunnel_offloads_caps;
+ comp_mask_out |= XSCDV_CONTEXT_MASK_TUNNEL_OFFLOADS;
+ }
+
+ if (attrs_out->comp_mask & XSCDV_CONTEXT_MASK_CLOCK_INFO_UPDATE) {
+ if (xctx->clock_info_page) {
+ attrs_out->max_clock_info_update_nsec =
+ xctx->clock_info_page->overflow_period;
+ comp_mask_out |= XSCDV_CONTEXT_MASK_CLOCK_INFO_UPDATE;
+ }
+ }
+
+ if (attrs_out->comp_mask & XSCDV_CONTEXT_MASK_FLOW_ACTION_FLAGS) {
+ attrs_out->flow_action_flags = xctx->flow_action_flags;
+ comp_mask_out |= XSCDV_CONTEXT_MASK_FLOW_ACTION_FLAGS;
+ }
+
+ attrs_out->comp_mask = comp_mask_out;
+
+ return 0;
+}
+
+static int xscdv_get_qp(struct ibv_qp *qp_in,
+ struct xscdv_qp *qp_out)
+{
+ struct xsc_qp *xqp = to_xqp(qp_in);
+ uint64_t mask_out = 0;
+
+ qp_out->dbrec = xqp->db;
+ qp_out->sq.db = xqp->sq.db;
+ qp_out->rq.db = xqp->rq.db;
+
+ if (xqp->sq_buf_size)
+ /* IBV_QPT_RAW_PACKET */
+ qp_out->sq.buf = (void *)((uintptr_t)xqp->sq_buf.buf);
+ else
+ qp_out->sq.buf = (void *)((uintptr_t)xqp->buf.buf + xqp->sq.offset);
+ qp_out->sq.wqe_cnt = xqp->sq.wqe_cnt;
+ qp_out->sq.stride = 1 << xqp->sq.wqe_shift;
+
+ qp_out->rq.buf = (void *)((uintptr_t)xqp->buf.buf + xqp->rq.offset);
+ qp_out->rq.wqe_cnt = xqp->rq.wqe_cnt;
+ qp_out->rq.stride = 1 << xqp->rq.wqe_shift;
+
+ if (qp_out->comp_mask & XSCDV_QP_MASK_RAW_QP_HANDLES) {
+ qp_out->tirn = xqp->tirn;
+ qp_out->tisn = xqp->tisn;
+ qp_out->rqn = xqp->rqn;
+ qp_out->sqn = xqp->sqn;
+ mask_out |= XSCDV_QP_MASK_RAW_QP_HANDLES;
+ }
+
+ qp_out->comp_mask = mask_out;
+
+ return 0;
+}
+
+static int xscdv_get_cq(struct ibv_cq *cq_in,
+ struct xscdv_cq *cq_out)
+{
+ struct xsc_cq *xcq = to_xcq(cq_in);
+
+ cq_out->comp_mask = 0;
+ cq_out->cqn = xcq->cqn;
+ cq_out->cqe_cnt = xcq->verbs_cq.cq_ex.cqe;
+ cq_out->cqe_size = xcq->cqe_sz;
+ cq_out->buf = xcq->active_buf->buf;
+ cq_out->dbrec = xcq->dbrec;
+ cq_out->db = xcq->db;
+ xcq->flags |= XSC_CQ_FLAGS_DV_OWNED;
+
+ return 0;
+}
+
+static int xscdv_get_rwq(struct ibv_wq *wq_in,
+ struct xscdv_rwq *rwq_out)
+{
+ struct xsc_rwq *xrwq = to_xrwq(wq_in);
+
+ rwq_out->comp_mask = 0;
+ rwq_out->buf = xrwq->pbuff;
+ rwq_out->dbrec = xrwq->recv_db;
+ rwq_out->wqe_cnt = xrwq->rq.wqe_cnt;
+ rwq_out->stride = 1 << xrwq->rq.wqe_shift;
+ rwq_out->db = xrwq->rq.db;
+
+ return 0;
+}
+
+static int xscdv_get_dm(struct ibv_dm *dm_in,
+ struct xscdv_dm *dm_out)
+{
+ struct xsc_dm *xdm = to_xdm(dm_in);
+
+ dm_out->comp_mask = 0;
+ dm_out->buf = xdm->start_va;
+ dm_out->length = xdm->length;
+
+ return 0;
+}
+
+static int xscdv_get_av(struct ibv_ah *ah_in,
+ struct xscdv_ah *ah_out)
+{
+ struct xsc_ah *xah = to_xah(ah_in);
+
+ ah_out->comp_mask = 0;
+ ah_out->av = &xah->av;
+
+ return 0;
+}
+
+static int xscdv_get_pd(struct ibv_pd *pd_in,
+ struct xscdv_pd *pd_out)
+{
+ struct xsc_pd *xpd = to_xpd(pd_in);
+
+ pd_out->comp_mask = 0;
+ pd_out->pdn = xpd->pdn;
+
+ return 0;
+}
+
+int xscdv_init_obj(struct xscdv_obj *obj, uint64_t obj_type)
+{
+ int ret = 0;
+
+ if (obj_type & XSCDV_OBJ_QP)
+ ret = xscdv_get_qp(obj->qp.in, obj->qp.out);
+ if (!ret && (obj_type & XSCDV_OBJ_CQ))
+ ret = xscdv_get_cq(obj->cq.in, obj->cq.out);
+ if (!ret && (obj_type & XSCDV_OBJ_RWQ))
+ ret = xscdv_get_rwq(obj->rwq.in, obj->rwq.out);
+ if (!ret && (obj_type & XSCDV_OBJ_DM))
+ ret = xscdv_get_dm(obj->dm.in, obj->dm.out);
+ if (!ret && (obj_type & XSCDV_OBJ_AH))
+ ret = xscdv_get_av(obj->ah.in, obj->ah.out);
+ if (!ret && (obj_type & XSCDV_OBJ_PD))
+ ret = xscdv_get_pd(obj->pd.in, obj->pd.out);
+
+ return ret;
+}
+
+int xscdv_set_context_attr(struct ibv_context *ibv_ctx,
+ enum xscdv_set_ctx_attr_type type, void *attr)
+{
+ struct xsc_context *ctx = to_xctx(ibv_ctx);
+
+ switch (type) {
+ case XSCDV_CTX_ATTR_BUF_ALLOCATORS:
+ ctx->extern_alloc = *((struct xscdv_ctx_allocators *)attr);
+ break;
+ default:
+ return ENOTSUP;
+ }
+
+ return 0;
+}
+
+int xscdv_get_clock_info(struct ibv_context *ctx_in,
+ struct xscdv_clock_info *clock_info)
+{
+ struct xsc_context *ctx = to_xctx(ctx_in);
+ const struct xsc_ib_clock_info *ci = ctx->clock_info_page;
+ uint32_t retry, tmp_sig;
+ atomic_uint32_t *sig;
+
+ if (!ci)
+ return EINVAL;
+
+ sig = (atomic_uint32_t *)&ci->sign;
+
+ do {
+ retry = 10;
+repeat:
+ tmp_sig = atomic_load(sig);
+ if (unlikely(tmp_sig &
+ XSC_IB_CLOCK_INFO_KERNEL_UPDATING)) {
+ if (--retry)
+ goto repeat;
+ return EBUSY;
+ }
+ clock_info->nsec = ci->nsec;
+ clock_info->last_cycles = ci->cycles;
+ clock_info->frac = ci->frac;
+ clock_info->mult = ci->mult;
+ clock_info->shift = ci->shift;
+ clock_info->mask = ci->mask;
+ } while (unlikely(tmp_sig != atomic_load(sig)));
+
+ return 0;
+}
+
+struct ibv_context *
+xscdv_open_device(struct ibv_device *device, struct xscdv_context_attr *attr)
+{
+ return verbs_open_device(device, attr);
+}
+
+static int xsc_mmap(struct xsc_device *xdev, struct xsc_context *context,
+ int cmd_fd, int size)
+{
+ uint64_t page_mask;
+
+ page_mask = (~(xdev->page_size - 1));
+ xsc_dbg(context->dbg_fp, XSC_DBG_CTX, "page size:%d\n", size);
+ context->sqm_reg_va = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
+ cmd_fd, context->qpm_tx_db & page_mask);
+ if (context->sqm_reg_va == MAP_FAILED) {
+ return -1;
+ }
+	xsc_dbg(context->dbg_fp, XSC_DBG_CTX, "sqm reg va:%p\n", context->sqm_reg_va);
+
+ context->rqm_reg_va = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
+ cmd_fd, context->qpm_rx_db & page_mask);
+ if (context->rqm_reg_va == MAP_FAILED) {
+ goto free_sqm;
+ }
+	xsc_dbg(context->dbg_fp, XSC_DBG_CTX, "rqm reg va:%p\n", context->rqm_reg_va);
+
+ context->cqm_reg_va = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
+ cmd_fd, context->cqm_next_cid_reg & page_mask);
+ if (context->cqm_reg_va == MAP_FAILED) {
+ goto free_rqm;
+ }
+ xsc_dbg(context->dbg_fp, XSC_DBG_CTX, "cqm ci va:%p\n", context->cqm_reg_va);
+ context->db_mmap_size = size;
+
+ context->cqm_armdb_va = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
+ cmd_fd, context->cqm_armdb & page_mask);
+ if (context->cqm_armdb_va == MAP_FAILED) {
+ goto free_cqm;
+ }
+ xsc_dbg(context->dbg_fp, XSC_DBG_CTX, "cqm armdb va:%p\n", context->cqm_armdb_va);
+
+ return 0;
+
+free_cqm:
+ munmap(context->cqm_reg_va, size);
+free_rqm:
+ munmap(context->rqm_reg_va, size);
+free_sqm:
+ munmap(context->sqm_reg_va, size);
+
+ return -1;
+}
+
+static void xsc_munmap(struct xsc_context *context)
+{
+ if (context->sqm_reg_va)
+ munmap(context->sqm_reg_va, context->db_mmap_size);
+
+ if (context->rqm_reg_va)
+ munmap(context->rqm_reg_va, context->db_mmap_size);
+
+ if (context->cqm_reg_va)
+ munmap(context->cqm_reg_va, context->db_mmap_size);
+
+ if (context->cqm_armdb_va)
+ munmap(context->cqm_armdb_va, context->db_mmap_size);
+}
+
+static struct verbs_context *xsc_alloc_context(struct ibv_device *ibdev,
+ int cmd_fd,
+ void *private_data)
+{
+ struct xsc_context *context;
+ struct xsc_alloc_ucontext req;
+ struct xsc_alloc_ucontext_resp resp;
+ int i;
+ int page_size;
+ int j;
+ struct xsc_device *xdev = to_xdev(ibdev);
+ struct verbs_context *v_ctx;
+ struct ibv_port_attr port_attr;
+ struct ibv_device_attr_ex device_attr;
+ struct xscdv_context_attr *ctx_attr = private_data;
+
+ if (ctx_attr && ctx_attr->comp_mask) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ context = verbs_init_and_alloc_context(ibdev, cmd_fd, context, ibv_ctx,
+ RDMA_DRIVER_XSC);
+ if (!context)
+ return NULL;
+
+ v_ctx = &context->ibv_ctx;
+ page_size = xdev->page_size;
+ xsc_single_threaded = single_threaded_app();
+
+ open_debug_file(context);
+ set_debug_mask();
+ set_freeze_on_error();
+ if (gethostname(context->hostname, sizeof(context->hostname)))
+ strcpy(context->hostname, "host_unknown");
+
+ memset(&req, 0, sizeof(req));
+ memset(&resp, 0, sizeof(resp));
+
+ if (xsc_cmd_get_context(context, &req, sizeof(req), &resp,
+ sizeof(resp)))
+ goto err_free;
+
+ context->max_num_qps = resp.qp_tab_size;
+ context->cache_line_size = resp.cache_line_size;
+ context->max_sq_desc_sz = resp.max_sq_desc_sz;
+ context->max_rq_desc_sz = resp.max_rq_desc_sz;
+ context->max_send_wqebb = resp.max_send_wqebb;
+ context->num_ports = resp.num_ports;
+ context->max_recv_wr = resp.max_recv_wr;
+ context->qpm_tx_db = resp.qpm_tx_db;
+ context->qpm_rx_db = resp.qpm_rx_db;
+ context->cqm_next_cid_reg = resp.cqm_next_cid_reg;
+ context->cqm_armdb = resp.cqm_armdb;
+ context->send_ds_num = resp.send_ds_num;
+ context->send_ds_shift = xsc_ilog2(resp.send_ds_num);
+ context->recv_ds_num = resp.recv_ds_num;
+ context->recv_ds_shift = xsc_ilog2(resp.recv_ds_num);
+
+ xsc_dbg(context->dbg_fp, XSC_DBG_CTX,
+ "max_num_qps:%u, max_sq_desc_sz:%u max_rq_desc_sz:%u " \
+ "max_send_wqebb:%u, num_ports:%u, max_recv_wr:%u\n",
+ context->max_num_qps, context->max_sq_desc_sz,
+ context->max_rq_desc_sz, context->max_send_wqebb,
+ context->num_ports, context->max_recv_wr);
+
+ xsc_dbg(context->dbg_fp, XSC_DBG_CTX, "send_ds_num:%u shift:%u recv_ds_num:%u shift:%u\n",
+ context->send_ds_num, context->send_ds_shift,
+ context->recv_ds_num, context->recv_ds_shift);
+ context->dump_fill_mkey = XSC_INVALID_LKEY;
+ context->dump_fill_mkey_be = htobe32(XSC_INVALID_LKEY);
+ context->eth_min_inline_size = XSC_ETH_L2_INLINE_HEADER_SIZE;
+ context->cmds_supp_uhw = resp.cmds_supp_uhw;
+
+ pthread_mutex_init(&context->qp_table_mutex, NULL);
+ pthread_mutex_init(&context->uidx_table_mutex, NULL);
+ for (i = 0; i < XSC_QP_TABLE_SIZE; ++i)
+ context->qp_table[i].refcnt = 0;
+
+	for (i = 0; i < XSC_UIDX_TABLE_SIZE; ++i)
+ context->uidx_table[i].refcnt = 0;
+
+ context->db_list = NULL;
+ context->page_size = page_size;
+ if (xsc_mmap(xdev, context, cmd_fd, page_size))
+ goto err_free;
+
+ pthread_mutex_init(&context->db_list_mutex, NULL);
+
+ context->hca_core_clock = NULL;
+ context->clock_info_page = NULL;
+
+ xsc_read_env(ibdev, context);
+
+ xsc_spinlock_init(&context->hugetlb_lock, !xsc_single_threaded);
+ list_head_init(&context->hugetlb_list);
+
+ verbs_set_ops(v_ctx, &xsc_ctx_common_ops);
+
+ memset(&device_attr, 0, sizeof(device_attr));
+ if (!xsc_query_device_ex(&v_ctx->context, NULL, &device_attr,
+ sizeof(struct ibv_device_attr_ex))) {
+ context->cached_device_cap_flags =
+ device_attr.orig_attr.device_cap_flags;
+ context->atomic_cap = device_attr.orig_attr.atomic_cap;
+ context->cached_tso_caps = device_attr.tso_caps;
+ context->max_dm_size = device_attr.max_dm_size;
+ }
+
+ for (j = 0; j < min(XSC_MAX_PORTS_NUM, context->num_ports); ++j) {
+ memset(&port_attr, 0, sizeof(port_attr));
+ if (!xsc_query_port(&v_ctx->context, j + 1, &port_attr)) {
+ context->cached_link_layer[j] = port_attr.link_layer;
+ context->cached_port_flags[j] = port_attr.flags;
+ }
+ }
+
+ return v_ctx;
+
+err_free:
+ verbs_uninit_context(&context->ibv_ctx);
+ close_debug_file(context);
+ free(context);
+ return NULL;
+}
+
+static void xsc_free_context(struct ibv_context *ibctx)
+{
+ struct xsc_context *context = to_xctx(ibctx);
+
+ xsc_dbg(context->dbg_fp, XSC_DBG_CTX, "\n");
+ xsc_munmap(context);
+
+ verbs_uninit_context(&context->ibv_ctx);
+ close_debug_file(context);
+ free(context);
+}
+
+static void xsc_uninit_device(struct verbs_device *verbs_device)
+{
+ struct xsc_device *xdev = to_xdev(&verbs_device->device);
+
+ free(xdev);
+}
+
+static struct verbs_device *xsc_device_alloc(struct verbs_sysfs_dev *sysfs_dev)
+{
+ struct xsc_device *xdev;
+
+ xdev = calloc(1, sizeof *xdev);
+ if (!xdev)
+ return NULL;
+
+ xdev->page_size = sysconf(_SC_PAGESIZE);
+ xdev->driver_abi_ver = sysfs_dev->abi_ver;
+
+ return &xdev->verbs_dev;
+}
+
+static const struct verbs_device_ops xsc_dev_ops = {
+ .name = "xscale",
+ .match_min_abi_version = XSC_UVERBS_MIN_ABI_VERSION,
+ .match_max_abi_version = XSC_UVERBS_MAX_ABI_VERSION,
+ .match_table = hca_table,
+ .alloc_device = xsc_device_alloc,
+ .uninit_device = xsc_uninit_device,
+ .alloc_context = xsc_alloc_context,
+};
+PROVIDER_DRIVER(xscale, xsc_dev_ops);
diff --git a/providers/xscale/xscale.h b/providers/xscale/xscale.h
new file mode 100644
index 0000000..0aee472
--- /dev/null
+++ b/providers/xscale/xscale.h
@@ -0,0 +1,834 @@
+/*
+ * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd.
+ * All rights reserved.
+ */
+
+#ifndef XSCALE_H
+#define XSCALE_H
+
+#include <stddef.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdatomic.h>
+#include <util/compiler.h>
+
+#include <infiniband/driver.h>
+#include <util/udma_barrier.h>
+#include "xsc-abi.h"
+#include <ccan/list.h>
+#include "bitmap.h"
+#include <ccan/minmax.h>
+#include "xscdv.h"
+
+#include <valgrind/memcheck.h>
+
+#define PFX "xsc: "
+#define offsetofend(_type, _member) \
+ (offsetof(_type, _member) + sizeof(((_type *)0)->_member))
+
+typedef _Atomic(uint32_t) atomic_uint32_t;
+
+enum {
+ XSC_IB_MMAP_CMD_SHIFT = 8,
+ XSC_IB_MMAP_CMD_MASK = 0xff,
+};
+
+enum {
+ XSC_CQE_VERSION_V0 = 0,
+ XSC_CQE_VERSION_V1 = 1,
+};
+
+enum {
+ XSC_ADAPTER_PAGE_SIZE = 4096,
+};
+
+enum {
+ XSC_QP_FLAG_RAWPACKET_TSO = 1 << 9,
+ XSC_QP_FLAG_RAWPACKET_TX = 1 << 10,
+};
+
+#define XSC_CQ_PREFIX "XSC_CQ"
+#define XSC_QP_PREFIX "XSC_QP"
+#define XSC_MR_PREFIX "XSC_MR"
+#define XSC_RWQ_PREFIX "XSC_RWQ"
+#define XSC_MAX_LOG2_CONTIG_BLOCK_SIZE 23
+#define XSC_MIN_LOG2_CONTIG_BLOCK_SIZE 12
+
+enum {
+ XSC_DBG_QP = 1 << 0,
+ XSC_DBG_CQ = 1 << 1,
+ XSC_DBG_QP_SEND = 1 << 2,
+ XSC_DBG_QP_SEND_ERR = 1 << 3,
+ XSC_DBG_CQ_CQE = 1 << 4,
+ XSC_DBG_CONTIG = 1 << 5,
+ XSC_DBG_DR = 1 << 6,
+ XSC_DBG_CTX = 1 << 7,
+ XSC_DBG_PD = 1 << 8,
+ XSC_DBG_MR = 1 << 9,
+};
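+
+/*
+ * Example (illustrative): XSC_DEBUG_MASK is parsed with strtol(..., 0),
+ * so decimal or hex values work; e.g. XSC_DEBUG_MASK=0x83 enables the
+ * XSC_DBG_QP, XSC_DBG_CQ and XSC_DBG_CTX messages, and XSC_DEBUG_FILE
+ * redirects the output from stderr to the named file.
+ */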
+
+extern uint32_t xsc_debug_mask;
+extern int xsc_freeze_on_error_cqe;
+
+#define XSC_DEBUG
+#ifdef XSC_DEBUG
+#define xsc_dbg(fp, mask, fmt, args...) \
+do { \
+ if (xsc_debug_mask & mask) { \
+ char host[256]; \
+ char timestr[32]; \
+ struct tm now_tm; \
+ time_t now_time; \
+ time(&now_time); \
+ localtime_r(&now_time, &now_tm); \
+ strftime(timestr, sizeof(timestr), "%Y-%m-%d %X", &now_tm); \
+ gethostname(host, 256); \
+ fprintf(fp, "[%s %s %s %d] " fmt, timestr, host, __func__, __LINE__, ##args); \
+ } \
+} while (0)
+#else
+static inline void xsc_dbg(FILE *fp, uint32_t mask, const char *fmt, ...)
+{
+}
+#endif
+
+#define xsc_err(fmt, args...) \
+do { \
+ char host[256]; \
+ char timestr[32]; \
+ struct tm now_tm; \
+ time_t now_time; \
+ time(&now_time); \
+ localtime_r(&now_time, &now_tm); \
+ strftime(timestr, sizeof(timestr), "%Y-%m-%d %X", &now_tm); \
+ gethostname(host, 256); \
+ printf("[%s %s %s %d] " fmt, timestr, host, __func__, __LINE__, ##args); \
+} while (0)
+
+enum {
+ XSC_QP_TABLE_SHIFT = 12,
+ XSC_QP_TABLE_MASK = (1 << XSC_QP_TABLE_SHIFT) - 1,
+ XSC_QP_TABLE_SIZE = 1 << (24 - XSC_QP_TABLE_SHIFT),
+};
+
+enum {
+ XSC_UIDX_TABLE_SHIFT = 12,
+ XSC_UIDX_TABLE_MASK = (1 << XSC_UIDX_TABLE_SHIFT) - 1,
+ XSC_UIDX_TABLE_SIZE = 1 << (24 - XSC_UIDX_TABLE_SHIFT),
+};
+
+enum {
+ XSC_MAX_PORTS_NUM = 2,
+};
+
+enum xsc_alloc_type {
+ XSC_ALLOC_TYPE_ANON,
+ XSC_ALLOC_TYPE_HUGE,
+ XSC_ALLOC_TYPE_CONTIG,
+ XSC_ALLOC_TYPE_PREFER_HUGE,
+ XSC_ALLOC_TYPE_PREFER_CONTIG,
+ XSC_ALLOC_TYPE_EXTERNAL,
+ XSC_ALLOC_TYPE_ALL
+};
+
+enum xsc_rsc_type {
+ XSC_RSC_TYPE_QP,
+ XSC_RSC_TYPE_XSRQ,
+ XSC_RSC_TYPE_SRQ,
+ XSC_RSC_TYPE_RWQ,
+ XSC_RSC_TYPE_INVAL,
+};
+
+enum xsc_vendor_cap_flags {
+ XSC_VENDOR_CAP_FLAGS_MPW = 1 << 0, /* Obsoleted */
+ XSC_VENDOR_CAP_FLAGS_MPW_ALLOWED = 1 << 1,
+ XSC_VENDOR_CAP_FLAGS_ENHANCED_MPW = 1 << 2,
+ XSC_VENDOR_CAP_FLAGS_CQE_128B_COMP = 1 << 3,
+ XSC_VENDOR_CAP_FLAGS_CQE_128B_PAD = 1 << 4,
+};
+
+enum {
+ XSC_FLOW_TAG_MASK = 0x00ffffff,
+};
+
+struct xsc_resource {
+ enum xsc_rsc_type type;
+ uint32_t rsn;
+};
+
+struct xsc_device {
+ struct verbs_device verbs_dev;
+ int page_size;
+ int driver_abi_ver;
+};
+
+struct xsc_db_page;
+
+struct xsc_spinlock {
+ pthread_spinlock_t lock;
+ int in_use;
+ int need_lock;
+};
+
+/* PAGE_SHIFT determines the page size */
+
+#define PAGE_SHIFT 12
+#define PAGE_SIZE (1UL << PAGE_SHIFT)
+#define PAGE_MASK (~(PAGE_SIZE-1))
+
+struct xsc_context {
+ struct verbs_context ibv_ctx;
+ int max_num_qps;
+ struct {
+ struct xsc_qp **table;
+ int refcnt;
+ } qp_table[XSC_QP_TABLE_SIZE];
+ pthread_mutex_t qp_table_mutex;
+
+ struct {
+ struct xsc_resource **table;
+ int refcnt;
+ } uidx_table[XSC_UIDX_TABLE_SIZE];
+ pthread_mutex_t uidx_table_mutex;
+
+ struct xsc_db_page *db_list;
+ pthread_mutex_t db_list_mutex;
+ int cache_line_size;
+ int max_sq_desc_sz;
+ int max_rq_desc_sz;
+ int max_send_wqebb;
+ int max_recv_wr;
+ int num_ports;
+ int stall_enable;
+ int stall_adaptive_enable;
+ int stall_cycles;
+ char hostname[40];
+ struct xsc_spinlock hugetlb_lock;
+ struct list_head hugetlb_list;
+ int cqe_version;
+ uint8_t cached_link_layer[XSC_MAX_PORTS_NUM];
+ uint8_t cached_port_flags[XSC_MAX_PORTS_NUM];
+ unsigned int cached_device_cap_flags;
+ enum ibv_atomic_cap atomic_cap;
+ struct {
+ uint64_t offset;
+ uint64_t mask;
+ } core_clock;
+ void *hca_core_clock;
+ const struct xsc_ib_clock_info *clock_info_page;
+ struct ibv_tso_caps cached_tso_caps;
+ int cmds_supp_uhw;
+ uint64_t vendor_cap_flags; /* Use enum xsc_vendor_cap_flags */
+ struct xscdv_cqe_comp_caps cqe_comp_caps;
+ struct xscdv_ctx_allocators extern_alloc;
+ struct xscdv_sw_parsing_caps sw_parsing_caps;
+ struct xscdv_striding_rq_caps striding_rq_caps;
+ uint32_t tunnel_offloads_caps;
+ struct xsc_packet_pacing_caps packet_pacing_caps;
+ uint16_t flow_action_flags;
+ uint64_t max_dm_size;
+ uint32_t eth_min_inline_size;
+ uint32_t dump_fill_mkey;
+ __be32 dump_fill_mkey_be;
+ void *sqm_reg_va;
+ void *rqm_reg_va;
+ void *cqm_reg_va;
+ void *cqm_armdb_va;
+ int db_mmap_size;
+ uint32_t page_size;
+ uint64_t qpm_tx_db;
+ uint64_t qpm_rx_db;
+ uint64_t cqm_next_cid_reg;
+ uint64_t cqm_armdb;
+ uint32_t send_ds_num;
+ uint32_t recv_ds_num;
+ uint32_t send_ds_shift;
+ uint32_t recv_ds_shift;
+ FILE *dbg_fp;
+};
+
+struct xsc_bitmap {
+ uint32_t last;
+ uint32_t top;
+ uint32_t max;
+ uint32_t avail;
+ uint32_t mask;
+ unsigned long *table;
+};
+
+struct xsc_hugetlb_mem {
+ int shmid;
+ void *shmaddr;
+ struct xsc_bitmap bitmap;
+ struct list_node entry;
+};
+
+struct xsc_buf {
+ void *buf;
+ size_t length;
+ int base;
+ struct xsc_hugetlb_mem *hmem;
+ enum xsc_alloc_type type;
+};
+
+struct xsc_pd {
+ struct ibv_pd ibv_pd;
+ uint32_t pdn;
+ atomic_int refcount;
+ struct xsc_pd *xprotection_domain;
+};
+
+struct xsc_parent_domain {
+ struct xsc_pd xpd;
+};
+
+enum {
+ XSC_CQ_FLAGS_RX_CSUM_VALID = 1 << 0,
+ XSC_CQ_FLAGS_EMPTY_DURING_POLL = 1 << 1,
+ XSC_CQ_FLAGS_FOUND_CQES = 1 << 2,
+ XSC_CQ_FLAGS_EXTENDED = 1 << 3,
+ XSC_CQ_FLAGS_SINGLE_THREADED = 1 << 4,
+ XSC_CQ_FLAGS_DV_OWNED = 1 << 5,
+ XSC_CQ_FLAGS_TM_SYNC_REQ = 1 << 6,
+};
+
+struct xsc_err_state_qp_node {
+ struct list_node entry;
+ uint32_t qp_id;
+ bool is_sq;
+};
+
+struct xsc_cq {
+ /* ibv_cq should always be subset of ibv_cq_ex */
+ struct verbs_cq verbs_cq;
+ struct xsc_buf buf_a;
+ struct xsc_buf buf_b;
+ struct xsc_buf *active_buf;
+ struct xsc_buf *resize_buf;
+ int resize_cqes;
+ int active_cqes;
+ struct xsc_spinlock lock;
+ uint32_t cqn;
+ uint32_t cons_index;
+ __le32 *dbrec;
+ __le32 *db;
+ __le32 *armdb;
+ uint32_t cqe_cnt;
+ int log2_cq_ring_sz;
+ int arm_sn;
+ int cqe_sz;
+ int resize_cqe_sz;
+ int stall_next_poll;
+ int stall_enable;
+ uint64_t stall_last_count;
+ int stall_adaptive_enable;
+ int stall_cycles;
+ struct xsc_resource *cur_rsc;
+ struct xsc_cqe64 *cqe64;
+ uint32_t flags;
+ int umr_opcode;
+ struct xscdv_clock_info last_clock_info;
+ bool disable_flush_error_cqe;
+ struct list_head err_state_qp_list;
+};
+
+struct wr_list {
+ uint16_t opcode;
+ uint16_t next;
+};
+
+struct xsc_wq {
+ uint64_t *wrid;
+ unsigned *wqe_head;
+ struct xsc_spinlock lock;
+ unsigned wqe_cnt;
+ unsigned max_post;
+ unsigned head;
+ unsigned tail;
+ unsigned cur_post;
+ int max_gs;
+ int wqe_shift;
+ int offset;
+ void *qend;
+ uint32_t *wr_data;
+ __le32 *db;
+ unsigned ds_cnt;
+ unsigned seg_cnt;
+ unsigned *wr_opcode;
+ unsigned *need_flush;
+ unsigned flush_wqe_cnt;
+};
+
+struct xsc_dm {
+ struct verbs_dm verbs_dm;
+ size_t length;
+ void *mmap_va;
+ void *start_va;
+};
+
+struct xsc_mr {
+ struct verbs_mr vmr;
+ struct xsc_buf buf;
+ uint32_t alloc_flags;
+};
+
+enum xsc_qp_flags {
+ XSC_QP_FLAGS_USE_UNDERLAY = 0x01,
+};
+
+struct xsc_qp {
+ struct xsc_resource rsc; /* This struct must be first */
+ struct verbs_qp verbs_qp;
+ struct ibv_qp *ibv_qp;
+ struct xsc_buf buf;
+ void *sq_start;
+ void *rq_start;
+ int max_inline_data;
+ int buf_size;
+ /* For Raw Packet QP, use different buffers for the SQ and RQ */
+ struct xsc_buf sq_buf;
+ int sq_buf_size;
+
+ uint8_t fm_cache;
+ uint8_t sq_signal_bits;
+ struct xsc_wq sq;
+
+ __le32 *db;
+ struct xsc_wq rq;
+ int wq_sig;
+ uint32_t qp_cap_cache;
+ int atomics_enabled;
+ uint32_t max_tso;
+ uint16_t max_tso_header;
+ int rss_qp;
+ uint32_t flags; /* Use enum xsc_qp_flags */
+ enum xscdv_dc_type dc_type;
+ uint32_t tirn;
+ uint32_t tisn;
+ uint32_t rqn;
+ uint32_t sqn;
+};
+
+struct xsc_ah {
+ struct ibv_ah ibv_ah;
+ struct xsc_wqe_av av;
+ bool kern_ah;
+};
+
+struct xsc_rwq {
+ struct xsc_resource rsc;
+ struct ibv_wq wq;
+ struct xsc_buf buf;
+ int buf_size;
+ struct xsc_wq rq;
+ __le32 *db;
+ void *pbuff;
+ __le32 *recv_db;
+ int wq_sig;
+};
+
+struct xsc_counter_node {
+ uint32_t index;
+ struct list_node entry;
+ enum ibv_counter_description desc;
+};
+
+struct xsc_counters {
+ struct verbs_counters vcounters;
+ struct list_head counters_list;
+ pthread_mutex_t lock;
+ uint32_t ncounters;
+ /* number of bounded objects */
+ int refcount;
+};
+
+struct xsc_flow {
+ struct ibv_flow flow_id;
+ struct xsc_counters *mcounters;
+};
+
+struct xscdv_flow_matcher {
+ struct ibv_context *context;
+ uint32_t handle;
+};
+
+struct xscdv_devx_obj {
+ struct ibv_context *context;
+ uint32_t handle;
+};
+
+struct xsc_devx_umem {
+ struct xscdv_devx_umem dv_devx_umem;
+ struct ibv_context *context;
+ uint32_t handle;
+ void *addr;
+ size_t size;
+};
+
+union xsc_ib_fw_ver {
+ uint64_t data;
+ struct {
+ uint8_t ver_major;
+ uint8_t ver_minor;
+ uint16_t ver_patch;
+ uint32_t ver_tweak;
+ } s;
+};
+
+static inline int xsc_ilog2(int n)
+{
+ int t;
+
+ if (n <= 0)
+ return -1;
+
+ t = 0;
+ while ((1 << t) < n)
+ ++t;
+
+ return t;
+}
+
+extern int xsc_stall_num_loop;
+extern int xsc_stall_cq_poll_min;
+extern int xsc_stall_cq_poll_max;
+extern int xsc_stall_cq_inc_step;
+extern int xsc_stall_cq_dec_step;
+extern int xsc_single_threaded;
+
+static inline unsigned DIV_ROUND_UP(unsigned n, unsigned d)
+{
+ return (n + d - 1u) / d;
+}
+
+static inline unsigned long align(unsigned long val, unsigned long align)
+{
+ return (val + align - 1) & ~(align - 1);
+}
+
+static inline struct xsc_device *to_xdev(struct ibv_device *ibdev)
+{
+ return container_of(ibdev, struct xsc_device, verbs_dev.device);
+}
+
+static inline struct xsc_context *to_xctx(struct ibv_context *ibctx)
+{
+ return container_of(ibctx, struct xsc_context, ibv_ctx.context);
+}
+
+/* to_xpd always returns the real xsc_pd object, i.e. the protection domain. */
+static inline struct xsc_pd *to_xpd(struct ibv_pd *ibpd)
+{
+ struct xsc_pd *xpd = container_of(ibpd, struct xsc_pd, ibv_pd);
+
+ if (xpd->xprotection_domain)
+ return xpd->xprotection_domain;
+
+ return xpd;
+}
+
+static inline struct xsc_parent_domain *to_xparent_domain(struct ibv_pd *ibpd)
+{
+ struct xsc_parent_domain *xparent_domain =
+ ibpd ? container_of(ibpd, struct xsc_parent_domain, xpd.ibv_pd) : NULL;
+
+ if (xparent_domain && xparent_domain->xpd.xprotection_domain)
+ return xparent_domain;
+
+ /* Otherwise ibpd isn't a parent_domain */
+ return NULL;
+}
+
+static inline struct xsc_cq *to_xcq(struct ibv_cq *ibcq)
+{
+ return container_of((struct ibv_cq_ex *)ibcq, struct xsc_cq, verbs_cq.cq_ex);
+}
+
+static inline struct xsc_qp *to_xqp(struct ibv_qp *ibqp)
+{
+ struct verbs_qp *vqp = (struct verbs_qp *)ibqp;
+
+ return container_of(vqp, struct xsc_qp, verbs_qp);
+}
+
+static inline struct xsc_rwq *to_xrwq(struct ibv_wq *ibwq)
+{
+ return container_of(ibwq, struct xsc_rwq, wq);
+}
+
+static inline struct xsc_dm *to_xdm(struct ibv_dm *ibdm)
+{
+ return container_of(ibdm, struct xsc_dm, verbs_dm.dm);
+}
+
+static inline struct xsc_mr *to_xmr(struct ibv_mr *ibmr)
+{
+ return container_of(ibmr, struct xsc_mr, vmr.ibv_mr);
+}
+
+static inline struct xsc_ah *to_xah(struct ibv_ah *ibah)
+{
+ return container_of(ibah, struct xsc_ah, ibv_ah);
+}
+
+static inline int max_int(int a, int b)
+{
+ return a > b ? a : b;
+}
+
+static inline struct xsc_qp *rsc_to_xqp(struct xsc_resource *rsc)
+{
+ return (struct xsc_qp *)rsc;
+}
+
+static inline struct xsc_rwq *rsc_to_mrwq(struct xsc_resource *rsc)
+{
+ return (struct xsc_rwq *)rsc;
+}
+
+static inline struct xsc_counters *to_mcounters(struct ibv_counters *ibcounters)
+{
+ return container_of(ibcounters, struct xsc_counters, vcounters.counters);
+}
+
+static inline struct xsc_flow *to_mflow(struct ibv_flow *flow_id)
+{
+ return container_of(flow_id, struct xsc_flow, flow_id);
+}
+
+int xsc_alloc_buf(struct xsc_buf *buf, size_t size, int page_size);
+void xsc_free_buf(struct xsc_buf *buf);
+int xsc_alloc_buf_contig(struct xsc_context *xctx, struct xsc_buf *buf,
+ size_t size, int page_size, const char *component);
+void xsc_free_buf_contig(struct xsc_context *xctx, struct xsc_buf *buf);
+int xsc_alloc_prefered_buf(struct xsc_context *xctx,
+ struct xsc_buf *buf,
+ size_t size, int page_size,
+ enum xsc_alloc_type alloc_type,
+ const char *component);
+int xsc_free_actual_buf(struct xsc_context *ctx, struct xsc_buf *buf);
+void xsc_get_alloc_type(struct xsc_context *context,
+ const char *component,
+ enum xsc_alloc_type *alloc_type,
+ enum xsc_alloc_type default_alloc_type);
+int xsc_use_huge(const char *key);
+bool xsc_is_extern_alloc(struct xsc_context *context);
+int xsc_alloc_buf_extern(struct xsc_context *ctx, struct xsc_buf *buf,
+ size_t size);
+void xsc_free_buf_extern(struct xsc_context *ctx, struct xsc_buf *buf);
+
+__le32 *xsc_alloc_dbrec(struct xsc_context *context);
+void xsc_free_db(struct xsc_context *context, __le32 *db);
+
+int xsc_query_device(struct ibv_context *context,
+ struct ibv_device_attr *attr);
+int xsc_query_device_ex(struct ibv_context *context,
+ const struct ibv_query_device_ex_input *input,
+ struct ibv_device_attr_ex *attr,
+ size_t attr_size);
+int xsc_query_rt_values(struct ibv_context *context,
+ struct ibv_values_ex *values);
+struct ibv_qp *xsc_create_qp_ex(struct ibv_context *context,
+ struct ibv_qp_init_attr_ex *attr);
+int xsc_query_port(struct ibv_context *context, uint8_t port,
+ struct ibv_port_attr *attr);
+
+struct ibv_pd *xsc_alloc_pd(struct ibv_context *context);
+int xsc_free_pd(struct ibv_pd *pd);
+
+struct ibv_mr *xsc_alloc_null_mr(struct ibv_pd *pd);
+struct ibv_mr *xsc_reg_mr(struct ibv_pd *pd, void *addr,
+ size_t length, uint64_t hca_va, int access);
+int xsc_rereg_mr(struct verbs_mr *mr, int flags, struct ibv_pd *pd, void *addr,
+ size_t length, int access);
+int xsc_dereg_mr(struct verbs_mr *mr);
+struct ibv_cq *xsc_create_cq(struct ibv_context *context, int cqe,
+ struct ibv_comp_channel *channel,
+ int comp_vector);
+struct ibv_cq_ex *xsc_create_cq_ex(struct ibv_context *context,
+ struct ibv_cq_init_attr_ex *cq_attr);
+int xsc_cq_fill_pfns(struct xsc_cq *cq,
+ const struct ibv_cq_init_attr_ex *cq_attr,
+ struct xsc_context *xctx);
+int xsc_alloc_cq_buf(struct xsc_context *xctx, struct xsc_cq *cq,
+ struct xsc_buf *buf, int nent, int cqe_sz);
+int xsc_free_cq_buf(struct xsc_context *ctx, struct xsc_buf *buf);
+int xsc_resize_cq(struct ibv_cq *cq, int cqe);
+int xsc_modify_cq(struct ibv_cq *cq, struct ibv_modify_cq_attr *attr);
+int xsc_destroy_cq(struct ibv_cq *cq);
+int xsc_poll_cq(struct ibv_cq *cq, int ne, struct ibv_wc *wc);
+int xsc_arm_cq(struct ibv_cq *cq, int solicited);
+void xsc_cq_event(struct ibv_cq *cq);
+void __xsc_cq_clean(struct xsc_cq *cq, uint32_t qpn);
+void xsc_cq_clean(struct xsc_cq *cq, uint32_t qpn);
+
+struct ibv_qp *xsc_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr);
+int xsc_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
+ int attr_mask,
+ struct ibv_qp_init_attr *init_attr);
+int xsc_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
+ int attr_mask);
+int xsc_modify_qp_rate_limit(struct ibv_qp *qp,
+ struct ibv_qp_rate_limit_attr *attr);
+int xsc_destroy_qp(struct ibv_qp *qp);
+void xsc_init_qp_indices(struct xsc_qp *qp);
+void xsc_init_rwq_indices(struct xsc_rwq *rwq);
+int xsc_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
+ struct ibv_send_wr **bad_wr);
+int xsc_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr,
+ struct ibv_recv_wr **bad_wr);
+int xsc_post_wq_recv(struct ibv_wq *ibwq, struct ibv_recv_wr *wr,
+ struct ibv_recv_wr **bad_wr);
+struct xsc_qp *xsc_find_qp(struct xsc_context *ctx, uint32_t qpn);
+int xsc_store_qp(struct xsc_context *ctx, uint32_t qpn, struct xsc_qp *qp);
+void xsc_clear_qp(struct xsc_context *ctx, uint32_t qpn);
+int xsc_err_state_qp(struct ibv_qp *qp, enum ibv_qp_state cur_state,
+ enum ibv_qp_state state);
+int32_t xsc_store_uidx(struct xsc_context *ctx, void *rsc);
+void xsc_clear_uidx(struct xsc_context *ctx, uint32_t uidx);
+struct ibv_ah *xsc_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr);
+int xsc_destroy_ah(struct ibv_ah *ah);
+int xsc_attach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid);
+int xsc_detach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid);
+int xsc_round_up_power_of_two(long long sz);
+void *xsc_get_send_wqe(struct xsc_qp *qp, int n);
+struct ibv_xrcd *xsc_open_xrcd(struct ibv_context *context,
+ struct ibv_xrcd_init_attr *xrcd_init_attr);
+int xsc_close_xrcd(struct ibv_xrcd *ib_xrcd);
+struct ibv_wq *xsc_create_wq(struct ibv_context *context,
+ struct ibv_wq_init_attr *attr);
+int xsc_modify_wq(struct ibv_wq *wq, struct ibv_wq_attr *attr);
+int xsc_destroy_wq(struct ibv_wq *wq);
+struct ibv_rwq_ind_table *xsc_create_rwq_ind_table(struct ibv_context *context,
+ struct ibv_rwq_ind_table_init_attr *init_attr);
+int xsc_destroy_rwq_ind_table(struct ibv_rwq_ind_table *rwq_ind_table);
+struct ibv_flow *xsc_create_flow(struct ibv_qp *qp, struct ibv_flow_attr *flow_attr);
+int xsc_destroy_flow(struct ibv_flow *flow_id);
+struct ibv_flow_action *xsc_create_flow_action_esp(struct ibv_context *ctx,
+ struct ibv_flow_action_esp_attr *attr);
+int xsc_destroy_flow_action(struct ibv_flow_action *action);
+int xsc_modify_flow_action_esp(struct ibv_flow_action *action,
+ struct ibv_flow_action_esp_attr *attr);
+
+struct ibv_dm *xsc_alloc_dm(struct ibv_context *context,
+ struct ibv_alloc_dm_attr *dm_attr);
+int xsc_free_dm(struct ibv_dm *ibdm);
+struct ibv_mr *xsc_reg_dm_mr(struct ibv_pd *pd, struct ibv_dm *ibdm,
+ uint64_t dm_offset, size_t length,
+ unsigned int acc);
+
+struct ibv_pd *xsc_alloc_parent_domain(struct ibv_context *context,
+ struct ibv_parent_domain_init_attr *attr);
+
+struct ibv_counters *xsc_create_counters(struct ibv_context *context,
+ struct ibv_counters_init_attr *init_attr);
+int xsc_destroy_counters(struct ibv_counters *counters);
+int xsc_attach_counters_point_flow(struct ibv_counters *counters,
+ struct ibv_counter_attach_attr *attr,
+ struct ibv_flow *flow);
+int xsc_read_counters(struct ibv_counters *counters,
+ uint64_t *counters_value,
+ uint32_t ncounters,
+ uint32_t flags);
+
+static inline void *xsc_find_uidx(struct xsc_context *ctx, uint32_t uidx)
+{
+ int tind = uidx >> XSC_UIDX_TABLE_SHIFT;
+
+ if (likely(ctx->uidx_table[tind].refcnt))
+ return ctx->uidx_table[tind].table[uidx & XSC_UIDX_TABLE_MASK];
+
+ return NULL;
+}
+
+static inline int xsc_spin_lock(struct xsc_spinlock *lock)
+{
+ if (lock->need_lock)
+ return pthread_spin_lock(&lock->lock);
+
+ if (unlikely(lock->in_use)) {
+		fprintf(stderr, "*** ERROR: multithreading violation ***\n"
+ "You are running a multithreaded application but\n"
+ "you set XSC_SINGLE_THREADED=1. Please unset it.\n");
+ abort();
+ } else {
+ lock->in_use = 1;
+ /*
+ * This fence is not at all correct, but it increases the
+ * chance that in_use is detected by another thread without
+		 * much runtime cost.
+		 */
+ atomic_thread_fence(memory_order_acq_rel);
+ }
+
+ return 0;
+}
+
+static inline int xsc_spin_unlock(struct xsc_spinlock *lock)
+{
+ if (lock->need_lock)
+ return pthread_spin_unlock(&lock->lock);
+
+ lock->in_use = 0;
+
+ return 0;
+}
+
+static inline int xsc_spinlock_init(struct xsc_spinlock *lock, int need_lock)
+{
+ lock->in_use = 0;
+ lock->need_lock = need_lock;
+ return pthread_spin_init(&lock->lock, PTHREAD_PROCESS_PRIVATE);
+}
+
+static inline int xsc_spinlock_init_pd(struct xsc_spinlock *lock, struct ibv_pd *pd)
+{
+ int thread_safe = xsc_single_threaded;
+
+ return xsc_spinlock_init(lock, !thread_safe);
+}
+
+static inline int xsc_spinlock_destroy(struct xsc_spinlock *lock)
+{
+ return pthread_spin_destroy(&lock->lock);
+}
+
+static inline void set_command(int command, off_t *offset)
+{
+ *offset |= (command << XSC_IB_MMAP_CMD_SHIFT);
+}
+
+static inline void set_arg(int arg, off_t *offset)
+{
+ *offset |= arg;
+}
+
+static inline void set_order(int order, off_t *offset)
+{
+ set_arg(order, offset);
+}
+
+static inline void set_index(int index, off_t *offset)
+{
+ set_arg(index, offset);
+}
+
+static inline void set_extended_index(int index, off_t *offset)
+{
+ *offset |= (index & 0xff) | ((index >> 8) << 16);
+}
+
+static inline uint8_t calc_sig(void *wqe, int size)
+{
+ int i;
+ uint8_t *p = wqe;
+ uint8_t res = 0;
+
+ for (i = 0; i < size; ++i)
+ res ^= p[i];
+
+ return ~res;
+}
+
+#endif /* XSCALE_H */
diff --git a/providers/xscale/xscdv.h b/providers/xscale/xscdv.h
new file mode 100644
index 0000000..98d2daf
--- /dev/null
+++ b/providers/xscale/xscdv.h
@@ -0,0 +1,876 @@
+/*
+ * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd.
+ * All rights reserved.
+ */
+
+#ifndef _XSCDV_H_
+#define _XSCDV_H_
+
+#include <stdio.h>
+#include <linux/types.h> /* For the __be64 type */
+#include <sys/types.h>
+#include <endian.h>
+#if defined(__SSE3__)
+#include <limits.h>
+#include <emmintrin.h>
+#include <tmmintrin.h>
+#endif /* defined(__SSE3__) */
+
+#include <infiniband/verbs.h>
+#include <infiniband/tm_types.h>
+#include <infiniband/xsc_api.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Always inline the functions */
+#ifdef __GNUC__
+#define XSCDV_ALWAYS_INLINE inline __attribute__((always_inline))
+#else
+#define XSCDV_ALWAYS_INLINE inline
+#endif
+
+enum {
+ XSC_RCV_DBR = 0,
+ XSC_SND_DBR = 1,
+};
+
+enum xscdv_context_comp_mask {
+ XSCDV_CONTEXT_MASK_CQE_COMPRESION = 1 << 0,
+ XSCDV_CONTEXT_MASK_SWP = 1 << 1,
+ XSCDV_CONTEXT_MASK_STRIDING_RQ = 1 << 2,
+ XSCDV_CONTEXT_MASK_TUNNEL_OFFLOADS = 1 << 3,
+ XSCDV_CONTEXT_MASK_DYN_BFREGS = 1 << 4,
+ XSCDV_CONTEXT_MASK_CLOCK_INFO_UPDATE = 1 << 5,
+ XSCDV_CONTEXT_MASK_FLOW_ACTION_FLAGS = 1 << 6,
+};
+
+struct xscdv_cqe_comp_caps {
+ uint32_t max_num;
+ uint32_t supported_format; /* enum xscdv_cqe_comp_res_format */
+};
+
+struct xscdv_sw_parsing_caps {
+ uint32_t sw_parsing_offloads; /* Use enum xscdv_sw_parsing_offloads */
+ uint32_t supported_qpts;
+};
+
+struct xscdv_striding_rq_caps {
+ uint32_t min_single_stride_log_num_of_bytes;
+ uint32_t max_single_stride_log_num_of_bytes;
+ uint32_t min_single_wqe_log_num_of_strides;
+ uint32_t max_single_wqe_log_num_of_strides;
+ uint32_t supported_qpts;
+};
+
+/*
+ * Direct verbs device-specific attributes
+ */
+struct xscdv_context {
+ uint8_t version;
+ uint64_t flags;
+ uint64_t comp_mask;
+ struct xscdv_cqe_comp_caps cqe_comp_caps;
+ struct xscdv_sw_parsing_caps sw_parsing_caps;
+ struct xscdv_striding_rq_caps striding_rq_caps;
+ uint32_t tunnel_offloads_caps;
+ uint64_t max_clock_info_update_nsec;
+ uint32_t flow_action_flags;
+};
+
+enum xscdv_context_flags {
+ /*
+ * This flag indicates if CQE version 0 or 1 is needed.
+ */
+ XSCDV_CONTEXT_FLAGS_CQE_V1 = (1 << 0),
+ XSCDV_CONTEXT_FLAGS_OBSOLETE = (1 << 1), /* Obsoleted, don't use */
+ XSCDV_CONTEXT_FLAGS_MPW_ALLOWED = (1 << 2),
+ XSCDV_CONTEXT_FLAGS_ENHANCED_MPW = (1 << 3),
+ XSCDV_CONTEXT_FLAGS_CQE_128B_COMP = (1 << 4), /* Support CQE 128B compression */
+ XSCDV_CONTEXT_FLAGS_CQE_128B_PAD = (1 << 5), /* Support CQE 128B padding */
+};
+
+enum xscdv_cq_init_attr_mask {
+ XSCDV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE = 1 << 0,
+ XSCDV_CQ_INIT_ATTR_MASK_FLAGS = 1 << 1,
+ XSCDV_CQ_INIT_ATTR_MASK_CQE_SIZE = 1 << 2,
+};
+
+struct xscdv_cq_init_attr {
+ uint64_t comp_mask; /* Use enum xscdv_cq_init_attr_mask */
+ uint8_t cqe_comp_res_format; /* Use enum xscdv_cqe_comp_res_format */
+ uint32_t flags;
+ uint16_t cqe_size; /* when XSCDV_CQ_INIT_ATTR_MASK_CQE_SIZE set */
+};
+
+struct ibv_cq_ex *xscdv_create_cq(struct ibv_context *context,
+ struct ibv_cq_init_attr_ex *cq_attr,
+ struct xscdv_cq_init_attr *xcq_attr);
+
+enum xscdv_qp_create_flags {
+ XSCDV_QP_CREATE_TUNNEL_OFFLOADS = 1 << 0,
+ XSCDV_QP_CREATE_TIR_ALLOW_SELF_LOOPBACK_UC = 1 << 1,
+ XSCDV_QP_CREATE_TIR_ALLOW_SELF_LOOPBACK_MC = 1 << 2,
+ XSCDV_QP_CREATE_DISABLE_SCATTER_TO_CQE = 1 << 3,
+ XSCDV_QP_CREATE_ALLOW_SCATTER_TO_CQE = 1 << 4,
+};
+
+enum xscdv_qp_init_attr_mask {
+ XSCDV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS = 1 << 0,
+ XSCDV_QP_INIT_ATTR_MASK_DC = 1 << 1,
+};
+
+enum xscdv_dc_type {
+ XSCDV_DCTYPE_DCT = 1,
+ XSCDV_DCTYPE_DCI,
+};
+
+struct xscdv_dc_init_attr {
+ enum xscdv_dc_type dc_type;
+ uint64_t dct_access_key;
+};
+
+struct xscdv_qp_init_attr {
+ uint64_t comp_mask; /* Use enum xscdv_qp_init_attr_mask */
+	uint32_t create_flags; /* Use enum xscdv_qp_create_flags */
+ struct xscdv_dc_init_attr dc_init_attr;
+};
+
+struct ibv_qp *xscdv_create_qp(struct ibv_context *context,
+ struct ibv_qp_init_attr_ex *qp_attr,
+ struct xscdv_qp_init_attr *xqp_attr);
+
+enum xscdv_flow_action_esp_mask {
+ XSCDV_FLOW_ACTION_ESP_MASK_FLAGS = 1 << 0,
+};
+
+struct xscdv_flow_action_esp {
+ uint64_t comp_mask; /* Use enum xscdv_flow_action_esp_mask */
+ uint32_t action_flags; /* Use enum xscdv_flow_action_flags */
+};
+
+struct xscdv_flow_match_parameters {
+ size_t match_sz;
+ uint64_t match_buf[]; /* Device spec format */
+};
+
+struct xscdv_flow_matcher_attr {
+ enum ibv_flow_attr_type type;
+ uint32_t flags; /* From enum ibv_flow_flags */
+ uint16_t priority;
+ uint8_t match_criteria_enable; /* Device spec format */
+ struct xscdv_flow_match_parameters *match_mask;
+ uint64_t comp_mask;
+};
+
+struct xscdv_flow_matcher;
+
+struct xscdv_flow_matcher *
+xscdv_create_flow_matcher(struct ibv_context *context,
+ struct xscdv_flow_matcher_attr *matcher_attr);
+
+int xscdv_destroy_flow_matcher(struct xscdv_flow_matcher *matcher);
+
+enum xscdv_flow_action_type {
+ XSCDV_FLOW_ACTION_DEST_IBV_QP,
+ XSCDV_FLOW_ACTION_DROP,
+ XSCDV_FLOW_ACTION_IBV_COUNTER,
+ XSCDV_FLOW_ACTION_IBV_FLOW_ACTION,
+ XSCDV_FLOW_ACTION_TAG,
+ XSCDV_FLOW_ACTION_DEST_DEVX,
+};
+
+struct xscdv_flow_action_attr {
+ enum xscdv_flow_action_type type;
+ union {
+ struct ibv_qp *qp;
+ struct ibv_counters *counter;
+ struct ibv_flow_action *action;
+ uint32_t tag_value;
+ struct xscdv_devx_obj *obj;
+ };
+};
+
+struct ibv_flow *
+xscdv_create_flow(struct xscdv_flow_matcher *matcher,
+ struct xscdv_flow_match_parameters *match_value,
+ size_t num_actions,
+ struct xscdv_flow_action_attr actions_attr[]);
+
+struct ibv_flow_action *xscdv_create_flow_action_esp(struct ibv_context *ctx,
+ struct ibv_flow_action_esp_attr *esp,
+ struct xscdv_flow_action_esp *xattr);
+
+/*
+ * xscdv_create_flow_action_modify_header - Create a flow action which mutates
+ * a packet. The flow action can be attached to steering rules via
+ * ibv_create_flow().
+ *
+ * @ctx: RDMA device context to create the action on.
+ * @actions_sz: The size of *actions* buffer in bytes.
+ * @actions: A buffer which contains modify actions provided in device spec
+ * format.
+ * @ft_type: Defines the flow table type to which the modify
+ * header action will be attached.
+ *
+ * Return a valid ibv_flow_action if successful, NULL otherwise.
+ */
+struct ibv_flow_action *
+xscdv_create_flow_action_modify_header(struct ibv_context *ctx,
+ size_t actions_sz,
+ uint64_t actions[],
+ enum xscdv_flow_table_type ft_type);
+
+/*
+ * xscdv_create_flow_action_packet_reformat - Create flow action which can
+ * encap/decap packets.
+ */
+struct ibv_flow_action *
+xscdv_create_flow_action_packet_reformat(struct ibv_context *ctx,
+ size_t data_sz,
+ void *data,
+ enum xscdv_flow_action_packet_reformat_type reformat_type,
+ enum xscdv_flow_table_type ft_type);
+/*
+ * Most device capabilities are exported by ibv_query_device(...),
+ * but some HW device-specific information that is important for the
+ * data path is not provided there.
+ *
+ * Return 0 on success.
+ */
+int xscdv_query_device(struct ibv_context *ctx_in,
+ struct xscdv_context *attrs_out);
+
+enum xscdv_qp_comp_mask {
+ XSCDV_QP_MASK_UAR_MMAP_OFFSET = 1 << 0,
+ XSCDV_QP_MASK_RAW_QP_HANDLES = 1 << 1,
+};
+
+struct xscdv_qp {
+ __le32 *dbrec;
+ struct {
+ void *buf;
+ uint32_t wqe_cnt;
+ uint32_t stride;
+ __le32 *db;
+ } sq;
+ struct {
+ void *buf;
+ uint32_t wqe_cnt;
+ uint32_t stride;
+ __le32 *db;
+ } rq;
+ uint64_t comp_mask;
+ uint32_t tirn;
+ uint32_t tisn;
+ uint32_t rqn;
+ uint32_t sqn;
+};
+
+struct xscdv_cq {
+ void *buf;
+ __le32 *dbrec;
+ __le32 *db;
+ uint32_t cqe_cnt;
+ uint32_t cqe_size;
+ uint32_t cqn;
+ uint64_t comp_mask;
+};
+
+struct xscdv_rwq {
+ void *buf;
+ __le32 *dbrec;
+ uint32_t wqe_cnt;
+ uint32_t stride;
+ uint64_t comp_mask;
+ __le32 *db;
+};
+
+struct xscdv_dm {
+ void *buf;
+ uint64_t length;
+ uint64_t comp_mask;
+};
+
+struct xsc_wqe_av;
+
+struct xscdv_ah {
+ struct xsc_wqe_av *av;
+ uint64_t comp_mask;
+};
+
+struct xscdv_pd {
+ uint32_t pdn;
+ uint64_t comp_mask;
+};
+
+struct xscdv_obj {
+ struct {
+ struct ibv_qp *in;
+ struct xscdv_qp *out;
+ } qp;
+ struct {
+ struct ibv_cq *in;
+ struct xscdv_cq *out;
+ } cq;
+ struct {
+ struct ibv_wq *in;
+ struct xscdv_rwq *out;
+ } rwq;
+ struct {
+ struct ibv_dm *in;
+ struct xscdv_dm *out;
+ } dm;
+ struct {
+ struct ibv_ah *in;
+ struct xscdv_ah *out;
+ } ah;
+ struct {
+ struct ibv_pd *in;
+ struct xscdv_pd *out;
+ } pd;
+};
+
+enum xscdv_obj_type {
+ XSCDV_OBJ_QP = 1 << 0,
+ XSCDV_OBJ_CQ = 1 << 1,
+ XSCDV_OBJ_SRQ = 1 << 2,
+ XSCDV_OBJ_RWQ = 1 << 3,
+ XSCDV_OBJ_DM = 1 << 4,
+ XSCDV_OBJ_AH = 1 << 5,
+ XSCDV_OBJ_PD = 1 << 6,
+};
+
+enum xscdv_wq_init_attr_mask {
+ XSCDV_WQ_INIT_ATTR_MASK_STRIDING_RQ = 1 << 0,
+};
+
+struct xscdv_striding_rq_init_attr {
+ uint32_t single_stride_log_num_of_bytes;
+ uint32_t single_wqe_log_num_of_strides;
+ uint8_t two_byte_shift_en;
+};
+
+struct xscdv_wq_init_attr {
+ uint64_t comp_mask; /* Use enum xscdv_wq_init_attr_mask */
+ struct xscdv_striding_rq_init_attr striding_rq_attrs;
+};
+
+/*
+ * This function creates a work queue object with extra properties
+ * defined by xscdv_wq_init_attr struct.
+ *
+ * For each bit in the comp_mask, a field in xscdv_wq_init_attr
+ * should follow.
+ *
+ * XSCDV_WQ_INIT_ATTR_MASK_STRIDING_RQ: Create a work queue with
+ * striding RQ capabilities.
+ * - single_stride_log_num_of_bytes represents the size of each stride in the
+ * WQE and its value should be between min_single_stride_log_num_of_bytes
+ * and max_single_stride_log_num_of_bytes that are reported in
+ * xscdv_query_device.
+ * - single_wqe_log_num_of_strides represents the number of strides in each WQE.
+ * Its value should be between min_single_wqe_log_num_of_strides and
+ * max_single_wqe_log_num_of_strides that are reported in xscdv_query_device.
+ * - two_byte_shift_en: When enabled, hardware pads 2 bytes of zeroes
+ * before writing the message to memory (e.g. for IP alignment)
+ */
+struct ibv_wq *xscdv_create_wq(struct ibv_context *context,
+ struct ibv_wq_init_attr *wq_init_attr,
+ struct xscdv_wq_init_attr *xwq_attr);
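+
+/*
+ * Illustrative sketch (the log sizes below are examples and must fall
+ * inside the ranges reported by xscdv_query_device()):
+ *
+ *	struct xscdv_wq_init_attr xwq_attr = {
+ *		.comp_mask = XSCDV_WQ_INIT_ATTR_MASK_STRIDING_RQ,
+ *		.striding_rq_attrs = {
+ *			.single_stride_log_num_of_bytes = 6,
+ *			.single_wqe_log_num_of_strides = 9,
+ *			.two_byte_shift_en = 0,
+ *		},
+ *	};
+ *
+ *	wq = xscdv_create_wq(context, &wq_init_attr, &xwq_attr);
+ */
+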
+/*
+ * This function will initialize xscdv_xxx structs based on supplied type.
+ * The information for initialization is taken from ibv_xx structs supplied
+ * as part of input.
+ *
+ * Requesting information for a CQ marks it as owned by DV for all
+ * consumer-index related actions.
+ *
+ * The initialization type can be a combination of several types.
+ *
+ * Return: 0 in case of success.
+ */
+int xscdv_init_obj(struct xscdv_obj *obj, uint64_t obj_type);
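+
+/*
+ * Illustrative usage sketch (variable names are examples only):
+ * retrieving the raw layouts of an existing QP and its CQ.
+ *
+ *	struct xscdv_qp dv_qp = {};
+ *	struct xscdv_cq dv_cq = {};
+ *	struct xscdv_obj obj = {};
+ *
+ *	obj.qp.in = qp;
+ *	obj.qp.out = &dv_qp;
+ *	obj.cq.in = cq;
+ *	obj.cq.out = &dv_cq;
+ *	if (!xscdv_init_obj(&obj, XSCDV_OBJ_QP | XSCDV_OBJ_CQ)) {
+ *		// dv_qp.sq.buf, dv_qp.sq.db, dv_cq.buf etc. are now valid
+ *	}
+ */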
+
+enum {
+ XSC_OPCODE_NOP = 0x00,
+ XSC_OPCODE_SEND_INVAL = 0x01,
+ XSC_OPCODE_RDMA_WRITE = 0x08,
+ XSC_OPCODE_RDMA_WRITE_IMM = 0x09,
+ XSC_OPCODE_SEND = 0x0a,
+ XSC_OPCODE_SEND_IMM = 0x0b,
+ XSC_OPCODE_TSO = 0x0e,
+ XSC_OPCODE_RDMA_READ = 0x10,
+ XSC_OPCODE_ATOMIC_CS = 0x11,
+ XSC_OPCODE_ATOMIC_FA = 0x12,
+ XSC_OPCODE_ATOMIC_MASKED_CS = 0x14,
+ XSC_OPCODE_ATOMIC_MASKED_FA = 0x15,
+ XSC_OPCODE_FMR = 0x19,
+ XSC_OPCODE_LOCAL_INVAL = 0x1b,
+ XSC_OPCODE_CONFIG_CMD = 0x1f,
+ XSC_OPCODE_UMR = 0x25,
+ XSC_OPCODE_TAG_MATCHING = 0x28
+};
+
+enum {
+ XSC_CQE_L2_OK = 1 << 0,
+ XSC_CQE_L3_OK = 1 << 1,
+ XSC_CQE_L4_OK = 1 << 2,
+};
+
+enum {
+ XSC_CQE_L3_HDR_TYPE_NONE = 0x0,
+ XSC_CQE_L3_HDR_TYPE_IPV6 = 0x1,
+ XSC_CQE_L3_HDR_TYPE_IPV4 = 0x2,
+};
+
+enum {
+ XSC_CQE_OWNER_MASK = 1,
+ XSC_CQE_REQ = 0,
+ XSC_CQE_RESP_WR_IMM = 1,
+ XSC_CQE_RESP_SEND = 2,
+ XSC_CQE_RESP_SEND_IMM = 3,
+ XSC_CQE_RESP_SEND_INV = 4,
+ XSC_CQE_RESIZE_CQ = 5,
+ XSC_CQE_NO_PACKET = 6,
+ XSC_CQE_REQ_ERR = 13,
+ XSC_CQE_RESP_ERR = 14,
+ XSC_CQE_INVALID = 15,
+};
+
+struct xsc_err_cqe {
+ uint8_t rsvd0[32];
+ uint32_t srqn;
+ uint8_t rsvd1[18];
+ uint8_t vendor_err_synd;
+ uint8_t syndrome;
+ uint32_t s_wqe_opcode_qpn;
+ uint16_t wqe_counter;
+ uint8_t signature;
+ uint8_t op_own;
+};
+
+struct xsc_tm_cqe {
+ __be32 success;
+ __be16 hw_phase_cnt;
+ uint8_t rsvd0[12];
+};
+
+struct xsc_cqe64 {
+ union {
+ struct {
+ uint8_t rsvd0[2];
+ __be16 wqe_id;
+ uint8_t rsvd4[13];
+ uint8_t ml_path;
+ uint8_t rsvd20[4];
+ __be16 slid;
+ __be32 flags_rqpn;
+ uint8_t hds_ip_ext;
+ uint8_t l4_hdr_type_etc;
+ __be16 vlan_info;
+ };
+ struct xsc_tm_cqe tm_cqe;
+ /* TMH is scattered to CQE upon match */
+ struct ibv_tmh tmh;
+ };
+ __be32 srqn_uidx;
+ __be32 imm_inval_pkey;
+ uint8_t app;
+ uint8_t app_op;
+ __be16 app_info;
+ __be32 byte_cnt;
+ __be64 timestamp;
+ __be32 sop_drop_qpn;
+ __be16 wqe_counter;
+ uint8_t signature;
+ uint8_t op_own;
+};
+
+enum xscdv_cqe_comp_res_format {
+ XSCDV_CQE_RES_FORMAT_HASH = 1 << 0,
+ XSCDV_CQE_RES_FORMAT_CSUM = 1 << 1,
+ XSCDV_CQE_RES_FORMAT_CSUM_STRIDX = 1 << 2,
+};
+
+enum xscdv_sw_parsing_offloads {
+ XSCDV_SW_PARSING = 1 << 0,
+ XSCDV_SW_PARSING_CSUM = 1 << 1,
+ XSCDV_SW_PARSING_LSO = 1 << 2,
+};
+
+static XSCDV_ALWAYS_INLINE
+uint8_t xscdv_get_cqe_owner(struct xsc_cqe64 *cqe)
+{
+ return cqe->op_own & 0x1;
+}
+
+static XSCDV_ALWAYS_INLINE
+void xscdv_set_cqe_owner(struct xsc_cqe64 *cqe, uint8_t val)
+{
+ cqe->op_own = (val & 0x1) | (cqe->op_own & ~0x1);
+}
+
+/* Solicited event */
+static XSCDV_ALWAYS_INLINE
+uint8_t xscdv_get_cqe_se(struct xsc_cqe64 *cqe)
+{
+ return (cqe->op_own >> 1) & 0x1;
+}
+
+static XSCDV_ALWAYS_INLINE
+uint8_t xscdv_get_cqe_format(struct xsc_cqe64 *cqe)
+{
+ return (cqe->op_own >> 2) & 0x3;
+}
+
+static XSCDV_ALWAYS_INLINE
+uint8_t xscdv_get_cqe_opcode(struct xsc_cqe64 *cqe)
+{
+ return cqe->op_own >> 4;
+}
+
+/*
+ * WQE related part
+ */
+enum {
+ XSC_INVALID_LKEY = 0x100,
+};
+
+enum {
+ XSC_SEND_WQE_BB = 64,
+ XSC_SEND_WQE_SHIFT = 6,
+};
+
+struct xsc_wqe_srq_next_seg {
+ uint8_t rsvd0[2];
+ __be16 next_wqe_index;
+ uint8_t signature;
+ uint8_t rsvd1[11];
+};
+
+struct xsc_wqe_ctrl_seg {
+ __be32 opmod_idx_opcode;
+ __be32 qpn_ds;
+ uint8_t signature;
+ uint8_t rsvd[2];
+ uint8_t fm_ce_se;
+ __be32 imm;
+};
+
+struct xsc_wqe_av {
+ union {
+ struct {
+ __be32 qkey;
+ __be32 reserved;
+ } qkey;
+ __be64 dc_key;
+ } key;
+ __be32 dqp_dct;
+ uint8_t stat_rate_sl;
+ uint8_t fl_mlid;
+ __be16 rlid;
+ uint8_t reserved0[4];
+ uint8_t rmac[6];
+ uint8_t tclass;
+ uint8_t hop_limit;
+ __be32 grh_gid_fl;
+ uint8_t rgid[16];
+};
+
+struct xsc_wqe_datagram_seg {
+ struct xsc_wqe_av av;
+};
+
+struct xsc_wqe_raddr_seg {
+ __be64 raddr;
+ __be32 rkey;
+ __be32 reserved;
+};
+
+struct xsc_wqe_atomic_seg {
+ __be64 swap_add;
+ __be64 compare;
+};
+
+struct xsc_wqe_inl_data_seg {
+ uint32_t byte_count;
+};
+
+struct xsc_wqe_eth_seg {
+ __be32 rsvd0;
+ uint8_t cs_flags;
+ uint8_t rsvd1;
+ __be16 mss;
+ __be32 rsvd2;
+ __be16 inline_hdr_sz;
+ uint8_t inline_hdr_start[2];
+ uint8_t inline_hdr[16];
+};
+
+/*
+ * Control segment - contains some control information for the current WQE.
+ *
+ * Output:
+ * seg - control segment to be filled
+ * Input:
+ * pi - WQEBB number of the first block of this WQE.
+ * This number should wrap at 0xffff, regardless of
+ * size of the WQ.
+ * opcode - Opcode of this WQE. Encodes the type of operation
+ * to be executed on the QP.
+ * opmod - Opcode modifier.
+ * qp_num - QP/SQ number this WQE is posted to.
+ * fm_ce_se - FM (fence mode), CE (completion and event mode)
+ * and SE (solicited event).
+ * ds - WQE size in octowords (16-byte units). DS accounts for all
+ * the segments in the WQE as summarized in WQE construction.
+ * signature - WQE signature.
+ * imm - Immediate data/Invalidation key/UMR mkey.
+ */
+static XSCDV_ALWAYS_INLINE
+void xscdv_set_ctrl_seg(struct xsc_wqe_ctrl_seg *seg, uint16_t pi,
+ uint8_t opcode, uint8_t opmod, uint32_t qp_num,
+ uint8_t fm_ce_se, uint8_t ds,
+ uint8_t signature, uint32_t imm)
+{
+ seg->opmod_idx_opcode = htobe32(((uint32_t)opmod << 24) | ((uint32_t)pi << 8) | opcode);
+ seg->qpn_ds = htobe32((qp_num << 8) | ds);
+ seg->fm_ce_se = fm_ce_se;
+ seg->signature = signature;
+ /*
+ * The caller should prepare "imm" in advance based on WR opcode.
+ * For IBV_WR_SEND_WITH_IMM and IBV_WR_RDMA_WRITE_WITH_IMM,
+ * the "imm" should be assigned as is.
+ * For the IBV_WR_SEND_WITH_INV, it should be htobe32(imm).
+ */
+ seg->imm = imm;
+}
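+
+/*
+ * Illustrative call (ctrl, idx, sqn and fm_ce_se are caller-provided
+ * example names): filling the control segment of a SEND WQE that spans
+ * three 16-byte segments:
+ *
+ *	xscdv_set_ctrl_seg(ctrl, idx, XSC_OPCODE_SEND, 0, sqn,
+ *			   fm_ce_se, 3, 0, 0);
+ */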
+
+/* x86 optimized version of xscdv_set_ctrl_seg()
+ *
+ * This is useful when filling many control segments over large data
+ * sets in parallel.
+ *
+ * It is not suitable for serialized algorithms.
+ */
+#if defined(__SSE3__)
+static XSCDV_ALWAYS_INLINE
+void xscdv_x86_set_ctrl_seg(struct xsc_wqe_ctrl_seg *seg, uint16_t pi,
+ uint8_t opcode, uint8_t opmod, uint32_t qp_num,
+ uint8_t fm_ce_se, uint8_t ds,
+ uint8_t signature, uint32_t imm)
+{
+ __m128i val = _mm_set_epi32(imm, qp_num, (ds << 16) | pi,
+ (signature << 24) | (opcode << 16) | (opmod << 8) | fm_ce_se);
+ __m128i mask = _mm_set_epi8(15, 14, 13, 12, /* immediate */
+ 0, /* signal/fence_mode */
+#if CHAR_MIN
+ -128, -128, /* reserved */
+#else
+ 0x80, 0x80, /* reserved */
+#endif
+ 3, /* signature */
+ 6, /* data size */
+ 8, 9, 10, /* QP num */
+ 2, /* opcode */
+ 4, 5, /* sw_pi in BE */
+ 1 /* opmod */
+ );
+ *(__m128i *) seg = _mm_shuffle_epi8(val, mask);
+}
+#endif /* defined(__SSE3__) */
+
+/*
+ * Datagram Segment - contains address information required in order
+ * to form a datagram message.
+ *
+ * Output:
+ * seg - datagram segment to be filled.
+ * Input:
+ * key - Q_key/access key.
+ * dqp_dct - Destination QP number for UD and DCT for DC.
+ * ext - Address vector extension.
+ * stat_rate_sl - Maximum static rate control, SL/ethernet priority.
+ * fl_mlid - Force loopback and source LID for IB.
+ * rlid - Remote LID
+ * rmac - Remote MAC
+ * tclass - GRH tclass/IPv6 tclass/IPv4 ToS
+ * hop_limit - GRH hop limit/IPv6 hop limit/IPv4 TTL
+ * grh_gid_fi - GRH, source GID address and IPv6 flow label.
+ * rgid - Remote GID/IP address.
+ */
+static XSCDV_ALWAYS_INLINE
+void xscdv_set_dgram_seg(struct xsc_wqe_datagram_seg *seg,
+ uint64_t key, uint32_t dqp_dct,
+ uint8_t ext, uint8_t stat_rate_sl,
+ uint8_t fl_mlid, uint16_t rlid,
+ uint8_t *rmac, uint8_t tclass,
+ uint8_t hop_limit, uint32_t grh_gid_fi,
+ uint8_t *rgid)
+{
+	/* Always write 64 bits; for a q_key the reserved half will be 0 */
+ seg->av.key.dc_key = htobe64(key);
+ seg->av.dqp_dct = htobe32(((uint32_t)ext << 31) | dqp_dct);
+ seg->av.stat_rate_sl = stat_rate_sl;
+ seg->av.fl_mlid = fl_mlid;
+ seg->av.rlid = htobe16(rlid);
+ memcpy(seg->av.rmac, rmac, 6);
+ seg->av.tclass = tclass;
+ seg->av.hop_limit = hop_limit;
+ seg->av.grh_gid_fl = htobe32(grh_gid_fi);
+ memcpy(seg->av.rgid, rgid, 16);
+}
+
+/*
+ * Eth Segment - contains packet headers and information for stateless L2, L3, L4 offloading.
+ *
+ * Output:
+ * seg - Eth segment to be filled.
+ * Input:
+ * cs_flags - l3cs/l3cs_inner/l4cs/l4cs_inner.
+ * mss - Maximum segment size. For TSO WQEs, the number of bytes
+ * in the TCP payload to be transmitted in each packet. Must
+ * be 0 on non TSO WQEs.
+ * inline_hdr_sz - Length of the inlined packet headers.
+ * inline_hdr_start - Inlined packet header.
+ */
+static XSCDV_ALWAYS_INLINE
+void xscdv_set_eth_seg(struct xsc_wqe_eth_seg *seg, uint8_t cs_flags,
+ uint16_t mss, uint16_t inline_hdr_sz,
+ uint8_t *inline_hdr_start)
+{
+ seg->cs_flags = cs_flags;
+ seg->mss = htobe16(mss);
+ seg->inline_hdr_sz = htobe16(inline_hdr_sz);
+ memcpy(seg->inline_hdr_start, inline_hdr_start, inline_hdr_sz);
+}
+
+enum xscdv_set_ctx_attr_type {
+ XSCDV_CTX_ATTR_BUF_ALLOCATORS = 1,
+};
+
+enum {
+ XSC_MMAP_GET_REGULAR_PAGES_CMD = 0,
+ XSC_MMAP_GET_NC_PAGES_CMD = 3,
+};
+
+struct xscdv_ctx_allocators {
+ void *(*alloc)(size_t size, void *priv_data);
+ void (*free)(void *ptr, void *priv_data);
+ void *data;
+};
+
+/*
+ * Generic context attributes set API
+ *
+ * Returns 0 on success, or the value of errno on failure
+ * (which indicates the failure reason).
+ */
+int xscdv_set_context_attr(struct ibv_context *context,
+ enum xscdv_set_ctx_attr_type type, void *attr);
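+
+/*
+ * Illustrative usage sketch (my_alloc/my_free/my_data are example
+ * names): registering external buffer allocators, matching the
+ * xscdv_ctx_allocators callback signatures above.
+ *
+ *	struct xscdv_ctx_allocators allocators = {
+ *		.alloc = my_alloc,	// void *my_alloc(size_t size, void *priv_data)
+ *		.free = my_free,	// void my_free(void *ptr, void *priv_data)
+ *		.data = my_data,
+ *	};
+ *
+ *	err = xscdv_set_context_attr(ctx, XSCDV_CTX_ATTR_BUF_ALLOCATORS,
+ *				     &allocators);
+ */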
+
+struct xscdv_clock_info {
+ uint64_t nsec;
+ uint64_t last_cycles;
+ uint64_t frac;
+ uint32_t mult;
+ uint32_t shift;
+ uint64_t mask;
+};
+
+/*
+ * Get xsc core clock info
+ *
+ * Output:
+ * clock_info - clock info to be filled
+ * Input:
+ * context - device context
+ *
+ * Return: 0 on success, or the value of errno on failure
+ */
+int xscdv_get_clock_info(struct ibv_context *context,
+ struct xscdv_clock_info *clock_info);
+
+/*
+ * Translate device timestamp to nano-sec
+ *
+ * Input:
+ * clock_info - clock info to be filled
+ * device_timestamp - timestamp to translate
+ *
+ * Return: nano-sec
+ */
+static inline uint64_t xscdv_ts_to_ns(struct xscdv_clock_info *clock_info,
+ uint64_t device_timestamp)
+{
+ uint64_t delta, nsec;
+
+ /*
+ * device_timestamp & cycles are the free running 'mask' bit counters
+ * from the hardware hca_core_clock clock.
+ */
+ delta = (device_timestamp - clock_info->last_cycles) & clock_info->mask;
+ nsec = clock_info->nsec;
+
+ /*
+	 * Guess whether device_timestamp is more recent than
+	 * clock_info->last_cycles; if not (too far in the future), treat
+	 * it as an old timestamp. This heuristic breaks after
+	 * max_clock_info_update_nsec.
+ */
+
+ if (delta > clock_info->mask / 2) {
+ delta = (clock_info->last_cycles - device_timestamp) &
+ clock_info->mask;
+ nsec -= ((delta * clock_info->mult) - clock_info->frac) >>
+ clock_info->shift;
+ } else {
+ nsec += ((delta * clock_info->mult) + clock_info->frac) >>
+ clock_info->shift;
+ }
+
+ return nsec;
+}
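+
+/*
+ * Illustrative usage sketch ("raw_ts" stands for a device timestamp
+ * obtained elsewhere, e.g. from a completion):
+ *
+ *	struct xscdv_clock_info ci;
+ *	uint64_t ns;
+ *
+ *	if (!xscdv_get_clock_info(ctx, &ci))
+ *		ns = xscdv_ts_to_ns(&ci, raw_ts);
+ */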
+
+enum xscdv_context_attr_flags {
+ XSCDV_CONTEXT_FLAGS_DEVX = 1 << 0,
+};
+
+struct xscdv_context_attr {
+ uint32_t flags; /* Use enum xscdv_context_attr_flags */
+ uint64_t comp_mask;
+};
+
+struct ibv_context *
+xscdv_open_device(struct ibv_device *device, struct xscdv_context_attr *attr);
+
+struct xscdv_devx_obj;
+
+struct xscdv_devx_obj *
+xscdv_devx_obj_create(struct ibv_context *context, const void *in, size_t inlen,
+ void *out, size_t outlen);
+int xscdv_devx_obj_query(struct xscdv_devx_obj *obj, const void *in, size_t inlen,
+ void *out, size_t outlen);
+int xscdv_devx_obj_modify(struct xscdv_devx_obj *obj, const void *in, size_t inlen,
+ void *out, size_t outlen);
+int xscdv_devx_obj_destroy(struct xscdv_devx_obj *obj);
+int xscdv_devx_general_cmd(struct ibv_context *context, const void *in, size_t inlen,
+ void *out, size_t outlen);
+
+struct xscdv_devx_umem {
+ uint32_t umem_id;
+};
+
+struct xscdv_devx_umem *
+xscdv_devx_umem_reg(struct ibv_context *ctx, void *addr, size_t size, uint32_t access);
+int xscdv_devx_umem_dereg(struct xscdv_devx_umem *umem);
+int xscdv_devx_query_eqn(struct ibv_context *context, uint32_t vector,
+ uint32_t *eqn);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _XSCDV_H_ */
diff --git a/redhat/rdma-core.spec b/redhat/rdma-core.spec
index c347195..a7aa5bc 100644
--- a/redhat/rdma-core.spec
+++ b/redhat/rdma-core.spec
@@ -176,6 +176,8 @@ Provides: libocrdma = %{version}-%{release}
Obsoletes: libocrdma < %{version}-%{release}
Provides: librxe = %{version}-%{release}
Obsoletes: librxe < %{version}-%{release}
+Provides: libxscale = %{version}-%{release}
+Obsoletes: libxscale < %{version}-%{release}
%description -n libibverbs
libibverbs is a library that allows userspace processes to use RDMA
@@ -202,6 +204,7 @@ Device-specific plug-in ibverbs userspace drivers are included:
- librxe: A software implementation of the RoCE protocol
- libsiw: A software implementation of the iWarp protocol
- libvmw_pvrdma: VMware paravirtual RDMA device
+- libxscale: Yunsilicon RDMA device
%package -n libibverbs-utils
Summary: Examples for the libibverbs library
@@ -583,6 +586,7 @@ fi
%{_libdir}/libmana.so.*
%{_libdir}/libmlx5.so.*
%{_libdir}/libmlx4.so.*
+%{_libdir}/libxscale.so.*
%config(noreplace) %{_sysconfdir}/libibverbs.d/*.driver
%doc %{_docdir}/%{name}/libibverbs.md
--
2.43.0