Add support for HSAK

Signed-off-by: Weifeng Su <suweifeng1@huawei.com>
This commit is contained in:
Weifeng Su 2022-06-08 20:40:20 +08:00
parent d166c00e9f
commit 4970683a63
12 changed files with 8228 additions and 2 deletions

View File

@ -0,0 +1,870 @@
From f0710b6c37214457ab46bd1859f00ec413b01a7f Mon Sep 17 00:00:00 2001
From: sunshihao <sunshihao@huawei.com>
Date: Thu, 18 Feb 2021 10:52:24 +0800
Subject: [PATCH 17/27] add HSAK needed head file and API to spdk
Signed-off-by: sunshihao <sunshihao@huawei.com>
---
CONFIG | 3 +
Makefile | 6 +
configure | 8 ++
etc/spdk/nvme.conf.in | 88 ++++++++++++
include/spdk/bdev.h | 85 +++++++++++
include/spdk/bdev_module.h | 89 ++++++++++++
include/spdk/log.h | 2 +-
include/spdk/nvme.h | 230 ++++++++++++++++++++++++++++++
include/spdk/thread.h | 18 +++
include/spdk_internal/bdev_stat.h | 63 ++++++++
include/spdk_internal/debug.h | 43 ++++++
include/spdk_internal/thread.h | 2 +
mk/spdk.app_vars.mk | 4 +-
13 files changed, 639 insertions(+), 2 deletions(-)
create mode 100644 etc/spdk/nvme.conf.in
create mode 100644 include/spdk_internal/bdev_stat.h
create mode 100644 include/spdk_internal/debug.h
diff --git a/CONFIG b/CONFIG
index b5fffae..214e59e 100644
--- a/CONFIG
+++ b/CONFIG
@@ -43,6 +43,9 @@ CONFIG_CROSS_PREFIX=
# Build with debug logging. Turn off for performance testing and normal usage
CONFIG_DEBUG=n
+# Enable read and write NVMe for application
+CONFIG_APP_RW=n
+
# Treat warnings as errors (fail the build on any warning).
CONFIG_WERROR=n
diff --git a/Makefile b/Makefile
index a50fa94..1c98268 100644
--- a/Makefile
+++ b/Makefile
@@ -37,6 +37,12 @@ S :=
SPDK_ROOT_DIR := $(CURDIR)
include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
+ifeq ($(CONFIG_APP_RW),y)
+# secure compile option
+CFLAGS += -fPIE -pie -fPIC -fstack-protector-strong -D_FORTIFY_SOURCE=2 -O2 -Wall -Werror
+CFLAGS += -Wl,-z,relro,-z,now,-z,noexecstack -Wtrampolines
+endif
+
DIRS-y += lib
DIRS-y += module
DIRS-$(CONFIG_SHARED) += shared_lib
diff --git a/configure b/configure
index 5b48696..964322e 100644
--- a/configure
+++ b/configure
@@ -25,6 +25,8 @@ function usage()
echo " example: aarch64-linux-gnu"
echo ""
echo " --enable-debug Configure for debug builds"
+ echo " --enable-err-injc Enable error injection feature"
+ echo " --enable-raw Enable read and write NVMe disk feature."
echo " --enable-werror Treat compiler warnings as errors"
echo " --enable-asan Enable address sanitizer"
echo " --enable-ubsan Enable undefined behavior sanitizer"
@@ -204,6 +206,12 @@ for i in "$@"; do
--disable-debug)
CONFIG[DEBUG]=n
;;
+ --enable-raw)
+ CONFIG[APP_RW]=y
+ ;;
+ --enable-err-injc)
+ CONFIG[ERR_INJC]=y
+ ;;
--enable-asan)
CONFIG[ASAN]=y
;;
diff --git a/etc/spdk/nvme.conf.in b/etc/spdk/nvme.conf.in
new file mode 100644
index 0000000..a3df92b
--- /dev/null
+++ b/etc/spdk/nvme.conf.in
@@ -0,0 +1,88 @@
+#NVME configuration file
+#
+# Please write all parameters using ASCII.
+# The parameter must be quoted if it includes whitespace.
+#
+# Configuration syntax:
+# Leading whitespace is ignored.
+# Lines starting with '#' are comments.
+# Lines ending with '\' are concatenated with the next line.
+# Bracketed ([]) names define sections
+
+[Global]
+ # Users can restrict work items to only run on certain cores by specifying a ReactorMask.
+ # Core 0 must not be specified.
+ ReactorMask 0x2
+
+ # The print level of log.
+ # 0: Print ERROR log only; 1: Print WARNING and ERROR log; and so on, 4: Print all level log
+ LogLevel 1
+
+ # The size of memory for Libstorage (Unit: MB). The minimum value is 300MB.
+ # If the parameter "SocketMem" is set correctly, MemSize has no effect.
+ MemSize 300
+
+ # The same block device supports multiple queues.
+ MultiQ No
+
+ # End-to-end data protection. This item is only used if the namespace is formatted to use end-to-end protection information.
+ # If the value is set to '1', the protection information is generated by the controller, and the logical block data and protection information are written to NVM.
+ # If the value is set to '2', the protection information is transferred from the host buffer to NVM.
+ E2eDif 2
+
+ # Open IOstat or not
+ IoStat No
+
+ # Poll time threshold in milliseconds. Polling thread calls whose duration exceeds this value are counted as exceptional and displayed in the stat report.
+ # This item is only used when UseReactor = No. Setting it to 0 disables this measurement.
+ PollTime 0
+
+ # Preallocate specified amounts of memory (Unit: MB) per socket.
+ # The parameter is a comma-separated list of values. For example:
+ # SocketMem 1024,2048
+ # This will allocate 1 gigabyte of memory on socket 0, and 2048 megabytes of memory on socket 1.
+ # The sum of socket memory must be greater than 300MB.
+ # If SocketMem is set correctly, the parameter "MemSize" has no effect.
+ # SocketMem 300
+
+ # Place a per-socket upper limit on memory use (non-legacy memory mode only).
+ # 0 will disable the limit for a particular socket.
+ # SocketLimit 1024,1
+ # This will set an upper limit of 1 gigabyte on socket 0, and 1 megabyte on socket 1.
+ # If the value is left empty, the per-socket limit is disabled.
+ # If SocketMem is empty, this parameter has no effect.
+ # SocketLimit 300
+
+ #Decide whether to start rpc server or not
+ RpcServer Yes
+
+# NVMe configuration options
+[Nvme]
+ # NVMe Device Whitelist
+ # Users may specify which NVMe devices to claim by their transport id.
+ # See spdk_nvme_transport_id_parse() in spdk/nvme.h for the correct format.
+ # The second argument is the assigned name, which can be referenced from
+ # other sections in the configuration file. For NVMe devices, a namespace
+ # is automatically appended to each name in the format <YourName>nY, where
+ # Y is the NSID (starts at 1).
+ #TransportID "trtype:PCIe traddr:0000:81:00.0" nvme0
+ #TransportID "trtype:PCIe traddr:0000:01:00.0" nvme1
+
+ # The number of attempts per I/O when an I/O fails. Do not include
+ # this key to get the default behavior.
+ RetryCount 4
+ # Timeout for each command, in microseconds. If 0, don't track timeouts.
+ TimeoutUsec 0
+ # Action to take on command time out. Only valid when Timeout is greater
+ # than 0. This may be 'Reset' to reset the controller, 'Abort' to abort
+ # the command, or 'None' to just print a message but do nothing.
+ # Admin command timeouts will always result in a reset.
+ ActionOnTimeout None
+ # Set how often the admin queue is polled for asynchronous events.
+ # Units in microseconds.
+ AdminPollRate 100000
+
+[Reactor]
+ # Batch size of IO for one-time release by reactor.
+ # The maximum value is 32.
+ BatchSize 8
diff --git a/include/spdk/bdev.h b/include/spdk/bdev.h
index d894646..2951660 100644
--- a/include/spdk/bdev.h
+++ b/include/spdk/bdev.h
@@ -53,6 +53,8 @@ extern "C" {
#define SPDK_BDEV_SMALL_BUF_MAX_SIZE 8192
#define SPDK_BDEV_LARGE_BUF_MAX_SIZE (64 * 1024)
+#define SPDK_BDEV_SMALL_BUF_WITH_MAX_MD 512
+#define SPDK_BDEV_LARGE_BUF_WITH_MAX_MD 1024
/* Increase the buffer size to store interleaved metadata. Increment is the
* amount necessary to store metadata per data block. 16 byte metadata per
@@ -116,6 +118,42 @@ enum spdk_bdev_status {
SPDK_BDEV_STATUS_REMOVING,
};
+#ifdef SPDK_CONFIG_APP_RW
+/** ns status */
+enum spdk_bdev_ns_status {
+ SPDK_BDEV_NS_STATUS_INVALID,
+ SPDK_BDEV_NS_STATUS_READY,
+ SPDK_BDEV_NS_STATUS_REMOVING,
+ SPDK_BDEV_NS_STATUS_UNREGISTER,
+};
+
+typedef void (*LIBSTORAGE_CALLBACK_FUNC)(int32_t cb_status, int32_t sct_code, void *cb_arg);
+
+typedef struct libstorage_io {
+ uint8_t *buf;
+ struct iovec *iovs; /* array of iovecs to transfer */
+ int iovcnt; /* Number of iovecs in iovs array */
+ int32_t fd; /* File Descriptor */
+ uint16_t opcode; /* r/w */
+ uint16_t streamId; /* Stream ID for IO */
+ uint8_t pi_action;
+ uint8_t fua;
+ uint8_t location;
+ bool inSubmit; /* In the I/0 phase or not. Use in nopoll model */
+ uint32_t count;
+ uint32_t nbytes;
+ uint64_t offset;
+ uint8_t *md_buf;
+ uint32_t md_len;
+ uint32_t magic;
+ /*Save the error code returned by the callback */
+ int32_t err;
+ int32_t reserved;
+ LIBSTORAGE_CALLBACK_FUNC cb;
+ void *cb_arg;
+} LIBSTORAGE_IO_T;
+#endif
+
/**
* \brief Handle to an opened SPDK block device.
*/
@@ -140,6 +178,13 @@ enum spdk_bdev_io_type {
SPDK_BDEV_IO_TYPE_COMPARE,
SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE,
SPDK_BDEV_IO_TYPE_ABORT,
+#ifdef SPDK_CONFIG_APP_RW
+ SPDK_BDEV_IO_TYPE_READ_NVME,
+ SPDK_BDEV_IO_TYPE_WRITE_NVME,
+ SPDK_BDEV_IO_TYPE_READV_NVME,
+ SPDK_BDEV_IO_TYPE_WRITEV_NVME,
+ SPDK_BDEV_IO_TYPE_UNMAP_BLOCKS,
+#endif
SPDK_BDEV_NUM_IO_TYPES /* Keep last */
};
@@ -181,6 +226,14 @@ struct spdk_bdev_io_stat {
uint64_t write_latency_ticks;
uint64_t unmap_latency_ticks;
uint64_t ticks_rate;
+#ifdef SPDK_CONFIG_APP_RW
+ int io_stat_id;
+ uint64_t io_ticks;
+ uint64_t pre_ticks;
+ uint64_t cur_ticks;
+ uint64_t start_tsc;
+ uint64_t interval_tsc;
+#endif
};
struct spdk_bdev_opts {
@@ -1342,6 +1395,38 @@ int spdk_bdev_unmap(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
uint64_t offset, uint64_t nbytes,
spdk_bdev_io_completion_cb cb, void *cb_arg);
+#ifdef SPDK_CONFIG_APP_RW
+/**
+ * Submit an unmap request to the block device. Unmap is sometimes also called trim or
+ * deallocate. This notifies the device that the data in the blocks described is no
+ * longer valid. Reading blocks that have been unmapped results in indeterminate data.
+ *
+ * \param desc Block device descriptor
+ * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel().
+ * \param unmap_d An array of unmap descriptors.
+ * \param unmap_count The number of elements in unmap_d.
+ * \param cb Called when the request is complete.
+ * \param cb_arg Argument passed to cb.
+ *
+ * \return 0 on success. On success, the callback will always
+ * be called (even if the request ultimately failed). Return
+ * negated errno on failure, in which case the callback will not be called.
+ */
+int
+spdk_bdev_unmap_multiblocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
+ void *unmap_d, uint16_t unmap_count,
+ spdk_bdev_io_completion_cb cb, void *cb_arg);
+
+void*
+spdk_bdev_get_channel_group(struct spdk_io_channel *io_ch);
+
+void*
+spdk_bdev_io_get_pool(size_t nbytes);
+
+bool
+spdk_bdev_have_io_in_channel(struct spdk_io_channel *bdevIoCh);
+#endif
+
/**
* Submit an unmap request to the block device. Unmap is sometimes also called trim or
* deallocate. This notifies the device that the data in the blocks described is no
diff --git a/include/spdk/bdev_module.h b/include/spdk/bdev_module.h
index bbb9f94..c2fd81d 100644
--- a/include/spdk/bdev_module.h
+++ b/include/spdk/bdev_module.h
@@ -222,8 +222,67 @@ struct spdk_bdev_fn_table {
/** Get bdev module context. */
void *(*get_module_ctx)(void *ctx);
+
+#ifdef SPDK_CONFIG_APP_RW
+ uint16_t (*get_io_channel_id)(struct spdk_io_channel *ch);
+
+ int (*bdev_poll_rsp)(void *pollCh);
+
+ uint64_t (*get_timeout_count)(struct spdk_io_channel *ch);
+#endif
+};
+
+#ifdef SPDK_CONFIG_APP_RW
+static inline void spdk_bdev_set_io_location(void *bdev_ctx, uint8_t location)
+{
+ struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bdev_ctx);
+ uint8_t *ioLoc = (uint8_t *)bdev_io->internal.caller_ctx;
+ *ioLoc = location;
+}
+
+enum spdk_bdev_driver_ctx {
+ SPDK_BDEV_IO_ACTION_PI,
+ SPDK_BDEV_IO_ACTION_FUA,
+ SPDK_BDEV_IO_STREAM_ID_0,
+ SPDK_BDEV_IO_STREAM_ID_1,
+};
+
+enum spdk_bdev_io_e2e_pi_action{
+ IO_NO_PROTECTION = 0,
+ IO_HALF_WAY_PROTECTION = 1,
+ IO_E2E_PROTECTION = 2
};
+#define FLAG_NO_REF 0x10//bit 4 : 1, disable ctrl ref tag check; 0, enable check
+#define FLAG_CALCRC 0x08//bit 3 : 1, libstorage calculate crc; 0, app calculate crc
+#define FLAG_PRCHK 0x04//bit 2 : 1, enable ctrl guard crc check; 0, disable check
+
+enum spdk_bdev_io_fua{
+ IO_FUA_NO = 0,
+ IO_FUA_YES = 1
+};
+
+void spdk_bdev_nvme_remove_cb(void *cb_ctx, void *ctrlr);
+
+void spdk_bdev_fail_ctrlr(const char* traddr);
+
+void *nvme_channel_get_group(void *io_ch);
+
+enum reqLocation_E
+{
+ LOCAL_RECEIVE_APP = 1,
+ LOCAL_LIBSTORAGE_SUBMIT = 2,
+ LOCAL_LIBSTORAGE_ASYNC_REQ = 3,
+ LOCAL_LIBSTORAGE_BDEV_NVME = 4,
+ LOCAL_LIBSTORAGE_HUNG_REQ = 5,
+ LOCAL_LIBSTORAGE_TO_DISK = 6,
+ LOCAL_LIBSTORAGE_FROM_DISK = 7,
+ LOCAL_LIBSTORAGE_CALLBACK = 8,
+ LOCAL_LIBSTORAGE_SUBMIT_RETRY = 9,
+ LOCAL_LIBSTORAGE_BDEV_NOMEM = 10,
+};
+#endif
+
/** bdev I/O completion status */
enum spdk_bdev_io_status {
SPDK_BDEV_IO_STATUS_AIO_ERROR = -8,
@@ -407,6 +466,10 @@ struct spdk_bdev {
/** The bdev status */
enum spdk_bdev_status status;
+#ifdef SPDK_CONFIG_APP_RW
+ enum spdk_bdev_ns_status ns_status;
+#endif
+
/**
* Pointer to the module that has claimed this bdev for purposes of creating virtual
* bdevs on top of it. Set to NULL if the bdev has not been claimed.
@@ -528,6 +591,11 @@ struct spdk_bdev_io {
/** Starting offset (in blocks) of the bdev for this I/O. */
uint64_t offset_blocks;
+#ifdef SPDK_CONFIG_APP_RW
+ /* The number of bytes to transfer */
+ size_t nbytes;
+#endif
+
/** stored user callback in case we split the I/O and use a temporary callback */
spdk_bdev_io_completion_cb stored_user_cb;
@@ -595,6 +663,27 @@ struct spdk_bdev_io {
/* The data buffer */
void *buf;
} zone_mgmt;
+#ifdef SPDK_CONFIG_APP_RW
+ struct {
+ /* The data buffer to transfer */
+ void *buf;
+
+ /* The meta data buffer to transfer */
+ void *md_buf;
+
+ /** Total size of data(in blocks) to be transferred. */
+ uint64_t num_blocks;
+
+ /* The number of bytes to transfer */
+ size_t nbytes;
+
+ /** Starting offset (in blocks) of the bdev for this I/O. */
+ size_t offset_blocks;
+
+ /* meta data buffer size to transfer */
+ size_t md_len;
+ } contig;
+#endif
} u;
/** It may be used by modules to put the bdev_io into its own list. */
diff --git a/include/spdk/log.h b/include/spdk/log.h
index ad850ab..e16035c 100644
--- a/include/spdk/log.h
+++ b/include/spdk/log.h
@@ -173,7 +173,7 @@ enum spdk_log_level spdk_log_get_print_level(void);
* \param format Format string to the message.
*/
void spdk_log(enum spdk_log_level level, const char *file, const int line, const char *func,
- const char *format, ...) __attribute__((__format__(__printf__, 5, 6)));
+ const char *format, ...) __attribute__((weak)) __attribute__((__format__(__printf__, 5, 6)));
/**
* Same as spdk_log except that instead of being called with variable number of
diff --git a/include/spdk/nvme.h b/include/spdk/nvme.h
index 45b9f94..8e05139 100644
--- a/include/spdk/nvme.h
+++ b/include/spdk/nvme.h
@@ -2465,6 +2465,7 @@ enum spdk_nvme_ns_flags {
part of the logical block that it is associated with */
SPDK_NVME_NS_WRITE_UNCORRECTABLE_SUPPORTED = 0x40, /**< The write uncorrectable command is supported */
SPDK_NVME_NS_COMPARE_SUPPORTED = 0x80, /**< The compare command is supported */
+ SPDK_NVME_NS_DPS_PI_MDSTART = 0x100 /**< protection info transferred at start of metadata */
};
/**
@@ -3434,6 +3435,235 @@ struct spdk_nvme_transport_ops {
*/
void spdk_nvme_transport_register(const struct spdk_nvme_transport_ops *ops);
+#ifdef SPDK_CONFIG_APP_RW
+struct nvme_ctrlr_info {
+ char ctrlName[16];
+ char pciAddr[24];
+ uint64_t tnvmcap; /* Total NVM Capacity in bytes */
+ uint64_t unvmcap; /* Unallocated NVM Capacity in bytes */
+ int8_t sn[20]; /* Serial number */
+ int8_t mn[40]; /* Model number */
+ uint8_t fr[8]; /* Firmware revision */
+ uint32_t max_num_ns; /* Number of namespaces */
+ uint32_t version; /* Version of the NVM Express specification that the controller implementation supports */
+ uint16_t num_io_queues; /* num of io queues */
+ uint16_t io_queue_size; /* io queue size */
+ uint16_t device_id; /* Device id */
+ uint16_t subdevice_id; /* Subsystem device id */
+ uint16_t vid; /* Vendor id */
+ uint16_t ssvid; /* Subsystem vendor id */
+ uint16_t ctrlid; /* Controller id */
+ uint16_t trtype; /* Transport type */
+ uint16_t support_ns :1; /* Supports the Namespace Management and Namespace Attachment commands */
+ uint16_t directives :1; /* Supports Directives */
+ uint16_t streams :1; /* Supports Streams Directives */
+ uint16_t dsm :1; /* Supports the controller supports the Dataset Management command */
+ uint16_t reserved :12;
+ uint16_t reserved2[3];
+};
+
+struct nvme_ctrlr;
+struct nvme_bdev_ctrlr;
+struct spdk_bdev;
+struct nvme_bdev;
+struct spdk_nvme_ns;
+struct spdk_nvme_qpair;
+int32_t nvme_ctrlr_get_info(const char* ctrlName, struct nvme_ctrlr_info** ppCtrlr);
+struct spdk_nvme_ctrlr* spdk_nvme_ctrlr_get_by_name(const char* ctrlname);
+struct spdk_nvme_ctrlr* spdk_nvme_ctrlr_get_by_ctrlr(const struct nvme_bdev_ctrlr *nvme_bdev_ctrlr);
+struct nvme_bdev_ctrlr* nvme_ctrlr_get_by_name(const char* ctrlname);
+void nvme_ctrlr_clear_iostat_by_name(const char* ctrlname);
+void nvme_ctrlr_clear_iostat_all(void);
+struct nvme_bdev_ctrlr* bdev_nvme_get_ctrlr_by_bdev_desc(void *bdev);
+struct spdk_nvme_ns* bdev_nvme_get_ns(struct nvme_bdev *nbdev);
+void bdev_nvme_update_block_by_nvme_ctrlr(struct spdk_nvme_ctrlr *ctrlr);
+int bdev_nvme_update_ns(struct nvme_bdev_ctrlr *nvme_ctrlr, uint32_t nsid);
+bool spdk_bdev_can_remove(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, uint32_t nsid);
+void spdk_bdev_set_ns_normal(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, uint32_t nsid);
+void spdk_nvme_ctrlr_set_shutdown(struct spdk_nvme_ctrlr *ctrlr, bool is_shutdown);
+bool spdk_nvme_ctrlr_is_smart_per_namespace_supported(struct spdk_nvme_ctrlr *ctrlr);
+int spdk_nvme_ctrlr_get_smart_info(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, struct spdk_nvme_health_information_page *health_payload);
+int spdk_nvme_ctrlr_get_error_info(struct spdk_nvme_ctrlr *ctrlr, uint32_t err_entries, struct spdk_nvme_error_information_entry *error_info);
+struct spdk_nvme_ctrlr_opts* spdk_nvme_ctrlr_get_opts(struct spdk_nvme_ctrlr *ctrlr);
+int nvme_ns_get_common_data(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns_data *nsdata);
+bool spdk_nvme_ns_is_allocated(struct spdk_nvme_ctrlr *ctrlr, uint16_t nsid);
+bool spdk_nvme_ctrlr_is_ns_manage_supported(struct spdk_nvme_ctrlr *ctrlr);
+bool spdk_nvme_ctrlr_is_format_supported(struct spdk_nvme_ctrlr *ctrlr);
+bool spdk_nvme_ctrlr_is_format_all_ns(struct spdk_nvme_ctrlr *ctrlr);
+bool spdk_nvme_ctrlr_is_directive_supported(struct spdk_nvme_ctrlr *ctrlr);
+bool spdk_nvme_ctrlr_is_streams_supported(struct spdk_nvme_ctrlr *ctrlr);
+int32_t spdk_nvme_ctrlr_identify_directives(struct spdk_nvme_ctrlr *ctrlr, uint16_t nsid, void *payload);
+int32_t spdk_nvme_ctrlr_enable_streams(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid);
+int32_t spdk_nvme_ctrlr_ret_streams_param(struct spdk_nvme_ctrlr *ctrlr, void *payload);
+int32_t spdk_nvme_ns_ret_streams_param(struct spdk_nvme_ns *ns, void *payload);
+int32_t spdk_nvme_ns_get_streams_status(struct spdk_nvme_ns *ns, void *payload);
+int32_t spdk_nvme_ns_alloc_streams_res(struct spdk_nvme_ns *ns, uint16_t nsr);
+int32_t spdk_nvme_ns_release_streams_id(struct spdk_nvme_ns *ns, uint16_t streamsId);
+int32_t spdk_nvme_ns_release_streams_res(struct spdk_nvme_ns *ns);
+void spdk_nvme_use_streams(bool use);
+
+/**
+ * \brief Get the ctrlr is_failed state, for an I/O sent to the given namespace.
+ *
+ * This function is thread safe and can be called at any point while the controller is attached to
+ * the SPDK NVMe driver.
+ */
+bool spdk_nvme_ns_ctrl_is_failed(struct spdk_nvme_ns *ns);
+#define NVME_MAX_CONTROLLERS 1024
+
+/* check nvme whether exist by access cc register */
+bool nvme_ctrlr_is_exist(struct spdk_nvme_ctrlr *ctrlr);
+
+/* create ctrlr for new added device */
+int spdk_bdev_nvme_create_self(struct spdk_nvme_transport_id *trid, const char *base_name,
+ const char **names, size_t *count, const char *hostnqn);
+
+int spdk_nvme_detach_ublock(struct spdk_nvme_ctrlr *ctrlr);
+void spdk_nvme_ctrlr_update_unvmcap(struct spdk_nvme_ctrlr *ctrlr);
+
+#define SPDK_NVME_UEVENT_SUBSYSTEM_UIO 1
+#define SPDK_NVME_UEVENT_SUBSYSTEM_NVME 2
+
+enum spdk_nvme_uevent_action {
+ SPDK_NVME_UEVENT_ADD = 0,
+ SPDK_NVME_UEVENT_REMOVE = 1,
+};
+
+struct spdk_uevent {
+ /* remove or add */
+ enum spdk_nvme_uevent_action action;
+ int subsystem;
+ /* pci address of device */
+ char traddr[SPDK_NVMF_TRADDR_MAX_LEN + 1];
+};
+
+/* make a socket to get uevent */
+int nvme_uevent_connect(void);
+
+/* get uevent from socket fd */
+int nvme_get_uevent(int fd, struct spdk_uevent *uevent);
+
+/* blocked to get uevent from socket fd */
+int nvme_get_uevent_block(int fd, struct spdk_uevent *uevent);
+
+/**
+ * @Description: bind the device at pci_addr to a driver
+ * @param pci_addr: the device's PCI address, like "0000:08:00.0"
+ * @param driver_name: name of the driver to bind the device to
+ */
+int32_t spdk_rebind_driver(char *pci_addr, char *driver_name);
+
+/**
+ * \brief True if the protection information transferred at the start of metadata
+ * when end-to-end data protection enabled.
+ *
+ * This function is thread safe and can be called at any point while the controller is attached to
+ * the SPDK NVMe driver.
+ */
+bool spdk_nvme_ns_pi_md_start(struct spdk_nvme_ns *ns);
+
+/**
+ * \brief True if the namespace supports Dataset Management command.
+ *
+ * This function is thread safe and can be called at any point while the controller is attached to
+ * the SPDK NVMe driver.
+ */
+bool spdk_nvme_ns_is_dataset_mng_supported(struct spdk_nvme_ns *ns);
+
+/**
+ * Submit a data set management request to the specified NVMe namespace. Data set
+ * management operations are designed to optimize interaction with the block
+ * translation layer inside the device. The most common type of operation is
+ * deallocate, which is often referred to as TRIM or UNMAP.
+ *
+ * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair().
+ * The user must ensure that only one thread submits I/O on a given qpair at any
+ * given time.
+ *
+ * This is a convenience wrapper that will automatically allocate and construct
+ * the correct data buffers. Therefore, ranges does not need to be allocated from
+ * pinned memory and can be placed on the stack. If a higher performance, zero-copy
+ * version of DSM is required, simply build and submit a raw command using
+ * spdk_nvme_ctrlr_cmd_io_raw().
+ *
+ * \param ns NVMe namespace to submit the DSM request
+ * \param type A bit field constructed from \ref spdk_nvme_dsm_attribute.
+ * \param qpair I/O queue pair to submit the request
+ * \param ranges An array of \ref spdk_nvme_dsm_range elements describing the LBAs
+ * to operate on.
+ * \param num_ranges The number of elements in the ranges array.
+ * \param cb_fn Callback function to invoke when the I/O is completed
+ * \param cb_arg Argument to pass to the callback function
+ *
+ * \return 0 if successfully submitted, negated POSIX errno values otherwise.
+ */
+int spdk_nvme_ns_cmd_unmap_blocks(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
+ uint32_t type,
+ const struct spdk_nvme_dsm_range *ranges,
+ uint16_t num_ranges,
+ spdk_nvme_cmd_cb cb_fn,
+ void *cb_arg);
+/**
+ * \brief Submits a write I/O to the specified NVMe namespace.
+ *
+ * \param ns NVMe namespace to submit the write I/O
+ * \param qpair I/O queue pair to submit the request
+ * \param lba starting LBA to write the data
+ * \param lba_count length (in sectors) for the write operation
+ * \param streamId The stream id for write I/O
+ * \param cb_fn callback function to invoke when the I/O is completed
+ * \param cb_arg argument to pass to the callback function
+ * \param io_flags set flags, defined in nvme_spec.h, for this I/O
+ * \param reset_sgl_fn callback function to reset scattered payload
+ * \param next_sge_fn callback function to iterate each scattered
+ * payload memory segment
+ *
+ * \return 0 if successfully submitted, ENOMEM if an nvme_request
+ * structure cannot be allocated for the I/O request
+ *
+ * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair().
+ * The user must ensure that only one thread submits I/O on a given qpair at any given time.
+ */
+int spdk_nvme_ns_cmd_writev_stream(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
+ uint64_t lba, uint32_t lba_count, uint16_t streamId,
+ spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t io_flags,
+ spdk_nvme_req_reset_sgl_cb reset_sgl_fn,
+ spdk_nvme_req_next_sge_cb next_sge_fn);
+
+/**
+ * \brief Send command to NVMe controller to start or abort a self-test operation.
+ *
+ * \param ctrlr NVMe controller to operate self-test command.
+ * \param nsid Depending on the log page, this may be 0, a namespace identifier, or SPDK_NVME_GLOBAL_NS_TAG.
+ * \param stc self-test code, which specifies the action taken by the Device Self-test command.
+ * \param payload The pointer to the payload buffer. it doesn't work actually.
+ * \param payload_size The size of payload buffer. it doesn't work actually.
+ * \param cb_fn Callback function to invoke when the feature has been retrieved.
+ * \param cb_arg Argument to pass to the callback function.
+ *
+ * \return 0 if successfully submitted, ENOMEM if resources could not be allocated for this request
+ *
+ * This function is thread safe and can be called at any point while the controller is attached to
+ * the SPDK NVMe driver.
+ *
+ * Call \ref spdk_nvme_ctrlr_process_admin_completions() to poll for completion
+ * of commands submitted through this function.
+ *
+ * \sa spdk_nvme_ctrlr_cmd_self_test_operation()
+ */
+int spdk_nvme_ctrlr_cmd_self_test_operation(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, uint32_t stc,
+ void *payload, uint32_t payload_size,
+ spdk_nvme_cmd_cb cb_fn, void *cb_arg);
+
+/**
+ * \brief Get the I/O queue pair id.
+ * \param qpair I/O queue pair to get the id of
+ *
+ * \return I/O queue pair id
+ */
+uint16_t spdk_nvme_get_qpair_id(struct spdk_nvme_qpair *qpair);
+#endif
+
/*
* Macro used to register new transports.
*/
diff --git a/include/spdk/thread.h b/include/spdk/thread.h
index 4b7e650..7c52433 100644
--- a/include/spdk/thread.h
+++ b/include/spdk/thread.h
@@ -42,6 +42,9 @@
#include "spdk/cpuset.h"
#include "spdk/queue.h"
+#ifdef SPDK_CONFIG_APP_RW
+#include "rte_config.h"
+#endif
#ifdef __cplusplus
extern "C" {
@@ -57,6 +60,21 @@ enum spdk_thread_poller_rc {
*/
struct spdk_thread;
+#ifdef SPDK_CONFIG_APP_RW
+struct spdk_iodev_thread_info {
+ struct spdk_thread *thread;
+ volatile int32_t state;
+ uint32_t bdevnum;
+};
+extern struct spdk_iodev_thread_info lcore_thread_info[RTE_MAX_LCORE];
+
+void spdk_reactors_use(bool useOrNot);
+
+bool spdk_get_reactor_type(void);
+
+void spdk_set_thread_exited(struct spdk_thread *thread);
+#endif
+
/**
* A function repeatedly called on the same spdk_thread.
*/
diff --git a/include/spdk_internal/bdev_stat.h b/include/spdk_internal/bdev_stat.h
new file mode 100644
index 0000000..f1ba1df
--- /dev/null
+++ b/include/spdk_internal/bdev_stat.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2021. Huawei Technologies Co., Ltd. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+*/
+
+#ifndef LIBSTORAGE_STAT_H
+#define LIBSTORAGE_STAT_H
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <inttypes.h>
+
+//share memory file name
+#define LIBSTORAGE_STAT_SHM_FILE_NAME "libstorage_stat.shm.\
+49ce4ec241e017c65812b71b9832a50865f0b7d9b4d5f18d3d03283b"
+
+//max number of channel+bdev
+#define STAT_MAX_NUM 8192
+
+extern int32_t g_libstorage_iostat;
+extern int32_t g_polltime_threshold;
+
+extern pthread_mutex_t *g_io_stat_map_mutex;
+
+/* libstorage iostat status */
+enum libstorage_iostat_status {
+ LIBSTORAGE_IOSTAT_DISABLE = 0,
+ LIBSTORAGE_IOSTAT_ENABLE = 1,
+ LIBSTORAGE_IOSTAT_QUERY = 2,
+};
+
+struct libstorage_bdev_io_stat
+{
+ bool used;
+ uint16_t channel_id;
+ char bdev_name[24];
+ uint64_t num_read_ops;
+ uint64_t num_write_ops;
+ uint64_t bytes_read;
+ uint64_t bytes_written;
+ uint64_t io_outstanding;
+ uint64_t read_latency_ticks;
+ uint64_t write_latency_ticks;
+ uint64_t io_ticks;
+ bool poll_time_used;
+ uint64_t num_poll_timeout;
+};
+
+extern struct libstorage_bdev_io_stat *g_io_stat_map;
+
+int libstorage_stat_init(void);
+
+int libstorage_stat_exit(void);
+#endif
diff --git a/include/spdk_internal/debug.h b/include/spdk_internal/debug.h
new file mode 100644
index 0000000..5d6e623
--- /dev/null
+++ b/include/spdk_internal/debug.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2021. Huawei Technologies Co., Ltd. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+*/
+
+#ifndef LIBSTORAGE_INTERNAL_DEBUG_H
+#define LIBSTORAGE_INTERNAL_DEBUG_H
+#include "spdk/stdinc.h"
+
+struct spdk_debug_subsystem
+{
+ const char *name;
+ void (*output)(FILE *file);
+ TAILQ_ENTRY(spdk_debug_subsystem) tailq;
+};
+
+void spdk_add_debug_subsystem(struct spdk_debug_subsystem *subsystem);
+
+/**
+ * \brief Register a new subsystem
+ */
+#define SPDK_DEBUG_REGISTER(_name, _output) \
+ struct spdk_debug_subsystem __spdk_debug_subsystem_ ## _name = \
+ { \
+ .name = #_name, \
+ .output = _output, \
+ }; \
+ __attribute__((constructor)) static void _name ## _debug_register(void) \
+ { \
+ spdk_add_debug_subsystem(&__spdk_debug_subsystem_ ## _name); \
+ }
+
+void spdk_output_debug_info(void);
+
+#endif
diff --git a/include/spdk_internal/thread.h b/include/spdk_internal/thread.h
index 5bab452..7d1811b 100644
--- a/include/spdk_internal/thread.h
+++ b/include/spdk_internal/thread.h
@@ -80,6 +80,8 @@ struct spdk_poller {
};
enum spdk_thread_state {
+ SPDK_THREAD_STATE_INITIALIZED,
+
/* The thread is pocessing poller and message by spdk_thread_poll(). */
SPDK_THREAD_STATE_RUNNING,
diff --git a/mk/spdk.app_vars.mk b/mk/spdk.app_vars.mk
index 059a56e..ff8fad5 100644
--- a/mk/spdk.app_vars.mk
+++ b/mk/spdk.app_vars.mk
@@ -57,8 +57,10 @@ SPDK_LIB_LINKER_ARGS = \
-L$(SPDK_ROOT_DIR)/build/lib \
-Wl,--whole-archive \
-Wl,--no-as-needed \
+ -Wl,-Bstatic \
$(SPDK_DEPLIB_LIST:%=-lspdk_%) \
- -Wl,--no-whole-archive
+ -Wl,--no-whole-archive \
+ -Wl,-Bdynamic
# This is primarily used for unit tests to ensure they link when shared library
# build is enabled. Shared libraries can't get their mock implementation from
--
2.33.0

View File

@ -0,0 +1,953 @@
From 214b56fd5a6fd40113c3bf912f0cf1ca7a07abae Mon Sep 17 00:00:00 2001
From: sunshihao <sunshihao@huawei.com>
Date: Thu, 18 Feb 2021 16:49:16 +0800
Subject: [PATCH 18/27] lib/bdev: Add bdev support for HSAK
Signed-off-by: sunshihao <sunshihao@huawei.com>
---
include/spdk/bdev.h | 21 ++-
include/spdk/bdev_module.h | 9 +-
include/spdk/nvme.h | 42 +++---
include/spdk_internal/bdev_stat.h | 14 +-
include/spdk_internal/debug.h | 5 +-
lib/accel/accel_engine.c | 4 +
lib/bdev/Makefile | 1 +
lib/bdev/bdev.c | 173 ++++++++++++++++++++++--
lib/bdev/bdev_internal.h | 18 +++
lib/bdev/bdev_self.c | 217 ++++++++++++++++++++++++++++++
10 files changed, 449 insertions(+), 55 deletions(-)
create mode 100644 lib/bdev/bdev_self.c
diff --git a/include/spdk/bdev.h b/include/spdk/bdev.h
index 2951660..22b87ec 100644
--- a/include/spdk/bdev.h
+++ b/include/spdk/bdev.h
@@ -131,23 +131,22 @@ typedef void (*LIBSTORAGE_CALLBACK_FUNC)(int32_t cb_status, int32_t sct_code, vo
typedef struct libstorage_io {
uint8_t *buf;
- struct iovec *iovs; /* array of iovecs to transfer */
- int iovcnt; /* Number of iovecs in iovs array */
- int32_t fd; /* File Descriptor */
- uint16_t opcode; /* r/w */
- uint16_t streamId; /* Stream ID for IO */
+ struct iovec *iovs; /* array of iovecs to transfer */
+ int iovcnt; /* Number of iovecs in iovs array */
+ int32_t fd; /* File Descriptor */
+ uint16_t opcode; /* r/w */
+ uint16_t streamId; /* Stream ID for IO */
uint8_t pi_action;
uint8_t fua;
uint8_t location;
- bool inSubmit; /* In the I/0 phase or not. Use in nopoll model */
+ bool inSubmit; /* In the I/0 phase or not. Use in nopoll model */
uint32_t count;
uint32_t nbytes;
uint64_t offset;
uint8_t *md_buf;
uint32_t md_len;
uint32_t magic;
- /*Save the error code returned by the callback */
- int32_t err;
+ int32_t err; /* Save the error code returned by the callback */
int32_t reserved;
LIBSTORAGE_CALLBACK_FUNC cb;
void *cb_arg;
@@ -1395,7 +1394,7 @@ int spdk_bdev_unmap(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
uint64_t offset, uint64_t nbytes,
spdk_bdev_io_completion_cb cb, void *cb_arg);
-#ifdef SPDK_CONFIG_APP_RW
+#ifdef SPDK_CONFIG_APP_RW
/**
* Submit an unmap request to the block device. Unmap is sometimes also called trim or
* deallocate. This notifies the device that the data in the blocks described is no
@@ -1417,10 +1416,10 @@ spdk_bdev_unmap_multiblocks(struct spdk_bdev_desc *desc, struct spdk_io_channel
void *unmap_d, uint16_t unmap_count,
spdk_bdev_io_completion_cb cb, void *cb_arg);
-void*
+void *
spdk_bdev_get_channel_group(struct spdk_io_channel *io_ch);
-void*
+void *
spdk_bdev_io_get_pool(size_t nbytes);
bool
diff --git a/include/spdk/bdev_module.h b/include/spdk/bdev_module.h
index c2fd81d..3ff7e28 100644
--- a/include/spdk/bdev_module.h
+++ b/include/spdk/bdev_module.h
@@ -247,7 +247,7 @@ enum spdk_bdev_driver_ctx {
SPDK_BDEV_IO_STREAM_ID_1,
};
-enum spdk_bdev_io_e2e_pi_action{
+enum spdk_bdev_io_e2e_pi_action {
IO_NO_PROTECTION = 0,
IO_HALF_WAY_PROTECTION = 1,
IO_E2E_PROTECTION = 2
@@ -257,19 +257,18 @@ enum spdk_bdev_io_e2e_pi_action{
#define FLAG_CALCRC 0x08//bit 3 : 1, libstorage calculate crc; 0, app calculate crc
#define FLAG_PRCHK 0x04//bit 2 : 1, enable ctrl guard crc check; 0, disable check
-enum spdk_bdev_io_fua{
+enum spdk_bdev_io_fua {
IO_FUA_NO = 0,
IO_FUA_YES = 1
};
void spdk_bdev_nvme_remove_cb(void *cb_ctx, void *ctrlr);
-void spdk_bdev_fail_ctrlr(const char* traddr);
+void spdk_bdev_fail_ctrlr(const char *traddr);
void *nvme_channel_get_group(void *io_ch);
-enum reqLocation_E
-{
+enum reqLocation_E {
LOCAL_RECEIVE_APP = 1,
LOCAL_LIBSTORAGE_SUBMIT = 2,
LOCAL_LIBSTORAGE_ASYNC_REQ = 3,
diff --git a/include/spdk/nvme.h b/include/spdk/nvme.h
index 8e05139..adda642 100644
--- a/include/spdk/nvme.h
+++ b/include/spdk/nvme.h
@@ -3454,11 +3454,11 @@ struct nvme_ctrlr_info {
uint16_t ssvid; /* Subsystem vendor id */
uint16_t ctrlid; /* Controller id */
uint16_t trtype; /* Transport type */
- uint16_t support_ns :1; /* Supports the Namespace Management and Namespace Attachment commands */
- uint16_t directives :1; /* Supports Directives */
- uint16_t streams :1; /* Supports Streams Directives */
- uint16_t dsm :1; /* Supports the controller supports the Dataset Management command */
- uint16_t reserved :12;
+ uint16_t support_ns : 1; /* Supports the Namespace Management and Namespace Attachment commands */
+ uint16_t directives : 1; /* Supports Directives */
+ uint16_t streams : 1; /* Supports Streams Directives */
+ uint16_t dsm : 1; /* Supports the controller supports the Dataset Management command */
+ uint16_t reserved : 12;
uint16_t reserved2[3];
};
@@ -3468,23 +3468,25 @@ struct spdk_bdev;
struct nvme_bdev;
struct spdk_nvme_ns;
struct spdk_nvme_qpair;
-int32_t nvme_ctrlr_get_info(const char* ctrlName, struct nvme_ctrlr_info** ppCtrlr);
-struct spdk_nvme_ctrlr* spdk_nvme_ctrlr_get_by_name(const char* ctrlname);
-struct spdk_nvme_ctrlr* spdk_nvme_ctrlr_get_by_ctrlr(const struct nvme_bdev_ctrlr *nvme_bdev_ctrlr);
-struct nvme_bdev_ctrlr* nvme_ctrlr_get_by_name(const char* ctrlname);
-void nvme_ctrlr_clear_iostat_by_name(const char* ctrlname);
+int32_t nvme_ctrlr_get_info(const char *ctrlName, struct nvme_ctrlr_info **ppCtrlr);
+struct spdk_nvme_ctrlr *spdk_nvme_ctrlr_get_by_name(const char *ctrlname);
+struct spdk_nvme_ctrlr *spdk_nvme_ctrlr_get_by_ctrlr(const struct nvme_bdev_ctrlr *nvme_bdev_ctrlr);
+struct nvme_bdev_ctrlr *nvme_ctrlr_get_by_name(const char *ctrlname);
+void nvme_ctrlr_clear_iostat_by_name(const char *ctrlname);
void nvme_ctrlr_clear_iostat_all(void);
-struct nvme_bdev_ctrlr* bdev_nvme_get_ctrlr_by_bdev_desc(void *bdev);
-struct spdk_nvme_ns* bdev_nvme_get_ns(struct nvme_bdev *nbdev);
+struct nvme_bdev_ctrlr *bdev_nvme_get_ctrlr_by_bdev_desc(void *bdev);
+struct spdk_nvme_ns *bdev_nvme_get_ns(struct nvme_bdev *nbdev);
void bdev_nvme_update_block_by_nvme_ctrlr(struct spdk_nvme_ctrlr *ctrlr);
int bdev_nvme_update_ns(struct nvme_bdev_ctrlr *nvme_ctrlr, uint32_t nsid);
bool spdk_bdev_can_remove(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, uint32_t nsid);
void spdk_bdev_set_ns_normal(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, uint32_t nsid);
void spdk_nvme_ctrlr_set_shutdown(struct spdk_nvme_ctrlr *ctrlr, bool is_shutdown);
bool spdk_nvme_ctrlr_is_smart_per_namespace_supported(struct spdk_nvme_ctrlr *ctrlr);
-int spdk_nvme_ctrlr_get_smart_info(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, struct spdk_nvme_health_information_page *health_payload);
-int spdk_nvme_ctrlr_get_error_info(struct spdk_nvme_ctrlr *ctrlr, uint32_t err_entries, struct spdk_nvme_error_information_entry *error_info);
-struct spdk_nvme_ctrlr_opts* spdk_nvme_ctrlr_get_opts(struct spdk_nvme_ctrlr *ctrlr);
+int spdk_nvme_ctrlr_get_smart_info(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid,
+ struct spdk_nvme_health_information_page *health_payload);
+int spdk_nvme_ctrlr_get_error_info(struct spdk_nvme_ctrlr *ctrlr, uint32_t err_entries,
+ struct spdk_nvme_error_information_entry *error_info);
+struct spdk_nvme_ctrlr_opts *spdk_nvme_ctrlr_get_opts(struct spdk_nvme_ctrlr *ctrlr);
int nvme_ns_get_common_data(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns_data *nsdata);
bool spdk_nvme_ns_is_allocated(struct spdk_nvme_ctrlr *ctrlr, uint16_t nsid);
bool spdk_nvme_ctrlr_is_ns_manage_supported(struct spdk_nvme_ctrlr *ctrlr);
@@ -3492,7 +3494,8 @@ bool spdk_nvme_ctrlr_is_format_supported(struct spdk_nvme_ctrlr *ctrlr);
bool spdk_nvme_ctrlr_is_format_all_ns(struct spdk_nvme_ctrlr *ctrlr);
bool spdk_nvme_ctrlr_is_directive_supported(struct spdk_nvme_ctrlr *ctrlr);
bool spdk_nvme_ctrlr_is_streams_supported(struct spdk_nvme_ctrlr *ctrlr);
-int32_t spdk_nvme_ctrlr_identify_directives(struct spdk_nvme_ctrlr *ctrlr, uint16_t nsid, void *payload);
+int32_t spdk_nvme_ctrlr_identify_directives(struct spdk_nvme_ctrlr *ctrlr, uint16_t nsid,
+ void *payload);
int32_t spdk_nvme_ctrlr_enable_streams(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid);
int32_t spdk_nvme_ctrlr_ret_streams_param(struct spdk_nvme_ctrlr *ctrlr, void *payload);
int32_t spdk_nvme_ns_ret_streams_param(struct spdk_nvme_ns *ns, void *payload);
@@ -3651,9 +3654,10 @@ int spdk_nvme_ns_cmd_writev_stream(struct spdk_nvme_ns *ns, struct spdk_nvme_qpa
*
* \sa spdk_nvme_ctrlr_cmd_self_test_operation()
*/
-int spdk_nvme_ctrlr_cmd_self_test_operation(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, uint32_t stc,
- void *payload, uint32_t payload_size,
- spdk_nvme_cmd_cb cb_fn, void *cb_arg);
+int spdk_nvme_ctrlr_cmd_self_test_operation(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid,
+ uint32_t stc,
+ void *payload, uint32_t payload_size,
+ spdk_nvme_cmd_cb cb_fn, void *cb_arg);
/**
*\get I/O queue pair id
diff --git a/include/spdk_internal/bdev_stat.h b/include/spdk_internal/bdev_stat.h
index f1ba1df..58a5102 100644
--- a/include/spdk_internal/bdev_stat.h
+++ b/include/spdk_internal/bdev_stat.h
@@ -9,21 +9,18 @@
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
-*/
+ */
#ifndef LIBSTORAGE_STAT_H
#define LIBSTORAGE_STAT_H
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdbool.h>
-#include <inttypes.h>
+#include "spdk/stdinc.h"
-//share memory file name
+/* share memory file name */
#define LIBSTORAGE_STAT_SHM_FILE_NAME "libstorage_stat.shm.\
49ce4ec241e017c65812b71b9832a50865f0b7d9b4d5f18d3d03283b"
-//max number of channel+bdev
+/* max number of channel+bdev */
#define STAT_MAX_NUM 8192
extern int32_t g_libstorage_iostat;
@@ -38,8 +35,7 @@ enum libstorage_iostat_status {
LIBSTORAGE_IOSTAT_QUERY = 2,
};
-struct libstorage_bdev_io_stat
-{
+struct libstorage_bdev_io_stat {
bool used;
uint16_t channel_id;
char bdev_name[24];
diff --git a/include/spdk_internal/debug.h b/include/spdk_internal/debug.h
index 5d6e623..cf9b9e7 100644
--- a/include/spdk_internal/debug.h
+++ b/include/spdk_internal/debug.h
@@ -9,14 +9,13 @@
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
-*/
+ */
#ifndef LIBSTORAGE_INTERNAL_DEBUG_H
#define LIBSTORAGE_INTERNAL_DEBUG_H
#include "spdk/stdinc.h"
-struct spdk_debug_subsystem
-{
+struct spdk_debug_subsystem {
const char *name;
void (*output)(FILE *file);
TAILQ_ENTRY(spdk_debug_subsystem) tailq;
diff --git a/lib/accel/accel_engine.c b/lib/accel/accel_engine.c
index ca3e248..865128a 100644
--- a/lib/accel/accel_engine.c
+++ b/lib/accel/accel_engine.c
@@ -745,7 +745,11 @@ spdk_accel_engine_module_finish(void)
}
if (g_accel_engine_module->module_fini) {
+#ifndef SPDK_CONFIG_APP_RW
spdk_thread_send_msg(spdk_get_thread(), g_accel_engine_module->module_fini, NULL);
+#else
+ g_accel_engine_module->module_fini(NULL);
+#endif
} else {
spdk_accel_engine_module_finish();
}
diff --git a/lib/bdev/Makefile b/lib/bdev/Makefile
index 795fa6e..c23caf1 100644
--- a/lib/bdev/Makefile
+++ b/lib/bdev/Makefile
@@ -42,6 +42,7 @@ CFLAGS += -I$(CONFIG_VTUNE_DIR)/include -I$(CONFIG_VTUNE_DIR)/sdk/src/ittnotify
endif
C_SRCS = bdev.c bdev_rpc.c bdev_zone.c part.c scsi_nvme.c
+C_SRCS-$(CONFIG_APP_RW) += bdev_self.c
C_SRCS-$(CONFIG_VTUNE) += vtune.c
LIBNAME = bdev
diff --git a/lib/bdev/bdev.c b/lib/bdev/bdev.c
index 2a642d6..bf102bb 100644
--- a/lib/bdev/bdev.c
+++ b/lib/bdev/bdev.c
@@ -50,6 +50,13 @@
#include "spdk/log.h"
#include "spdk/string.h"
+#ifdef SPDK_CONFIG_APP_RW
+#include "spdk/stdinc.h"
+#include "spdk/barrier.h"
+#include <securec.h>
+#include "spdk_internal/bdev_stat.h"
+#endif
+
#include "bdev_internal.h"
#ifdef SPDK_CONFIG_VTUNE
@@ -1377,8 +1384,12 @@ spdk_bdev_initialize(spdk_bdev_init_cb cb_fn, void *cb_arg)
g_bdev_mgr.buf_small_pool = spdk_mempool_create(mempool_name,
g_bdev_opts.small_buf_pool_size,
+#ifdef SPDK_CONFIG_APP_RW
+ SPDK_BDEV_SMALL_BUF_MAX_SIZE + SPDK_BDEV_SMALL_BUF_WITH_MAX_MD,
+#else
SPDK_BDEV_BUF_SIZE_WITH_MD(SPDK_BDEV_SMALL_BUF_MAX_SIZE) +
SPDK_BDEV_POOL_ALIGNMENT,
+#endif
cache_size,
SPDK_ENV_SOCKET_ID_ANY);
if (!g_bdev_mgr.buf_small_pool) {
@@ -1392,8 +1403,12 @@ spdk_bdev_initialize(spdk_bdev_init_cb cb_fn, void *cb_arg)
g_bdev_mgr.buf_large_pool = spdk_mempool_create(mempool_name,
g_bdev_opts.large_buf_pool_size,
+#ifdef SPDK_CONFIG_APP_RW
+ SPDK_BDEV_LARGE_BUF_MAX_SIZE + SPDK_BDEV_LARGE_BUF_WITH_MAX_MD,
+#else
SPDK_BDEV_BUF_SIZE_WITH_MD(SPDK_BDEV_LARGE_BUF_MAX_SIZE) +
SPDK_BDEV_POOL_ALIGNMENT,
+#endif
cache_size,
SPDK_ENV_SOCKET_ID_ANY);
if (!g_bdev_mgr.buf_large_pool) {
@@ -1561,7 +1576,11 @@ bdev_finish_unregister_bdevs_iter(void *cb_arg, int bdeverrno)
* (like bdev part free) that will use this bdev (or private bdev driver ctx data)
* after returning.
*/
+#ifdef SPDK_CONFIG_APP_RW
+ bdev_module_finish_iter(NULL);
+#else
spdk_thread_send_msg(spdk_get_thread(), bdev_module_finish_iter, NULL);
+#endif
return;
}
@@ -2296,6 +2315,17 @@ _bdev_io_submit(void *ctx)
bdev_io->internal.submit_tsc = tsc;
spdk_trace_record_tsc(tsc, TRACE_BDEV_IO_START, 0, 0, (uintptr_t)bdev_io, bdev_io->type);
+#ifdef SPDK_CONFIG_APP_RW
+ struct spdk_bdev_io_stat *stat = &bdev_ch->stat;
+ if (bdev_ch->io_outstanding > 0) {
+ stat->pre_ticks = stat->cur_ticks;
+ stat->cur_ticks = tsc;
+ stat->io_ticks += stat->cur_ticks - stat->pre_ticks;
+ } else {
+ stat->cur_ticks = tsc;
+ }
+#endif
+
if (spdk_likely(bdev_ch->flags == 0)) {
bdev_io_do_submit(bdev_ch, bdev_io);
return;
@@ -2307,6 +2337,9 @@ _bdev_io_submit(void *ctx)
if (spdk_unlikely(bdev_io->type == SPDK_BDEV_IO_TYPE_ABORT) &&
bdev_abort_queued_io(&bdev->internal.qos->queued, bdev_io->u.abort.bio_to_abort)) {
_bdev_io_complete_in_submit(bdev_ch, bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
+#ifdef SPDK_CONFIG_APP_RW
+ spdk_bdev_set_io_location(bdev_io->driver_ctx, (uint8_t)LOCAL_LIBSTORAGE_BDEV_NOMEM);
+#endif
} else {
TAILQ_INSERT_TAIL(&bdev->internal.qos->queued, bdev_io, internal.link);
bdev_qos_io_submit(bdev_ch, bdev->internal.qos);
@@ -2652,6 +2685,7 @@ bdev_desc_free(struct spdk_bdev_desc *desc)
pthread_mutex_destroy(&desc->mutex);
free(desc->media_events_buffer);
free(desc);
+ desc = NULL;
}
static void
@@ -2837,6 +2871,9 @@ bdev_channel_create(void *io_device, void *ctx_buf)
ch->flags = 0;
ch->shared_resource = shared_resource;
+#ifdef SPDK_CONFIG_APP_RW
+ spdk_bdev_init_iostat(ch, ch->bdev, ch->channel, &ch->stat);
+#endif
TAILQ_INIT(&ch->io_submitted);
TAILQ_INIT(&ch->io_locked);
@@ -3075,6 +3112,10 @@ bdev_channel_destroy(void *io_device, void *ctx_buf)
spdk_histogram_data_free(ch->histogram);
}
+#ifdef SPDK_CONFIG_APP_RW
+ spdk_bdev_destroy_iostat(ch, ch->bdev, ch->channel);
+#endif
+
bdev_channel_destroy_resource(ch);
}
@@ -3527,6 +3568,26 @@ _bdev_io_check_md_buf(const struct iovec *iovs, const void *md_buf)
return _is_buf_allocated(iovs) == (md_buf != NULL);
}
+#ifdef SPDK_CONFIG_APP_RW
+/*
+ * Populate a bdev_io for a contiguous-buffer HSAK read/write request.
+ *
+ * Only built when SPDK_CONFIG_APP_RW is defined: every caller lives in the
+ * APP_RW branch, so leaving this static helper unguarded would produce an
+ * unused-function warning (and reference HSAK-only types) in other builds.
+ *
+ * \param type bdev I/O type to store in bdev_io->type.
+ * \param buf data buffer stored in u.contig.buf.
+ * \param md_buf metadata buffer stored in u.contig.md_buf.
+ * \param offset_blocks starting block stored in u.contig.offset_blocks.
+ * \param num_blocks block count stored in u.contig.num_blocks.
+ * \param io libstorage request supplying nbytes, md_len, pi_action, fua and streamId.
+ * \param bdev_io bdev I/O being filled in.
+ */
+static void
+bdev_build_contig_io(uint8_t type, void *buf, void *md_buf, uint64_t offset_blocks,
+		     uint64_t num_blocks,
+		     struct libstorage_io *io, struct spdk_bdev_io *bdev_io)
+{
+	bdev_io->type = type;
+	bdev_io->u.contig.buf = buf;
+	bdev_io->u.contig.md_buf = md_buf;
+	bdev_io->u.contig.offset_blocks = offset_blocks;
+	bdev_io->u.contig.num_blocks = num_blocks;
+	bdev_io->u.contig.nbytes = io->nbytes;
+	bdev_io->u.contig.md_len = io->md_len;
+	bdev_io->driver_ctx[SPDK_BDEV_IO_ACTION_PI] = io->pi_action;
+	bdev_io->driver_ctx[SPDK_BDEV_IO_ACTION_FUA] = io->fua;
+	if (type == SPDK_BDEV_IO_TYPE_WRITE_NVME) {
+		/* The 16-bit stream id is split into two driver_ctx bytes: low byte, then high byte. */
+		bdev_io->driver_ctx[SPDK_BDEV_IO_STREAM_ID_0] = io->streamId & 0xFF;
+		bdev_io->driver_ctx[SPDK_BDEV_IO_STREAM_ID_1] = (io->streamId >> 8) & 0xFF;
+	}
+}
+#endif /* SPDK_CONFIG_APP_RW */
+
static int
bdev_read_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, void *buf,
void *md_buf, int64_t offset_blocks, uint64_t num_blocks,
@@ -3547,6 +3608,7 @@ bdev_read_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch
bdev_io->internal.ch = channel;
bdev_io->internal.desc = desc;
+#ifndef SPDK_CONFIG_APP_RW
bdev_io->type = SPDK_BDEV_IO_TYPE_READ;
bdev_io->u.bdev.iovs = &bdev_io->iov;
bdev_io->u.bdev.iovs[0].iov_base = buf;
@@ -3555,6 +3617,12 @@ bdev_read_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch
bdev_io->u.bdev.md_buf = md_buf;
bdev_io->u.bdev.num_blocks = num_blocks;
bdev_io->u.bdev.offset_blocks = offset_blocks;
+#else
+ struct libstorage_io *io = (struct libstorage_io *)cb_arg;
+ bdev_build_contig_io(SPDK_BDEV_IO_TYPE_READ_NVME, buf, md_buf, offset_blocks, num_blocks,
+ io, bdev_io);
+ cb_arg = &io->location;
+#endif
bdev_io_init(bdev_io, bdev, cb_arg, cb);
bdev_io_submit(bdev_io);
@@ -3592,7 +3660,7 @@ spdk_bdev_read_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channe
struct iovec iov = {
.iov_base = buf,
};
-
+#ifndef SPDK_CONFIG_APP_RW
if (!spdk_bdev_is_md_separate(spdk_bdev_desc_get_bdev(desc))) {
return -EINVAL;
}
@@ -3600,7 +3668,7 @@ spdk_bdev_read_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channe
if (!_bdev_io_check_md_buf(&iov, md_buf)) {
return -EINVAL;
}
-
+#endif
return bdev_read_blocks_with_md(desc, ch, buf, md_buf, offset_blocks, num_blocks,
cb, cb_arg);
}
@@ -3647,6 +3715,14 @@ bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *c
bdev_io->u.bdev.md_buf = md_buf;
bdev_io->u.bdev.num_blocks = num_blocks;
bdev_io->u.bdev.offset_blocks = offset_blocks;
+#ifdef SPDK_CONFIG_APP_RW
+ struct libstorage_io *io = (struct libstorage_io *)cb_arg;
+ bdev_io->type = SPDK_BDEV_IO_TYPE_READV_NVME;
+ bdev_io->u.bdev.nbytes = io->nbytes;
+ bdev_io->driver_ctx[SPDK_BDEV_IO_ACTION_PI] = io->pi_action;
+ bdev_io->driver_ctx[SPDK_BDEV_IO_ACTION_FUA] = io->fua;
+ cb_arg = &io->location;
+#endif
bdev_io_init(bdev_io, bdev, cb_arg, cb);
bdev_io_submit(bdev_io);
@@ -3668,6 +3744,7 @@ spdk_bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_chann
uint64_t offset_blocks, uint64_t num_blocks,
spdk_bdev_io_completion_cb cb, void *cb_arg)
{
+#ifndef SPDK_CONFIG_APP_RW
if (!spdk_bdev_is_md_separate(spdk_bdev_desc_get_bdev(desc))) {
return -EINVAL;
}
@@ -3675,7 +3752,7 @@ spdk_bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_chann
if (!_bdev_io_check_md_buf(iov, md_buf)) {
return -EINVAL;
}
-
+#endif
return bdev_readv_blocks_with_md(desc, ch, iov, iovcnt, md_buf, offset_blocks,
num_blocks, cb, cb_arg);
}
@@ -3689,9 +3766,11 @@ bdev_write_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *c
struct spdk_bdev_io *bdev_io;
struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
+#ifndef SPDK_CONFIG_APP_RW
if (!desc->write) {
return -EBADF;
}
+#endif
if (!bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
return -EINVAL;
@@ -3704,6 +3783,7 @@ bdev_write_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *c
bdev_io->internal.ch = channel;
bdev_io->internal.desc = desc;
+#ifndef SPDK_CONFIG_APP_RW
bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE;
bdev_io->u.bdev.iovs = &bdev_io->iov;
bdev_io->u.bdev.iovs[0].iov_base = buf;
@@ -3712,6 +3792,12 @@ bdev_write_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *c
bdev_io->u.bdev.md_buf = md_buf;
bdev_io->u.bdev.num_blocks = num_blocks;
bdev_io->u.bdev.offset_blocks = offset_blocks;
+#else
+ LIBSTORAGE_IO_T *io = (struct libstorage_io *)cb_arg;
+ bdev_build_contig_io(SPDK_BDEV_IO_TYPE_WRITE_NVME, buf, md_buf, offset_blocks, num_blocks,
+ io, bdev_io);
+ cb_arg = &io->location;
+#endif
bdev_io_init(bdev_io, bdev, cb_arg, cb);
bdev_io_submit(bdev_io);
@@ -3751,6 +3837,7 @@ spdk_bdev_write_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_chann
.iov_base = buf,
};
+#ifndef SPDK_CONFIG_APP_RW
if (!spdk_bdev_is_md_separate(spdk_bdev_desc_get_bdev(desc))) {
return -EINVAL;
}
@@ -3758,7 +3845,7 @@ spdk_bdev_write_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_chann
if (!_bdev_io_check_md_buf(&iov, md_buf)) {
return -EINVAL;
}
-
+#endif
return bdev_write_blocks_with_md(desc, ch, buf, md_buf, offset_blocks, num_blocks,
cb, cb_arg);
}
@@ -3773,9 +3860,11 @@ bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *
struct spdk_bdev_io *bdev_io;
struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
+#ifndef SPDK_CONFIG_APP_RW
if (!desc->write) {
return -EBADF;
}
+#endif
if (!bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
return -EINVAL;
@@ -3794,6 +3883,16 @@ bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *
bdev_io->u.bdev.md_buf = md_buf;
bdev_io->u.bdev.num_blocks = num_blocks;
bdev_io->u.bdev.offset_blocks = offset_blocks;
+#ifdef SPDK_CONFIG_APP_RW
+ struct libstorage_io *io = (struct libstorage_io *)cb_arg;
+ bdev_io->type = SPDK_BDEV_IO_TYPE_WRITEV_NVME;
+ bdev_io->u.bdev.nbytes = io->nbytes;
+ bdev_io->driver_ctx[SPDK_BDEV_IO_ACTION_PI] = io->pi_action;
+ bdev_io->driver_ctx[SPDK_BDEV_IO_ACTION_FUA] = io->fua;
+ bdev_io->driver_ctx[SPDK_BDEV_IO_STREAM_ID_0] = io->streamId & 0xFF;
+ bdev_io->driver_ctx[SPDK_BDEV_IO_STREAM_ID_1] = (io->streamId >> 8) & 0xFF;
+ cb_arg = &io->location;
+#endif
bdev_io_init(bdev_io, bdev, cb_arg, cb);
bdev_io_submit(bdev_io);
@@ -3832,6 +3931,7 @@ spdk_bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_chan
uint64_t offset_blocks, uint64_t num_blocks,
spdk_bdev_io_completion_cb cb, void *cb_arg)
{
+#ifndef SPDK_CONFIG_APP_RW
if (!spdk_bdev_is_md_separate(spdk_bdev_desc_get_bdev(desc))) {
return -EINVAL;
}
@@ -3839,7 +3939,7 @@ spdk_bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_chan
if (!_bdev_io_check_md_buf(iov, md_buf)) {
return -EINVAL;
}
-
+#endif
return bdev_writev_blocks_with_md(desc, ch, iov, iovcnt, md_buf, offset_blocks,
num_blocks, cb, cb_arg);
}
@@ -5111,8 +5211,16 @@ bdev_io_complete(void *ctx)
default:
break;
}
+#ifdef SPDK_CONFIG_APP_RW
+ bdev_io_stat_update(bdev_io, tsc, &bdev_io->internal.ch->stat);
+#endif
}
+#ifdef SPDK_CONFIG_APP_RW
+ bdev_update_iostat_map(bdev_io, tsc, &bdev_io->internal.ch->stat, bdev_io->internal.ch->channel,
+ bdev_io->internal.ch->io_outstanding);
+#endif
+
#ifdef SPDK_CONFIG_VTUNE
uint64_t now_tsc = spdk_get_ticks();
if (now_tsc > (bdev_io->internal.ch->start_tsc + bdev_io->internal.ch->interval_tsc)) {
@@ -5134,7 +5242,9 @@ bdev_io_complete(void *ctx)
#endif
assert(bdev_io->internal.cb != NULL);
+#ifndef SPDK_CONFIG_APP_RW
assert(spdk_get_thread() == spdk_bdev_io_get_thread(bdev_io));
+#endif
bdev_io->internal.cb(bdev_io, bdev_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS,
bdev_io->internal.caller_ctx);
@@ -5208,6 +5318,9 @@ spdk_bdev_io_complete(struct spdk_bdev_io *bdev_io, enum spdk_bdev_io_status sta
if (spdk_unlikely(status == SPDK_BDEV_IO_STATUS_NOMEM)) {
TAILQ_INSERT_HEAD(&shared_resource->nomem_io, bdev_io, internal.link);
+#ifdef SPDK_CONFIG_APP_RW
+ spdk_bdev_set_io_location(bdev_io->driver_ctx, (uint8_t)LOCAL_LIBSTORAGE_BDEV_NOMEM);
+#endif
/*
* Wait for some of the outstanding I/O to complete before we
* retry any of the nomem_io. Normally we will wait for
@@ -5613,8 +5726,8 @@ bdev_unregister_unsafe(struct spdk_bdev *bdev)
* immediately closes its descriptor.
*/
desc->refs++;
- spdk_thread_send_msg(desc->thread, _remove_notify, desc);
pthread_mutex_unlock(&desc->mutex);
+ spdk_thread_send_msg(desc->thread, _remove_notify, desc);
}
/* If there are no descriptors, proceed removing the bdev */
@@ -5858,9 +5971,9 @@ spdk_bdev_close(struct spdk_bdev_desc *desc)
SPDK_DEBUGLOG(bdev, "Closing descriptor %p for bdev %s on thread %p\n", desc, bdev->name,
spdk_get_thread());
-
+#ifndef SPDK_CONFIG_APP_RW
assert(desc->thread == spdk_get_thread());
-
+#endif
spdk_poller_unregister(&desc->io_timeout_poller);
pthread_mutex_lock(&bdev->internal.mutex);
@@ -6909,6 +7022,50 @@ bdev_unlock_lba_range(struct spdk_bdev_desc *desc, struct spdk_io_channel *_ch,
return 0;
}
+#ifdef SPDK_CONFIG_APP_RW
+/*
+ * Pick the bdev buffer mempool able to hold a buffer of nbytes.
+ *
+ * Returns the small pool when nbytes fits within
+ * SPDK_BDEV_SMALL_BUF_MAX_SIZE + SPDK_BDEV_SMALL_BUF_WITH_MAX_MD, otherwise
+ * the large pool. A size of zero, or one above
+ * SPDK_BDEV_LARGE_BUF_MAX_SIZE + SPDK_BDEV_LARGE_BUF_WITH_MAX_MD, is logged
+ * as an error and yields NULL.
+ */
+void *
+spdk_bdev_io_get_pool(size_t nbytes)
+{
+	if (nbytes == 0 || nbytes > SPDK_BDEV_LARGE_BUF_MAX_SIZE + SPDK_BDEV_LARGE_BUF_WITH_MAX_MD) {
+		SPDK_ERRLOG("The size of buffer[%zu] is incorrect!\n", nbytes);
+		return NULL;
+	}
+
+	if (nbytes > SPDK_BDEV_SMALL_BUF_MAX_SIZE + SPDK_BDEV_SMALL_BUF_WITH_MAX_MD) {
+		return g_bdev_mgr.buf_large_pool;
+	}
+
+	return g_bdev_mgr.buf_small_pool;
+}
+
+/*
+ * Return the group of the channel underneath a bdev channel.
+ *
+ * The bdev channel's underlying I/O channel context is handed to
+ * nvme_channel_get_group(); io_ch is dereferenced without a NULL check.
+ * NOTE(review): presumably only valid for NVMe-backed bdevs - confirm with callers.
+ */
+void *
+spdk_bdev_get_channel_group(struct spdk_io_channel *io_ch)
+{
+	struct spdk_bdev_channel *bdev_ch = spdk_io_channel_get_ctx(io_ch);
+
+	return nvme_channel_get_group(spdk_io_channel_get_ctx(bdev_ch->channel));
+}
+
+/*
+ * Report whether the bdev channel behind io_ch still has outstanding I/O.
+ * A NULL io_ch is treated as "no I/O" and returns false.
+ */
+bool
+spdk_bdev_have_io_in_channel(struct spdk_io_channel *io_ch)
+{
+	struct spdk_bdev_channel *owner;
+
+	if (io_ch == NULL) {
+		return false;
+	}
+
+	owner = spdk_io_channel_get_ctx(io_ch);
+	return owner->io_outstanding != 0;
+}
+#endif
+
SPDK_LOG_REGISTER_COMPONENT(bdev)
SPDK_TRACE_REGISTER_FN(bdev_trace, "bdev", TRACE_GROUP_BDEV)
diff --git a/lib/bdev/bdev_internal.h b/lib/bdev/bdev_internal.h
index d1fa6e6..871387f 100644
--- a/lib/bdev/bdev_internal.h
+++ b/lib/bdev/bdev_internal.h
@@ -47,4 +47,22 @@ void bdev_io_init(struct spdk_bdev_io *bdev_io, struct spdk_bdev *bdev, void *cb
void bdev_io_submit(struct spdk_bdev_io *bdev_io);
+#ifdef SPDK_CONFIG_APP_RW
+/*
+ * Bind a bdev channel to an entry of the libstorage iostat map.
+ * On return stat->io_stat_id holds the entry index, or -1 when the bdev has
+ * no get_io_channel_id callback or no free entry is left; stat->start_tsc and
+ * stat->interval_tsc are initialised as well.
+ */
+void
+spdk_bdev_init_iostat(struct spdk_bdev_channel *ch, struct spdk_bdev *bdev,
+ struct spdk_io_channel *io_ch,
+ struct spdk_bdev_io_stat *stat);
+
+/*
+ * Clear and release the iostat map entries matching this bdev name and the
+ * channel id reported by the bdev's get_io_channel_id callback.
+ */
+void
+spdk_bdev_destroy_iostat(struct spdk_bdev_channel *ch, struct spdk_bdev *bdev,
+ struct spdk_io_channel *io_ch);
+
+/*
+ * Accumulate byte, operation and latency counters in stat for a completed
+ * *_NVME read/write bdev_io; other I/O types are ignored.
+ */
+void
+bdev_io_stat_update(struct spdk_bdev_io *bdev_io, uint64_t tsc, struct spdk_bdev_io_stat *stat);
+
+/*
+ * Advance the per-channel tick accounting in stat and, when libstorage iostat
+ * is enabled and stat owns a valid map entry, copy the counters into that entry.
+ */
+void
+bdev_update_iostat_map(struct spdk_bdev_io *bdev_io, uint64_t tsc, struct spdk_bdev_io_stat *stat,
+ struct spdk_io_channel *channel, uint64_t io_outstanding);
+#endif
+
#endif /* SPDK_BDEV_INTERNAL_H */
diff --git a/lib/bdev/bdev_self.c b/lib/bdev/bdev_self.c
new file mode 100644
index 0000000..7050c30
--- /dev/null
+++ b/lib/bdev/bdev_self.c
@@ -0,0 +1,217 @@
+/*
+ * Copyright (C) 2021. Huawei Technologies Co., Ltd. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include "bdev_internal.h"
+
+#include "spdk/stdinc.h"
+#include "spdk/bdev.h"
+#include "spdk/bdev_module.h"
+#include "spdk/env.h"
+#include "spdk/nvme_spec.h"
+#include "spdk/log.h"
+
+#include <securec.h>
+#include "spdk_internal/bdev_stat.h"
+
+/*
+ * Mutex held by spdk_bdev_init_iostat() while it claims a free g_io_stat_map
+ * entry. NOTE(review): the EOWNERDEAD handling there suggests a robust,
+ * process-shared mutex - confirm where it is created and assigned.
+ */
+pthread_mutex_t *g_io_stat_map_mutex = NULL;
+/* Shared-memory table of libstorage iostat records; scanned up to STAT_MAX_NUM entries */
+struct libstorage_bdev_io_stat *g_io_stat_map;
+/* libstorage iostat enable or disable switch */
+int32_t g_libstorage_iostat = 0;
+/* When non-zero, bdev_update_iostat_map() also publishes the poll timeout count */
+int32_t g_polltime_threshold = 0;
+
+/*
+ * Attach a bdev channel to a record in the libstorage iostat map.
+ *
+ * If the bdev provides get_io_channel_id, an in-use record with the same bdev
+ * name and channel id is reset and reused; otherwise the first unused record
+ * is claimed under g_io_stat_map_mutex. stat->io_stat_id receives the record
+ * index, or -1 when the bdev has no such callback or the map is full.
+ * stat->start_tsc and stat->interval_tsc are always initialised.
+ *
+ * \param ch bdev channel being created (not used by this function).
+ * \param bdev bdev the channel belongs to; supplies the name and fn_table.
+ * \param io_ch underlying I/O channel passed to get_io_channel_id.
+ * \param stat per-channel statistics block to initialise.
+ */
+void
+spdk_bdev_init_iostat(struct spdk_bdev_channel *ch, struct spdk_bdev *bdev,
+ struct spdk_io_channel *io_ch,
+ struct spdk_bdev_io_stat *stat)
+{
+ int i = 0;
+ bool find = false;
+ uint16_t channel_id;
+
+ if (bdev->fn_table->get_io_channel_id) {
+ channel_id = bdev->fn_table->get_io_channel_id(io_ch);
+ /* NOTE(review): this reuse scan runs without g_io_stat_map_mutex held. */
+ for (i = 0; i < STAT_MAX_NUM; i++) {
+ /* Reuse last record */
+ if (g_io_stat_map[i].used && !strcmp(g_io_stat_map[i].bdev_name, bdev->name)
+ && g_io_stat_map[i].channel_id == channel_id) {
+ stat->io_stat_id = i;
+ find = true;
+ g_io_stat_map[i].num_read_ops = 0;
+ g_io_stat_map[i].num_write_ops = 0;
+ g_io_stat_map[i].bytes_read = 0;
+ g_io_stat_map[i].bytes_written = 0;
+ g_io_stat_map[i].io_outstanding = 0;
+ g_io_stat_map[i].read_latency_ticks = 0;
+ g_io_stat_map[i].write_latency_ticks = 0;
+ g_io_stat_map[i].io_ticks = 0;
+ g_io_stat_map[i].poll_time_used = false;
+ g_io_stat_map[i].num_poll_timeout = 0;
+ break;
+ }
+ }
+ if (!find) {
+ /* Add lock when multi thread or process */
+ /*
+ * EOWNERDEAD means a previous owner died while holding the lock; the
+ * mutex is marked consistent so it stays usable. Other lock errors
+ * are not checked here.
+ */
+ if (pthread_mutex_lock(g_io_stat_map_mutex) == EOWNERDEAD) {
+ if (pthread_mutex_consistent(g_io_stat_map_mutex)) {
+ SPDK_WARNLOG("[libstorage] the iostat_map process mutex is not normal any more.\n");
+ }
+ }
+ for (i = 0; i < STAT_MAX_NUM; i++) {
+ /* Find unused record, allocate it to this channel */
+ if (!g_io_stat_map[i].used) {
+ g_io_stat_map[i].used = true;
+ /* A failed copy is only logged; the record stays claimed. */
+ if (strncpy_s(g_io_stat_map[i].bdev_name, sizeof(g_io_stat_map[i].bdev_name), bdev->name,
+ sizeof(g_io_stat_map[i].bdev_name) - 1) != 0) {
+ SPDK_ERRLOG("[libstorage] string copy failed.\n");
+ }
+ g_io_stat_map[i].channel_id = channel_id;
+ stat->io_stat_id = i;
+ find = true;
+ break;
+ }
+ }
+ pthread_mutex_unlock(g_io_stat_map_mutex);
+ }
+ if (!find) {
+ /* Every record is in use: statistics are disabled for this channel. */
+ stat->io_stat_id = -1;
+ SPDK_ERRLOG("channel %u bdev %s allocate io stat memory failed.\n", channel_id, bdev->name);
+ }
+ } else {
+ /* It is not nvme disk, can use iostat. So do not do IO statistics in libstorage. */
+ stat->io_stat_id = -1;
+ }
+ stat->start_tsc = spdk_get_ticks();
+ /* Publish interval is one tenth of the tick frequency (presumably 100 ms). */
+ stat->interval_tsc = spdk_get_ticks_hz() / 10;
+}
+
+/*
+ * Release the iostat map record(s) owned by a bdev channel.
+ *
+ * When the bdev provides get_io_channel_id, every in-use g_io_stat_map entry
+ * whose bdev name and channel id match is zeroed and then marked unused.
+ * Bdevs without that callback own no record, so nothing is done.
+ *
+ * \param ch bdev channel being destroyed (not used by this function).
+ * \param bdev bdev the channel belongs to; supplies the name and fn_table.
+ * \param io_ch underlying I/O channel passed to get_io_channel_id.
+ */
+void
+spdk_bdev_destroy_iostat(struct spdk_bdev_channel *ch, struct spdk_bdev *bdev,
+			 struct spdk_io_channel *io_ch)
+{
+	struct libstorage_bdev_io_stat *entry;
+	uint16_t channel_id;
+	int i;
+
+	if (!bdev->fn_table->get_io_channel_id) {
+		return;
+	}
+
+	channel_id = bdev->fn_table->get_io_channel_id(io_ch);
+	for (i = 0; i < STAT_MAX_NUM; i++) {
+		entry = &g_io_stat_map[i];
+		if (!entry->used || strcmp(entry->bdev_name, bdev->name) != 0 ||
+		    entry->channel_id != channel_id) {
+			continue;
+		}
+
+		/* clear channel iostat info in share memory */
+		entry->channel_id = 0;
+		memset(entry->bdev_name, 0, sizeof(entry->bdev_name));
+		entry->num_read_ops = 0;
+		entry->num_write_ops = 0;
+		entry->bytes_read = 0;
+		entry->bytes_written = 0;
+		entry->io_outstanding = 0;
+		entry->read_latency_ticks = 0;
+		entry->write_latency_ticks = 0;
+		entry->io_ticks = 0;
+		entry->poll_time_used = false;
+		entry->num_poll_timeout = 0;
+		/*
+		 * The used flag is cleared last, after every other field, so a
+		 * channel create that claims this record cannot have its fields
+		 * overwritten by the remainder of this cleanup.
+		 * NOTE(review): these are plain stores with no memory barrier -
+		 * confirm whether one is needed for readers in other processes.
+		 */
+		entry->used = false;
+	}
+}
+
+/*
+ * Submit an SPDK_BDEV_IO_TYPE_UNMAP_BLOCKS request.
+ *
+ * unmap_d is stored as the contiguous buffer and unmap_count as the block
+ * count of the bdev_io; neither is validated here.
+ *
+ * \return 0 once the request is submitted, -ENOMEM when no bdev_io is available.
+ */
+int
+spdk_bdev_unmap_multiblocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
+			    void *unmap_d, uint16_t unmap_count,
+			    spdk_bdev_io_completion_cb cb, void *cb_arg)
+{
+	struct spdk_bdev *target = spdk_bdev_desc_get_bdev(desc);
+	struct spdk_bdev_channel *bdev_ch = spdk_io_channel_get_ctx(ch);
+	struct spdk_bdev_io *req = bdev_channel_get_io(bdev_ch);
+
+	if (!req) {
+		return -ENOMEM;
+	}
+
+	req->type = SPDK_BDEV_IO_TYPE_UNMAP_BLOCKS;
+	req->internal.ch = bdev_ch;
+	req->internal.desc = desc;
+	req->u.contig.buf = unmap_d;
+	req->u.contig.num_blocks = unmap_count;
+
+	bdev_io_init(req, target, cb_arg, cb);
+	bdev_io_submit(req);
+
+	return 0;
+}
+
+/*
+ * Fold one completed bdev_io into the per-channel statistics.
+ *
+ * Latency is tsc minus the I/O's submit timestamp. Contiguous (READ_NVME /
+ * WRITE_NVME) requests count nbytes plus md_len; vectored (READV_NVME /
+ * WRITEV_NVME) requests count u.bdev.nbytes only. Other types are ignored.
+ */
+void
+bdev_io_stat_update(struct spdk_bdev_io *bdev_io, uint64_t tsc, struct spdk_bdev_io_stat *stat)
+{
+	const uint64_t elapsed = tsc - bdev_io->internal.submit_tsc;
+
+	switch (bdev_io->type) {
+	case SPDK_BDEV_IO_TYPE_READ_NVME:
+		stat->num_read_ops++;
+		stat->read_latency_ticks += elapsed;
+		stat->bytes_read += bdev_io->u.contig.nbytes + bdev_io->u.contig.md_len;
+		break;
+	case SPDK_BDEV_IO_TYPE_READV_NVME:
+		stat->num_read_ops++;
+		stat->read_latency_ticks += elapsed;
+		stat->bytes_read += bdev_io->u.bdev.nbytes;
+		break;
+	case SPDK_BDEV_IO_TYPE_WRITE_NVME:
+		stat->num_write_ops++;
+		stat->write_latency_ticks += elapsed;
+		stat->bytes_written += bdev_io->u.contig.nbytes + bdev_io->u.contig.md_len;
+		break;
+	case SPDK_BDEV_IO_TYPE_WRITEV_NVME:
+		stat->num_write_ops++;
+		stat->write_latency_ticks += elapsed;
+		stat->bytes_written += bdev_io->u.bdev.nbytes;
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * Advance the channel's tick accounting and publish its counters.
+ *
+ * io_ticks always grows by the time since the previous update. When
+ * g_libstorage_iostat is set and stat owns a valid map entry, the entry's
+ * io_outstanding is refreshed on every call; the remaining counters are
+ * copied only once more than interval_tsc has passed since start_tsc. If
+ * g_polltime_threshold is set, the bdev's get_timeout_count result (0 when
+ * the callback is absent) is published in the same step.
+ */
+void
+bdev_update_iostat_map(struct spdk_bdev_io *bdev_io, uint64_t tsc, struct spdk_bdev_io_stat *stat,
+		       struct spdk_io_channel *channel, uint64_t io_outstanding)
+{
+	struct libstorage_bdev_io_stat *entry;
+	uint64_t num_poll_timeout;
+
+	stat->pre_ticks = stat->cur_ticks;
+	stat->cur_ticks = tsc;
+	stat->io_ticks += stat->cur_ticks - stat->pre_ticks;
+
+	if (!g_libstorage_iostat) {
+		return;
+	}
+
+	if (stat->io_stat_id < 0 || stat->io_stat_id >= STAT_MAX_NUM) {
+		return;
+	}
+
+	entry = &g_io_stat_map[stat->io_stat_id];
+	entry->io_outstanding = io_outstanding;
+
+	/* Counters are only copied out once per publish interval. */
+	if (tsc <= (stat->start_tsc + stat->interval_tsc)) {
+		return;
+	}
+
+	entry->num_read_ops = stat->num_read_ops;
+	entry->num_write_ops = stat->num_write_ops;
+	entry->bytes_read = stat->bytes_read;
+	entry->bytes_written = stat->bytes_written;
+	entry->read_latency_ticks = stat->read_latency_ticks;
+	entry->write_latency_ticks = stat->write_latency_ticks;
+	entry->io_ticks = stat->io_ticks;
+
+	stat->start_tsc = tsc;
+
+	if (!g_polltime_threshold) {
+		return;
+	}
+
+	if (bdev_io->bdev->fn_table->get_timeout_count) {
+		num_poll_timeout = bdev_io->bdev->fn_table->get_timeout_count(channel);
+	} else {
+		num_poll_timeout = 0;
+	}
+	entry->poll_time_used = true;
+	entry->num_poll_timeout = num_poll_timeout;
+}
--
2.33.0

View File

@ -0,0 +1,176 @@
From c359ae7d0ee0593b032f2e2543442fa9f6df3827 Mon Sep 17 00:00:00 2001
From: sunshihao <sunshihao@huawei.com>
Date: Mon, 22 Feb 2021 19:58:17 +0800
Subject: [PATCH 19/27] lib/env_dpdk: Add config args for HSAK
Signed-off-by: sunshihao <sunshihao@huawei.com>
---
lib/env_dpdk/init.c | 7 +++++++
lib/event/reactor.c | 36 +++++++++++++++++++++++++++++---
lib/jsonrpc/jsonrpc_internal.h | 2 +-
lib/jsonrpc/jsonrpc_server_tcp.c | 4 ++--
4 files changed, 43 insertions(+), 6 deletions(-)
diff --git a/lib/env_dpdk/init.c b/lib/env_dpdk/init.c
index e6464c9..3bb713d 100644
--- a/lib/env_dpdk/init.c
+++ b/lib/env_dpdk/init.c
@@ -398,6 +398,13 @@ build_eal_cmdline(const struct spdk_env_opts *opts)
#ifdef __linux__
+#ifdef SPDK_CONFIG_APP_RW
+ /* set IOVA use phys addr and keep same with DPDK16.11 */
+ args = push_arg(args, &argcount, _sprintf_alloc("--iova-mode=pa"));
+ if (args == NULL) {
+ return -1;
+ }
+#endif
if (opts->iova_mode) {
args = push_arg(args, &argcount, _sprintf_alloc("--iova-mode=%s", opts->iova_mode));
if (args == NULL) {
diff --git a/lib/event/reactor.c b/lib/event/reactor.c
index 724371c..9fb9e0f 100644
--- a/lib/event/reactor.c
+++ b/lib/event/reactor.c
@@ -42,6 +42,8 @@
#include "spdk/util.h"
#include "spdk/string.h"
#include "spdk/fd_group.h"
+#include "spdk_internal/thread.h"
+#include "spdk/conf.h"
#ifdef __linux__
#include <sys/prctl.h>
@@ -54,6 +56,10 @@
#define SPDK_EVENT_BATCH_SIZE 8
+#ifdef SPDK_CONFIG_APP_RW
+struct spdk_iodev_thread_info lcore_thread_info[RTE_MAX_LCORE];
+#endif
+
static struct spdk_reactor *g_reactors;
static uint32_t g_reactor_count;
static struct spdk_cpuset g_reactor_core_mask;
@@ -62,6 +68,7 @@ static enum spdk_reactor_state g_reactor_state = SPDK_REACTOR_STATE_UNINITIALIZE
static bool g_framework_context_switch_monitor_enabled = true;
static struct spdk_mempool *g_spdk_event_mempool = NULL;
+static int16_t g_reactor_batch_size = SPDK_EVENT_BATCH_SIZE;
TAILQ_HEAD(, spdk_scheduler) g_scheduler_list
= TAILQ_HEAD_INITIALIZER(g_scheduler_list);
@@ -250,6 +257,20 @@ spdk_reactors_init(void)
uint32_t i, current_core;
char mempool_name[32];
+#ifdef SPDK_CONFIG_APP_RW
+ struct spdk_conf_section *sp;
+ sp = spdk_conf_find_section(NULL, "Reactor");
+ if (sp != 0) {
+ g_reactor_batch_size = spdk_conf_section_get_intval(sp, "BatchSize");
+ if (g_reactor_batch_size <= 0 || g_reactor_batch_size > SPDK_EVENT_BATCH_SIZE) {
+ g_reactor_batch_size = SPDK_EVENT_BATCH_SIZE;
+ }
+ syslog(LOG_INFO,"BatchSize is set to %d\n", g_reactor_batch_size);
+ } else {
+ SPDK_ERRLOG("config file does not contain [Reactor] section, which need to be provided\n");
+ }
+#endif
+
snprintf(mempool_name, sizeof(mempool_name), "evtpool_%d", getpid());
g_spdk_event_mempool = spdk_mempool_create(mempool_name,
262144 - 1, /* Power of 2 minus 1 is optimal for memory consumption */
@@ -557,7 +578,7 @@ event_queue_run_batch(struct spdk_reactor *reactor)
return -errno;
}
- count = spdk_ring_dequeue(reactor->events, events, SPDK_EVENT_BATCH_SIZE);
+ count = spdk_ring_dequeue(reactor->events, events, g_reactor_batch_size);
if (spdk_ring_count(reactor->events) != 0) {
/* Trigger new notification if there are still events in event-queue waiting for processing. */
@@ -568,7 +589,7 @@ event_queue_run_batch(struct spdk_reactor *reactor)
}
}
} else {
- count = spdk_ring_dequeue(reactor->events, events, SPDK_EVENT_BATCH_SIZE);
+ count = spdk_ring_dequeue(reactor->events, events, g_reactor_batch_size);
}
if (count == 0) {
@@ -948,6 +969,9 @@ reactor_run(void *arg)
}
if (g_reactor_state != SPDK_REACTOR_STATE_RUNNING) {
+#ifdef SPDK_CONFIG_APP_RW
+ lcore_thread_info[reactor->lcore].state = SPDK_THREAD_STATE_EXITED;
+#endif
break;
}
}
@@ -1039,11 +1063,16 @@ spdk_reactors_start(void)
spdk_cpuset_zero(&tmp_cpumask);
spdk_cpuset_set_cpu(&tmp_cpumask, i, true);
+#ifdef SPDK_CONFIG_APP_RW
+ lcore_thread_info[reactor->lcore].thread = spdk_thread_create(thread_name, &tmp_cpumask);
+ lcore_thread_info[reactor->lcore].state = SPDK_THREAD_STATE_RUNNING;
+#else
spdk_thread_create(thread_name, &tmp_cpumask);
+#endif
}
spdk_cpuset_set_cpu(&g_reactor_core_mask, i, true);
}
-
+#ifndef SPDK_CONFIG_APP_RW
/* Start the main reactor */
reactor = spdk_reactor_get(current_core);
assert(reactor != NULL);
@@ -1052,6 +1081,7 @@ spdk_reactors_start(void)
spdk_env_thread_wait_all();
g_reactor_state = SPDK_REACTOR_STATE_SHUTDOWN;
+#endif
}
void
diff --git a/lib/jsonrpc/jsonrpc_internal.h b/lib/jsonrpc/jsonrpc_internal.h
index 4e5852e..331ee00 100644
--- a/lib/jsonrpc/jsonrpc_internal.h
+++ b/lib/jsonrpc/jsonrpc_internal.h
@@ -40,7 +40,7 @@
#include "spdk/log.h"
-#define SPDK_JSONRPC_RECV_BUF_SIZE (32 * 1024)
+#define SPDK_JSONRPC_RECV_BUF_SIZE (4 * 1024 * 1024)
#define SPDK_JSONRPC_SEND_BUF_SIZE_INIT (32 * 1024)
#define SPDK_JSONRPC_SEND_BUF_SIZE_MAX (32 * 1024 * 1024)
#define SPDK_JSONRPC_ID_MAX_LEN 128
diff --git a/lib/jsonrpc/jsonrpc_server_tcp.c b/lib/jsonrpc/jsonrpc_server_tcp.c
index 71f3b5c..5173aea 100644
--- a/lib/jsonrpc/jsonrpc_server_tcp.c
+++ b/lib/jsonrpc/jsonrpc_server_tcp.c
@@ -319,7 +319,7 @@ jsonrpc_server_conn_recv(struct spdk_jsonrpc_server_conn *conn)
}
offset += rc;
- } while (rc > 0);
+ } while (rc > 1000);
if (offset > 0) {
/*
@@ -375,7 +375,7 @@ more:
return 0;
}
- SPDK_DEBUGLOG(rpc, "send() failed: %s\n", spdk_strerror(errno));
+ SPDK_ERRLOG("send() failed: %s\n", spdk_strerror(errno));
return -1;
}
--
2.33.0

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,122 @@
From 1447fa25369f107192be8fa9e5f21ec78f19dcf1 Mon Sep 17 00:00:00 2001
From: sunshihao <sunshihao@huawei.com>
Date: Mon, 1 Mar 2021 09:20:10 +0800
Subject: [PATCH 22/27] use spdk_nvme_ns_cmd_dataset_management and delete
spdk_nvme_ns_cmd_unmap_blocks
Signed-off-by: sunshihao520 <sunshihao@huawei.com>
---
include/spdk/nvme.h | 33 -----------------------------
lib/nvme/nvme_ns_cmd.c | 35 -------------------------------
module/bdev/nvme/bdev_nvme_self.c | 8 +++----
3 files changed, 4 insertions(+), 72 deletions(-)
diff --git a/include/spdk/nvme.h b/include/spdk/nvme.h
index 6393db3..9acfb89 100644
--- a/include/spdk/nvme.h
+++ b/include/spdk/nvme.h
@@ -3549,39 +3549,6 @@ bool spdk_nvme_ns_pi_md_start(struct spdk_nvme_ns *ns);
bool spdk_nvme_ns_is_dataset_mng_supported(struct spdk_nvme_ns *ns);
uint16_t spdk_nvme_get_qpair_id(struct spdk_nvme_qpair *qpair);
-/**
- * Submit a data set management request to the specified NVMe namespace. Data set
- * management operations are designed to optimize interaction with the block
- * translation layer inside the device. The most common type of operation is
- * deallocate, which is often referred to as TRIM or UNMAP.
- *
- * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair().
- * The user must ensure that only one thread submits I/O on a given qpair at any
- * given time.
- *
- * This is a convenience wrapper that will automatically allocate and construct
- * the correct data buffers. Therefore, ranges does not need to be allocated from
- * pinned memory and can be placed on the stack. If a higher performance, zero-copy
- * version of DSM is required, simply build and submit a raw command using
- * spdk_nvme_ctrlr_cmd_io_raw().
- *
- * \param ns NVMe namespace to submit the DSM request
- * \param type A bit field constructed from \ref spdk_nvme_dsm_attribute.
- * \param qpair I/O queue pair to submit the request
- * \param ranges An array of \ref spdk_nvme_dsm_range elements describing the LBAs
- * to operate on.
- * \param num_ranges The number of elements in the ranges array.
- * \param cb_fn Callback function to invoke when the I/O is completed
- * \param cb_arg Argument to pass to the callback function
- *
- * \return 0 if successfully submitted, negated POSIX errno values otherwise.
- */
-int spdk_nvme_ns_cmd_unmap_blocks(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
- uint32_t type,
- const struct spdk_nvme_dsm_range *ranges,
- uint16_t num_ranges,
- spdk_nvme_cmd_cb cb_fn,
- void *cb_arg);
/**
* \brief Submits a write I/O to the specified NVMe namespace.
*
diff --git a/lib/nvme/nvme_ns_cmd.c b/lib/nvme/nvme_ns_cmd.c
index 37dcdc2..9b67b8e 100644
--- a/lib/nvme/nvme_ns_cmd.c
+++ b/lib/nvme/nvme_ns_cmd.c
@@ -1221,38 +1221,3 @@ spdk_nvme_ns_cmd_reservation_report(struct spdk_nvme_ns *ns,
return nvme_qpair_submit_request(qpair, req);
}
-
-#ifdef SPDK_CONFIG_APP_RW
-int
-spdk_nvme_ns_cmd_unmap_blocks(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
- uint32_t type,
- const struct spdk_nvme_dsm_range *ranges, uint16_t num_ranges,
- spdk_nvme_cmd_cb cb_fn, void *cb_arg)
-{
- struct nvme_request *req = NULL;
- struct spdk_nvme_cmd *cmd = NULL;
- struct nvme_payload payload;
-
- if (ranges == NULL) {
- return -EINVAL;
- }
-
- payload = NVME_PAYLOAD_CONTIG((void *)ranges, NULL);
-
- req = nvme_allocate_request(qpair, &payload, num_ranges * sizeof(struct spdk_nvme_dsm_range),
- 0, cb_fn, cb_arg);
- if (req == NULL) {
- return -ENOMEM;
- }
-
- req->user_cb_arg = cb_arg;
-
- cmd = &req->cmd;
- cmd->opc = SPDK_NVME_OPC_DATASET_MANAGEMENT;
- cmd->nsid = ns->id;
-
- cmd->cdw10 = num_ranges - 1;
- cmd->cdw11 = type;
-
- return nvme_qpair_submit_request(qpair, req);
-}
diff --git a/module/bdev/nvme/bdev_nvme_self.c b/module/bdev/nvme/bdev_nvme_self.c
index 7371ecb..1419b1f 100644
--- a/module/bdev/nvme/bdev_nvme_self.c
+++ b/module/bdev/nvme/bdev_nvme_self.c
@@ -565,10 +565,10 @@ bdev_nvme_unmap_blocks(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, void
}
spdk_bdev_set_io_location(driver_ctx, (uint8_t)LOCAL_LIBSTORAGE_BDEV_NVME);
- return spdk_nvme_ns_cmd_unmap_blocks(nbdev->nvme_ns->ns, nvme_ch->qpair,
- SPDK_NVME_DSM_ATTR_DEALLOCATE,
- unmap_d, unmap_count,
- bdev_nvme_queued_done, driver_ctx);
+ return spdk_nvme_ns_cmd_dataset_management(nbdev->nvme_ns->ns, nvme_ch->qpair,
+ SPDK_NVME_DSM_ATTR_DEALLOCATE,
+ unmap_d, unmap_count,
+ bdev_nvme_queued_done, driver_ctx);
}
void
--
2.33.0

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,55 @@
From 86162fca6435c4b5d98356f63ae32519fe485f02 Mon Sep 17 00:00:00 2001
From: suweifeng <suweifeng1@huawei.com>
Date: Mon, 17 May 2021 16:05:40 +0800
Subject: [PATCH 24/27] Add CUSE switch for nvme ctrlr
Signed-off-by: suweifeng <suweifeng1@huawei.com>
---
module/bdev/nvme/bdev_nvme.c | 17 +++++++++++++++++
1 file changed, 17 insertions(+)
diff --git a/module/bdev/nvme/bdev_nvme.c b/module/bdev/nvme/bdev_nvme.c
index d291646..4f88e4e 100644
--- a/module/bdev/nvme/bdev_nvme.c
+++ b/module/bdev/nvme/bdev_nvme.c
@@ -137,6 +137,9 @@ static struct spdk_thread *g_bdev_nvme_init_thread;
static struct spdk_poller *g_hotplug_poller;
static struct spdk_poller *g_hotplug_probe_poller;
static struct spdk_nvme_probe_ctx *g_hotplug_probe_ctx;
+#ifdef SPDK_CONFIG_APP_RW
+bool g_useCUSE = false;
+#endif
static void nvme_ctrlr_populate_namespaces(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr,
struct nvme_async_probe_ctx *ctx);
@@ -1694,6 +1697,12 @@ attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
}
nvme_ctrlr_populate_namespaces(nvme_bdev_ctrlr, NULL);
+#ifdef SPDK_CONFIG_APP_RW
+ /* register CUSE */
+ if (g_useCUSE) {
+ spdk_nvme_cuse_register(ctrlr);
+ }
+#endif
free(name);
}
@@ -1720,6 +1729,14 @@ remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr)
return;
}
nvme_bdev_ctrlr->destruct = true;
+
+#ifdef SPDK_CONFIG_APP_RW
+ /* remove CUSE */
+ if (g_useCUSE) {
+ spdk_nvme_cuse_unregister(ctrlr);
+ }
+#endif
+
pthread_mutex_unlock(&g_bdev_nvme_mutex);
_nvme_bdev_ctrlr_destruct(nvme_bdev_ctrlr);
}
--
2.33.0

View File

@ -0,0 +1,45 @@
From c6239a3dc45a7cb3fa245cdcb5f0641959159714 Mon Sep 17 00:00:00 2001
From: suweifeng <suweifeng1@huawei.com>
Date: Thu, 20 May 2021 16:41:01 +0800
Subject: [PATCH 25/27] Adapt for ES3000 serial vendor special opcode in CUSE
Huawei ES3000 series NVMe PCIe SSDs use the vendor-specific opcode 0xC0
to fetch self-defined vendor logs. The data transfer field of this opcode
does not follow the NVMe 1.3/1.4 spec, so treat the opcode as bidirectional.
All self-defined opcodes are 0xC0 or above.
Signed-off-by: suweifeng <suweifeng1@huawei.com>
---
include/spdk/nvme_spec.h | 1 +
lib/nvme/nvme_cuse.c | 3 +++
2 files changed, 4 insertions(+)
diff --git a/include/spdk/nvme_spec.h b/include/spdk/nvme_spec.h
index ca91c8b..8058ea0 100644
--- a/include/spdk/nvme_spec.h
+++ b/include/spdk/nvme_spec.h
@@ -1345,6 +1345,7 @@ enum spdk_nvme_admin_opcode {
SPDK_NVME_OPC_SANITIZE = 0x84,
SPDK_NVME_OPC_GET_LBA_STATUS = 0x86,
+ SPDK_NVME_OPC_VENDOR = 0xC0,
};
/**
diff --git a/lib/nvme/nvme_cuse.c b/lib/nvme/nvme_cuse.c
index 62d1422..3eccfd0 100644
--- a/lib/nvme/nvme_cuse.c
+++ b/lib/nvme/nvme_cuse.c
@@ -154,6 +154,9 @@ cuse_nvme_admin_cmd_send(fuse_req_t req, struct nvme_admin_cmd *admin_cmd,
ctx->req = req;
ctx->data_transfer = spdk_nvme_opc_get_data_transfer(admin_cmd->opcode);
+ if (admin_cmd->opcode >= SPDK_NVME_OPC_VENDOR) {
+ ctx->data_transfer = SPDK_NVME_DATA_BIDIRECTIONAL;
+ }
memset(&ctx->nvme_cmd, 0, sizeof(ctx->nvme_cmd));
ctx->nvme_cmd.opc = admin_cmd->opcode;
--
2.33.0

View File

@ -0,0 +1,124 @@
From 34555d211c58ac7615d41547f56756ae02d22957 Mon Sep 17 00:00:00 2001
From: suweifeng <suweifeng1@huawei.com>
Date: Tue, 8 Jun 2021 22:11:53 +0800
Subject: [PATCH 26/27] Fix race condition in continuous setup and teardown
cuse session
If a cuse session is set up and torn down in quick succession, teardown
can run on a session that is not yet initialized and cause a segmentation
fault. Wait until the session has been created before continuing.
Signed-off-by: suweifeng <suweifeng1@huawei.com>
---
lib/nvme/nvme_cuse.c | 41 +++++++++++++++++++++++++++++++++++++----
1 file changed, 37 insertions(+), 4 deletions(-)
diff --git a/lib/nvme/nvme_cuse.c b/lib/nvme/nvme_cuse.c
index 3eccfd0..8f0be31 100644
--- a/lib/nvme/nvme_cuse.c
+++ b/lib/nvme/nvme_cuse.c
@@ -55,6 +55,8 @@ struct cuse_device {
pthread_t tid;
struct fuse_session *session;
+ pthread_cond_t session_cond; /* session condition variable */
+ pthread_mutex_t session_mtx; /* session mutex variable */
struct cuse_device *ctrlr_device;
struct cuse_device *ns_devices; /**< Array of cuse ns devices */
@@ -666,11 +668,17 @@ cuse_thread(void *arg)
cuse_device->session = cuse_lowlevel_setup(cuse_argc, cuse_argv, &ci, &cuse_ctrlr_clop,
&multithreaded, cuse_device);
}
+
if (!cuse_device->session) {
SPDK_ERRLOG("Cannot create cuse session\n");
+ pthread_mutex_lock(&cuse_device->session_mtx);
+ pthread_cond_signal(&cuse_device->session_cond);
+ pthread_mutex_unlock(&cuse_device->session_mtx);
goto err;
}
-
+ pthread_mutex_lock(&cuse_device->session_mtx);
+ pthread_cond_signal(&cuse_device->session_cond);
+ pthread_mutex_unlock(&cuse_device->session_mtx);
SPDK_NOTICELOG("fuse session for device %s created\n", cuse_device->dev_name);
/* Receive and process fuse requests */
@@ -718,13 +726,20 @@ cuse_nvme_ns_start(struct cuse_device *ctrlr_device, uint32_t nsid)
free(ns_device);
return -ENAMETOOLONG;
}
-
+ pthread_cond_init(&ns_device->session_cond, NULL);
+ pthread_mutex_init(&ns_device->session_mtx, NULL);
rv = pthread_create(&ns_device->tid, NULL, cuse_thread, ns_device);
if (rv != 0) {
SPDK_ERRLOG("pthread_create failed\n");
return -rv;
}
-
+ pthread_mutex_lock(&ns_device->session_mtx);
+ pthread_cond_wait(&ns_device->session_cond, &ns_device->session_mtx);
+ pthread_mutex_unlock(&ns_device->session_mtx);
+ if (!ns_device->session) {
+ SPDK_ERRLOG("create namespace session failed\n");
+ return -1;
+ }
ns_device->is_started = true;
return 0;
@@ -739,9 +754,10 @@ cuse_nvme_ns_stop(struct cuse_device *ctrlr_device, uint32_t nsid)
if (!ns_device->is_started) {
return;
}
-
fuse_session_exit(ns_device->session);
pthread_join(ns_device->tid, NULL);
+ pthread_cond_destroy(&ns_device->session_cond);
+ pthread_mutex_destroy(&ns_device->session_mtx);
ns_device->is_started = false;
}
@@ -817,8 +833,14 @@ cuse_nvme_ctrlr_stop(struct cuse_device *ctrlr_device)
cuse_nvme_ns_stop(ctrlr_device, i);
}
+ if (!ctrlr_device->is_started) {
+ return;
+ }
fuse_session_exit(ctrlr_device->session);
pthread_join(ctrlr_device->tid, NULL);
+ pthread_cond_destroy(&ctrlr_device->session_cond);
+ pthread_mutex_destroy(&ctrlr_device->session_mtx);
+ ctrlr_device->is_started = false;
TAILQ_REMOVE(&g_ctrlr_ctx_head, ctrlr_device, tailq);
spdk_bit_array_clear(g_ctrlr_started, ctrlr_device->index);
if (spdk_bit_array_count_set(g_ctrlr_started) == 0) {
@@ -894,12 +916,23 @@ nvme_cuse_start(struct spdk_nvme_ctrlr *ctrlr)
snprintf(ctrlr_device->dev_name, sizeof(ctrlr_device->dev_name), "spdk/nvme%d",
ctrlr_device->index);
+ pthread_cond_init(&ctrlr_device->session_cond, NULL);
+ pthread_mutex_init(&ctrlr_device->session_mtx, NULL);
rv = pthread_create(&ctrlr_device->tid, NULL, cuse_thread, ctrlr_device);
if (rv != 0) {
SPDK_ERRLOG("pthread_create failed\n");
rv = -rv;
goto err3;
}
+ pthread_mutex_lock(&ctrlr_device->session_mtx);
+ pthread_cond_wait(&ctrlr_device->session_cond, &ctrlr_device->session_mtx);
+ pthread_mutex_unlock(&ctrlr_device->session_mtx);
+ if (!ctrlr_device->session) {
+ SPDK_ERRLOG("cuse session create failed\n");
+ rv = -1;
+ goto err3;
+ }
+ ctrlr_device->is_started = true;
TAILQ_INSERT_TAIL(&g_ctrlr_ctx_head, ctrlr_device, tailq);
ctrlr_device->ns_devices = (struct cuse_device *)calloc(num_ns, sizeof(struct cuse_device));
--
2.33.0

View File

@ -0,0 +1,28 @@
From 5f8b5846741c965b1b5ad7a8ca2960b20565d192 Mon Sep 17 00:00:00 2001
From: suweifeng <suweifeng1@huawei.com>
Date: Thu, 10 Jun 2021 11:25:17 +0800
Subject: [PATCH 27/27] Change log level in poll timeout
Change to 'NOTICE' log level in poll timeout
Signed-off-by: suweifeng <suweifeng1@huawei.com>
---
module/bdev/nvme/bdev_nvme_self.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/module/bdev/nvme/bdev_nvme_self.c b/module/bdev/nvme/bdev_nvme_self.c
index dc480ff..cba129e 100644
--- a/module/bdev/nvme/bdev_nvme_self.c
+++ b/module/bdev/nvme/bdev_nvme_self.c
@@ -36,7 +36,7 @@ void bdev_update_ch_timeout(struct nvme_bdev_poll_group *group)
poll_time = (poll_ticks * 1000ULL) / spdk_get_ticks_hz();
if (poll_time >= g_polltime_threshold) {
group->num_poll_timeout++;
- SPDK_WARNLOG("group[%p] poll timeout in %ldms", group, poll_time);
+ SPDK_NOTICELOG("group[%p] poll timeout in %ldms\n", group, poll_time); /* newline-terminated like the other SPDK_*LOG calls */
}
}
group->save_start_ticks = current_ticks;
--
2.33.0

View File

@ -3,7 +3,7 @@
Name: spdk
Version: 21.01.1
Release: 4
Release: 5
Summary: Set of libraries and utilities for high performance user-mode storage
License: BSD and MIT
URL: http://spdk.io
@ -24,6 +24,17 @@ Patch13: 0013-lib-vhost-Fix-compilation-with-dpdk-21.11.patch
Patch14: 0014-mk-Fix-debug-build-error-on-ARM-ThunderX2-and-neoverse_N1_platform.patch
Patch15: 0015-configure-add-gcc-version-check-for-ARM-Neoverse-N1_platform.patch
Patch16: 0016-Enhance-security-for-share-library.patch
Patch17: 0017-add-HSAK-needed-head-file-and-API-to-spdk.patch
Patch18: 0018-lib-bdev-Add-bdev-support-for-HSAK.patch
Patch19: 0019-lib-env_dpdk-Add-config-args-for-HSAK.patch
Patch20: 0020-lib-nvme-Add-nvme-support-for-HSAK.patch
Patch21: 0021-module-bdev-Add-bdev-module-support-for-HSAK.patch
Patch22: 0022-use-spdk_nvme_ns_cmd_dataset_management-and-delete-s.patch
Patch23: 0023-spdk-add-nvme-support-for-HSAK.patch
Patch24: 0024-Add-CUSE-switch-for-nvme-ctrlr.patch
Patch25: 0025-Adapt-for-ES3000-serial-vendor-special-opcode-in-CUS.patch
Patch26: 0026-Fix-race-condition-in-continuous-setup-and-teardown-.patch
Patch27: 0027-Change-log-level-in-poll-timeout.patch
%define package_version %{version}-%{release}
@ -44,6 +55,8 @@ BuildRequires: gcc gcc-c++ make
BuildRequires: dpdk-devel, numactl-devel, ncurses-devel
BuildRequires: libiscsi-devel, libaio-devel, openssl-devel, libuuid-devel
BuildRequires: libibverbs-devel, librdmacm-devel
BuildRequires: fuse3, fuse3-devel
BuildRequires: libboundscheck
%if %{with doc}
BuildRequires: doxygen mscgen graphviz
%endif
@ -51,6 +64,7 @@ BuildRequires: doxygen mscgen graphviz
# Install dependencies
Requires: dpdk >= 21.11, numactl-libs, openssl-libs
Requires: libiscsi, libaio, libuuid
Requires: fuse3, libboundscheck
# NVMe over Fabrics
Requires: librdmacm, librdmacm
Requires(post): /sbin/ldconfig
@ -113,7 +127,9 @@ BuildArch: noarch
--with-rdma \
--with-shared \
--with-iscsi-initiator \
--without-vtune
--without-vtune \
--enable-raw \
--with-nvme-cuse
make -j`nproc` all
@ -123,6 +139,13 @@ make -C doc
%install
%make_install -j`nproc` prefix=%{_usr} libdir=%{_libdir} datadir=%{_datadir}
install -d $RPM_BUILD_ROOT%{_sysconfdir}/spdk
install -d $RPM_BUILD_ROOT/opt/spdk
install -d $RPM_BUILD_ROOT/usr/include/spdk_internal
install -m 0744 ./scripts/setup_self.sh $RPM_BUILD_ROOT/opt/spdk/setup.sh
install -m 0644 ./etc/spdk/nvme.conf.in $RPM_BUILD_ROOT%{_sysconfdir}/spdk
install -m 0644 include/spdk_internal/*.h $RPM_BUILD_ROOT/usr/include/spdk_internal
install -m 0644 lib/nvme/nvme_internal.h $RPM_BUILD_ROOT/usr/include/spdk_internal
# Install tools
mkdir -p %{install_datadir}
@ -157,12 +180,18 @@ mv doc/output/html/ %{install_docdir}
%files
%{_bindir}/spdk_*
%{_libdir}/*.so.*
%dir %{_sysconfdir}/spdk
%{_sysconfdir}/spdk/nvme.conf.in
%dir /opt/spdk
/opt/spdk/setup.sh
%files devel
%{_includedir}/%{name}
%{_libdir}/*.a
%{_libdir}/*.so
%dir /usr/include/spdk_internal
/usr/include/spdk_internal/*.h
%files tools
@ -177,6 +206,9 @@ mv doc/output/html/ %{install_docdir}
%changelog
* Tue May 24 2022 Weifeng Su <suweifeng1@huawei.com> - 21.01.1-5
- Add support for HSAK
* Tue Mar 15 2022 Weifeng Su <suweifeng1@huawei.com> - 21.01.1-4
- Remove rpath link option, because it makes it easy for an attacker to
construct 'rpath' attacks