RISC-V: add zImage (Image.gz) support to kexec

Adds a "zImage" file type that decompresses Image.gz, wraps the raw Image
in a minimal ELF container (imageToElf) and passes a temporary file to the
kernel. Regions listed under /proc/device-tree/reserved-memory are removed
from the System RAM ranges.

NOTE(review): the header names on the five "#include" lines of the first
kexec-riscv.c hunk are missing from the copy this was reviewed from. They
are kept exactly as found and must be restored from the original patch
before this can be applied.

diff -ruN kexec_orig/kexec/arch/riscv/Makefile kexec_new/kexec/arch/riscv/Makefile
--- kexec_orig/kexec/arch/riscv/Makefile	2024-09-21 04:10:02.056885713 +0800
+++ kexec_new/kexec/arch/riscv/Makefile	2024-09-21 04:27:49.269821895 +0800
@@ -3,6 +3,8 @@
 #
 riscv_KEXEC_SRCS = kexec/arch/riscv/kexec-riscv.c
 riscv_KEXEC_SRCS += kexec/arch/riscv/kexec-elf-riscv.c
+riscv_KEXEC_SRCS += kexec/arch/riscv/kexec-elf-utils-riscv.c
+riscv_KEXEC_SRCS += kexec/arch/riscv/kexec-zImage-riscv.c
 riscv_KEXEC_SRCS += kexec/arch/riscv/kexec-image-riscv.c
 riscv_KEXEC_SRCS += kexec/arch/riscv/crashdump-riscv.c
 
diff -ruN kexec_orig/kexec/arch/riscv/kexec-elf-utils-riscv.c kexec_new/kexec/arch/riscv/kexec-elf-utils-riscv.c
--- kexec_orig/kexec/arch/riscv/kexec-elf-utils-riscv.c	1970-01-01 08:00:00.000000000 +0800
+++ kexec_new/kexec/arch/riscv/kexec-elf-utils-riscv.c	2024-09-21 04:27:49.277821062 +0800
@@ -0,0 +1,237 @@
+/*
+ * Wrap a raw RISC-V kernel Image in a minimal ELF container.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "elf.h"
+#include "../../kexec.h"
+
+#if __riscv_xlen == 64
+typedef Elf64_Ehdr Elf_Ehdr;
+typedef Elf64_Phdr Elf_Phdr;
+typedef Elf64_Shdr Elf_Shdr;
+typedef Elf64_Sym Elf_Sym;
+typedef uint64_t xlen_t;
+#define ELFCLASS ELFCLASS64
+#elif __riscv_xlen == 32
+typedef Elf32_Ehdr Elf_Ehdr;
+typedef Elf32_Phdr Elf_Phdr;
+typedef Elf32_Shdr Elf_Shdr;
+typedef Elf32_Sym Elf_Sym;
+typedef uint32_t xlen_t;
+#define ELFCLASS ELFCLASS32
+#else
+#error Unsupported ISA length
+#endif
+
+/* slack appended to the kernel size for the ELF headers and tables */
+#define ELF_EXTRA_SIZE 0x1000
+#define KERNEL_SECTION_COUNT 1
+#define EXTRA_SECTION_COUNT 3 /* shstrtab + strtab + symtab */
+/* first NULL section + kernel section + extra sections */
+#define SECTION_HEADER_COUNT (1 + KERNEL_SECTION_COUNT + EXTRA_SECTION_COUNT)
+/* alignment applied to the start of a candidate RAM range */
+#define KERNEL_LOAD_ALIGN 0x200000
+
+/* byte offsets of the section names inside section_header_string_table */
+#define SHSTR_SHSTRTAB 1
+#define SHSTR_TEXT 11
+#define SHSTR_STRTAB 17
+#define SHSTR_SYMTAB 25
+
+static Elf_Ehdr eh =
+{
+	.e_ident = {ELFMAG0, 'E', 'L', 'F', ELFCLASS, ELFDATA2LSB, EV_CURRENT},
+	.e_type = ET_EXEC,
+	.e_machine = EM_RISCV,
+	.e_version = EV_CURRENT,
+	.e_phoff = sizeof(Elf_Ehdr),
+	.e_flags = 0x1, /* RVC */
+	.e_ehsize = sizeof(Elf_Ehdr),
+	.e_phentsize = sizeof(Elf_Phdr),
+	.e_shentsize = sizeof(Elf_Shdr),
+};
+
+static const char section_header_string_table[] =
+	"\x00.shstrtab\x00.text\x00.strtab\x00.symtab";
+static const char string_table[] = "\x00r\x00";
+
+/*
+ * Add ELF wrapper to raw kernel image.
+ *
+ * Layout of the result: ELF header, one program header, the kernel image,
+ * five section headers (NULL, .text, .symtab, .strtab, .shstrtab), the
+ * symbol table, the string table and the section header string table.
+ *
+ * Returns a zero-filled heap buffer of kernel_size + ELF_EXTRA_SIZE bytes
+ * that the caller must free(), or NULL when the memory ranges cannot be
+ * read, no RAM range can hold the kernel, or the allocation fails.
+ */
+char *imageToElf(const char *kernel_buf, unsigned long kernel_size)
+{
+	Elf_Phdr ph;
+	Elf_Shdr sh;
+	Elf_Sym st;
+	xlen_t elf_offset, entry_addr = 1; /* 1 means no range was found */
+	xlen_t elf_write_offset = 0;
+	struct memory_range *range;
+	unsigned long start, end;
+	char *elf_buf;
+	int ranges;
+	int i;
+
+	/* determine address that does not overlap */
+	if (get_memory_ranges(&range, &ranges, 0) < 0) {
+		fprintf(stderr, "Cannot read memory ranges\n");
+		return NULL;
+	}
+	for (i = 0; i < ranges; i++) {
+		if (range[i].type != RANGE_RAM)
+			continue;
+		start = range[i].start;
+		end = range[i].end;
+
+		start = _ALIGN_UP(start, KERNEL_LOAD_ALIGN);
+		if (end > start && ((end - start) >= kernel_size)) {
+			entry_addr = start;
+			break;
+		}
+	}
+	if (entry_addr == 1) {
+		fprintf(stderr, "Cannot find proper memory range for given ELF\n");
+		return NULL;
+	}
+
+	/*
+	 * Zero-filled so that the slack behind the last table holds no stale
+	 * heap data when the caller writes the whole buffer out.
+	 */
+	elf_buf = calloc(1, kernel_size + ELF_EXTRA_SIZE);
+	if (!elf_buf) {
+		fprintf(stderr, "Cannot allocate memory for the ELF image\n");
+		return NULL;
+	}
+
+	/* write ELF Header */
+	eh.e_entry = entry_addr;
+	eh.e_phnum = KERNEL_SECTION_COUNT;
+	eh.e_shnum = SECTION_HEADER_COUNT;
+	eh.e_shstrndx = KERNEL_SECTION_COUNT + EXTRA_SECTION_COUNT; /* index to Elf_Shdr for shstrtab */
+	eh.e_shoff = eh.e_phoff + KERNEL_SECTION_COUNT * sizeof(Elf_Phdr) + kernel_size;
+	memcpy(elf_buf, &eh, sizeof(eh));
+	elf_write_offset += sizeof(eh);
+
+	elf_offset = eh.e_phoff + KERNEL_SECTION_COUNT * sizeof(Elf_Phdr);
+
+	/* write Program Headers */
+	ph.p_type = PT_LOAD;
+	ph.p_offset = elf_offset;
+	ph.p_vaddr = ph.p_paddr = entry_addr;
+	ph.p_filesz = ph.p_memsz = kernel_size;
+	ph.p_flags = PF_R | PF_X;
+	ph.p_align = 1;
+
+	memcpy(elf_buf + elf_write_offset, &ph, sizeof(ph));
+	elf_write_offset += sizeof(ph);
+
+	/* write kernel image */
+	memcpy(elf_buf + elf_write_offset, kernel_buf, kernel_size);
+	elf_write_offset += kernel_size;
+
+	/* write prefix Section Header */
+	memset(&sh, 0, sizeof(sh));
+	memcpy(elf_buf + elf_write_offset, &sh, sizeof(sh));
+	elf_write_offset += sizeof(sh);
+
+	/* write kernel Section Header */
+	sh.sh_name = SHSTR_TEXT;
+	sh.sh_type = SHT_PROGBITS;
+	sh.sh_flags = SHF_ALLOC | SHF_EXECINSTR;
+	sh.sh_addr = entry_addr;
+	sh.sh_offset = elf_offset;
+	sh.sh_size = kernel_size;
+	sh.sh_link = 0;
+	sh.sh_info = 0;
+	sh.sh_addralign = 0;
+	sh.sh_entsize = 0;
+
+	memcpy(elf_buf + elf_write_offset, &sh, sizeof(sh));
+	elf_write_offset += sizeof(sh);
+
+	elf_offset += kernel_size;
+	elf_offset += SECTION_HEADER_COUNT * sizeof(sh);
+
+	/* write Section Header for extra_sections */
+	sh.sh_name = SHSTR_SYMTAB;
+	sh.sh_type = SHT_SYMTAB;
+	sh.sh_flags = 0;
+	sh.sh_addr = 0;
+	sh.sh_offset = elf_offset;
+	sh.sh_size = (KERNEL_SECTION_COUNT + 1) * sizeof(Elf_Sym);
+	sh.sh_link = 2 + KERNEL_SECTION_COUNT; /* section index of .strtab */
+	sh.sh_info = 2;
+	sh.sh_addralign = 4;
+	sh.sh_entsize = sizeof(Elf_Sym);
+
+	memcpy(elf_buf + elf_write_offset, &sh, sizeof(sh));
+	elf_write_offset += sizeof(sh);
+
+	elf_offset += sh.sh_size;
+
+	sh.sh_name = SHSTR_STRTAB;
+	sh.sh_type = SHT_STRTAB;
+	sh.sh_flags = 0;
+	sh.sh_addr = 0;
+	sh.sh_offset = elf_offset;
+	sh.sh_size = sizeof(string_table);
+	sh.sh_link = 0;
+	sh.sh_info = 0;
+	sh.sh_addralign = 1;
+	sh.sh_entsize = 0;
+
+	memcpy(elf_buf + elf_write_offset, &sh, sizeof(sh));
+	elf_write_offset += sizeof(sh);
+
+	elf_offset += sh.sh_size;
+
+	sh.sh_name = SHSTR_SHSTRTAB;
+	sh.sh_type = SHT_STRTAB;
+	sh.sh_flags = 0;
+	sh.sh_addr = 0;
+	sh.sh_offset = elf_offset;
+	sh.sh_size = sizeof(section_header_string_table);
+	sh.sh_link = 0;
+	sh.sh_info = 0;
+	sh.sh_addralign = 1;
+	sh.sh_entsize = 0;
+
+	memcpy(elf_buf + elf_write_offset, &sh, sizeof(sh));
+	elf_write_offset += sizeof(sh);
+
+	/* write Symbol Table */
+	memset(&st, 0, sizeof(st));
+	memcpy(elf_buf + elf_write_offset, &st, sizeof(st));
+	elf_write_offset += sizeof(st);
+
+	st.st_name = 1; /* point to r in strtab */
+	st.st_value = entry_addr;
+	st.st_size = 0;
+	st.st_info = 0;
+	st.st_other = 0;
+	st.st_shndx = 1;
+	memcpy(elf_buf + elf_write_offset, &st, sizeof(st));
+	elf_write_offset += sizeof(st);
+
+	/* write String Table */
+	memcpy(elf_buf + elf_write_offset, string_table, sizeof(string_table));
+	elf_write_offset += sizeof(string_table);
+
+	/* write Section Header String Table */
+	memcpy(elf_buf + elf_write_offset, section_header_string_table, sizeof(section_header_string_table));
+	elf_write_offset += sizeof(section_header_string_table);
+
+	return elf_buf;
+}
diff -ruN kexec_orig/kexec/arch/riscv/kexec-riscv.c kexec_new/kexec/arch/riscv/kexec-riscv.c
--- kexec_orig/kexec/arch/riscv/kexec-riscv.c	2024-09-21 05:07:29.701070532 +0800
+++ kexec_new/kexec/arch/riscv/kexec-riscv.c	2024-09-21 04:27:49.281820646 +0800
@@ -17,13 +17,24 @@
 #include "kexec-riscv.h"
 #include "iomem.h"
 #include
-#include
 #include
+#include
+#include
+
 
 #ifndef _O_BINARY
 #define _O_BINARY 0
 #endif
 
+#if __riscv_xlen == 64
+#define DT_SIZE 8
+#define bswap __builtin_bswap64
+#else
+#define DT_SIZE 4
+#define bswap __builtin_bswap32
+#endif
+
+static const char *dirpath = "/proc/device-tree/reserved-memory";
 
 const struct arch_map_entry arches[] = {
 	{ "riscv32", KEXEC_ARCH_RISCV },
@@ -35,6 +46,7 @@
 struct file_type file_type[] = {
 	{"elf-riscv", elf_riscv_probe, elf_riscv_load, elf_riscv_usage},
 	{"image-riscv", image_riscv_probe, image_riscv_load, image_riscv_usage},
+	{"zImage", zImage_riscv_probe, zImage_riscv_load, zImage_riscv_usage},
 };
 
 int file_types = sizeof(file_type) / sizeof(file_type[0]);
@@ -45,7 +57,7 @@
 " --command-line=STRING Use STRING as the kernel's command line.\n"
 " --reuse-cmdline Use kernel command line from running system.\n";
 
-static struct riscv_opts arch_options = {0};
+struct riscv_opts arch_options = {0};
 static struct fdt_image provided_fdt = {0};
 
 /****************\
@@ -116,7 +128,7 @@
 	uint64_t initrd_base = 0;
 	uint64_t start = 0;
 	uint64_t end = 0;
-	uint64_t min_usable = kernel_base + kernel_size;
+	uint64_t min_usable = kernel_base + kernel_size + BSS_PADDING;
 	uint64_t max_usable = max_addr;
 	int ret = 0;
 
@@ -148,7 +160,7 @@
 			fprintf(stderr, "Couldn't add usable-memory-range to fdt\n");
 			return ret;
 		}
-
+		min_usable = start + kernel_size + BSS_PADDING;
 		max_usable = end;
 	} else {
 		/*
@@ -458,6 +470,71 @@
 
 	*range = sysmem_ranges.ranges;
 	*num_ranges = sysmem_ranges.size;
+	/* iomem can miss reservations made for firmware, add them from device tree */
+	DIR *dir = opendir(dirpath);
+	if (!dir) {
+		fprintf(stderr, "Cannot found reservation info from devicetree, skipping\n");
+	} else {
+		struct dirent *dir_ent;
+
+		while ((dir_ent = readdir(dir)) != NULL) {
+			char full_path[PATH_MAX];
+			char reg_path[PATH_MAX + 4];
+			unsigned long long res_start = 0, res_len = 0;
+			struct memory_range reserved_range;
+			struct stat stat_buf;
+			FILE *reg_fp;
+			size_t cells;
+
+			if (dir_ent->d_type != DT_DIR ||
+			    strcmp(dir_ent->d_name, ".") == 0 ||
+			    strcmp(dir_ent->d_name, "..") == 0)
+				continue;
+
+			snprintf(full_path, sizeof(full_path), "%s/%s", dirpath, dir_ent->d_name);
+			if (stat(full_path, &stat_buf) != 0 || !S_ISDIR(stat_buf.st_mode))
+				continue;
+
+			/*
+			 * found reservation region; a node without "reg" has no
+			 * fixed address to exclude, so it is skipped
+			 */
+			snprintf(reg_path, sizeof(reg_path), "%s/reg", full_path);
+			reg_fp = fopen(reg_path, "r");
+			if (!reg_fp) {
+				dbgprintf("No reg property in %s, skipping\n", full_path);
+				continue;
+			}
+			/*
+			 * NOTE(review): reads only the first reg entry and assumes
+			 * address and size cells of DT_SIZE bytes each - confirm
+			 * against #address-cells/#size-cells
+			 */
+			cells = fread(&res_start, DT_SIZE, 1, reg_fp);
+			cells += fread(&res_len, DT_SIZE, 1, reg_fp);
+			fclose(reg_fp);
+			if (cells != 2)
+				continue;
+
+			/* big endian to little endian */
+			res_start = bswap(res_start);
+			res_len = bswap(res_len);
+			if (!res_len)
+				continue;
+
+			reserved_range.start = res_start;
+			reserved_range.end = res_start + res_len - 1;
+			reserved_range.type = RANGE_RESERVED;
+			dbgprintf("Reservation from FDT %llx - %llx size %llx\n",
+				  res_start, res_start + res_len - 1, res_len);
+			mem_regions_exclude(&sysmem_ranges, &reserved_range);
+		}
+		closedir(dir);
+
+		/* exclusion may have changed the table, publish it again */
+		*range = sysmem_ranges.ranges;
+		*num_ranges = sysmem_ranges.size;
+	}
 	dbgprint_mem_range("System RAM ranges;",
 			   sysmem_ranges.ranges, sysmem_ranges.size);
 
diff -ruN kexec_orig/kexec/arch/riscv/kexec-riscv.h kexec_new/kexec/arch/riscv/kexec-riscv.h
--- kexec_orig/kexec/arch/riscv/kexec-riscv.h	2024-09-21 04:10:02.080885907 +0800
+++ kexec_new/kexec/arch/riscv/kexec-riscv.h	2024-09-21 04:27:49.285820230 +0800
@@ -15,6 +15,10 @@
 #define KERNEL_ALIGN 0x400000
 #endif
 
+// kernel size may not include BSS section
+// leave space for them
+#define BSS_PADDING 0x4000000
+
 struct fdt_image {
 	char *buf;
 	off_t size;
@@ -51,3 +55,8 @@
 void image_riscv_usage(void);
 int image_riscv_load(int argc, char **argv, const char *buf, off_t len,
 			struct kexec_info *info);
+
+int zImage_riscv_probe(const char *kernel_buf, off_t kernel_size);
+int zImage_riscv_load(int argc, char **argv, const char *kernel_buf,
+	off_t kernel_size, struct kexec_info *info);
+void zImage_riscv_usage(void);
diff -ruN kexec_orig/kexec/arch/riscv/kexec-zImage-riscv.c kexec_new/kexec/arch/riscv/kexec-zImage-riscv.c
--- kexec_orig/kexec/arch/riscv/kexec-zImage-riscv.c	1970-01-01 08:00:00.000000000 +0800
+++ kexec_new/kexec/arch/riscv/kexec-zImage-riscv.c	2024-09-21 04:27:49.285820230 +0800
@@ -0,0 +1,232 @@
+/*
+ * RISC-V kexec zImage (Image.gz) support.
+ * Modified based on ARM64 implementation
+ *
+ * Several distros install the RISC-V
+ * Image.gz compressed file inside the boot destination
+ * directory (for e.g. /boot).
+ *
+ * Currently we cannot use kexec_file_load() to load vmlinuz
+ * (or Image.gz).
+ *
+ * To support Image.gz, we should:
+ * a). Copy the contents of Image.gz to a temporary file.
+ * b). Decompress (gunzip-decompress) the contents inside the
+ *     temporary file.
+ * c). Pass the 'fd' of the temporary file to the kernel space.
+ *
+ * So basically the kernel space still gets a decompressed
+ * kernel image to load via kexec-tools.
+ */
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include "image-header.h"
+#include "kexec.h"
+#include "kexec-riscv.h"
+#include "kexec-syscall.h"
+#include "kexec-zlib.h"
+#include "arch/options.h"
+
+extern struct riscv_opts arch_options;
+
+/* defined in kexec-elf-utils-riscv.c */
+extern char *imageToElf(const char *kernel_buf, unsigned long kernel_size);
+
+#define FILENAME_IMAGE "/tmp/ImageXXXXXX"
+/* must match ELF_EXTRA_SIZE in kexec-elf-utils-riscv.c */
+#define ELF_WRAPPER_SIZE 0x1000
+
+/* Returns:
+ * -1 : in case of error/invalid format (not a valid Image.gz format).
+ * fd : File descriptor of the temp file containing the decompressed
+ *      Image wrapped in an ELF container, followed by BSS_PADDING
+ *      zero bytes.
+ */
+int zImage_riscv_probe(const char *kernel_buf, off_t kernel_size)
+{
+	int ret = -1;
+	int fd;
+	int kernel_fd;
+	char *fname;
+	char *raw_image = NULL;
+	char *elf_image = NULL;
+	const struct riscv_image_header *h;
+
+	if (!is_zlib_file(kernel_buf, &kernel_size)) {
+		dbgprintf("%s: Not an zImage file (Image.gz).\n", __func__);
+		return -1;
+	}
+
+	if (!(fname = strdup(FILENAME_IMAGE))) {
+		dbgprintf("%s: Can't duplicate strings\n", __func__);
+		return -1;
+	}
+
+	if ((fd = mkstemp(fname)) < 0) {
+		dbgprintf("%s: Can't open file %s\n", __func__, fname);
+		goto fail_mkstemp;
+	}
+
+	/* slurp in the input kernel */
+	dbgprintf("%s: ", __func__);
+	raw_image = slurp_decompress_file(kernel_buf, &kernel_size);
+	dbgprintf("Probe decompression\n");
+	if (!raw_image) {
+		dbgprintf("%s: Can't decompress %s\n", __func__, kernel_buf);
+		goto fail;
+	}
+
+	/* check for correct header magic */
+	if (kernel_size < sizeof(struct riscv_image_header)) {
+		dbgprintf("%s: No risc-v image header.\n", __func__);
+		goto fail;
+	}
+
+	h = (const struct riscv_image_header *)(raw_image);
+
+	if (!riscv_header_check_magic(h)) {
+		dbgprintf("%s: Bad risc-v image header.\n", __func__);
+		goto fail;
+	}
+
+	/* convert to ELF image */
+	elf_image = imageToElf(raw_image, kernel_size);
+	if (!elf_image) {
+		dbgprintf("%s: Can't wrap the image in ELF\n", __func__);
+		goto fail;
+	}
+	kernel_size += ELF_WRAPPER_SIZE;
+
+	if (write(fd, elf_image, kernel_size) != kernel_size) {
+		dbgprintf("%s: Can't write the uncompressed file %s\n",
+			  __func__, fname);
+		goto fail;
+	}
+
+	/*
+	 * Padding for bss: growing the file makes the new tail read back as
+	 * zero bytes without allocating a BSS_PADDING sized buffer.
+	 */
+	if (ftruncate(fd, kernel_size + BSS_PADDING) != 0) {
+		dbgprintf("Can't write padding file %s\n", fname);
+		goto fail;
+	}
+
+	close(fd);
+	fd = -1;	/* keep the failure path from closing it twice */
+
+	/* Open the tmp file again, this time in O_RDONLY mode, as
+	 * opening the file in O_RDWR and calling kexec_file_load()
+	 * causes the kernel to return -ETXTBSY
+	 */
+	kernel_fd = open(fname, O_RDONLY);
+	if (kernel_fd == -1) {
+		dbgprintf("%s: Failed to open file %s\n",
+			  __func__, fname);
+		goto fail;
+	}
+
+	unlink(fname);
+
+	free(elf_image);
+	free(raw_image);
+	free(fname);
+
+	return kernel_fd;
+
+fail:
+	free(elf_image);
+	free(raw_image);
+
+	if (fd >= 0)
+		close(fd);
+
+	unlink(fname);
+
+fail_mkstemp:
+	free(fname);
+
+	return ret;
+}
+
+int zImage_riscv_load(int argc, char **argv, const char *kernel_buf,
+	off_t kernel_size, struct kexec_info *info)
+{
+	const struct riscv_image_header *header;
+	unsigned long text_offset, image_size;
+	off_t new_base_addr = 0;
+	int result = -1;
+
+	if (info->file_mode) {
+		if (arch_options.initrd_path) {
+			info->initrd_fd = open(arch_options.initrd_path, O_RDONLY);
+			if (info->initrd_fd == -1) {
+				fprintf(stderr,
+					"Could not open initrd file %s:%s\n",
+					arch_options.initrd_path, strerror(errno));
+				result = EFAILED;
+				goto exit;
+			}
+		}
+
+		if (arch_options.cmdline) {
+			info->command_line = (char *)arch_options.cmdline;
+			info->command_line_len =
+				strlen(arch_options.cmdline) + 1;
+		}
+
+		return 0;
+	}
+
+	header = (const struct riscv_image_header *)(kernel_buf);
+	text_offset = riscv_header_text_offset(header);
+	image_size = riscv_header_image_size(header);
+
+	if (riscv_find_pbase(info, &new_base_addr, image_size, text_offset) < 0) {
+		fprintf(stderr, "Could not find a memory region for the "
+				"provided Image\n");
+		goto exit;
+	}
+
+	dbgprintf("kernel base %llx\n", (unsigned long long)new_base_addr);
+	dbgprintf("kernel size %llx\n", (unsigned long long)kernel_size);
+	dbgprintf("text_offset %lx\n", text_offset);
+	dbgprintf("image size %lx\n", image_size);
+
+	/* create and initialize elf core header segment */
+	if (info->kexec_flags & KEXEC_ON_CRASH) {
+		result = load_elfcorehdr(info);
+		if (result) {
+			dbgprintf("%s: Creating eflcorehdr failed.\n",
+				  __func__);
+			goto exit;
+		}
+	}
+
+	/* load the kernel */
+	add_segment_phys_virt(info, kernel_buf, kernel_size,
+			      new_base_addr,
+			      image_size, 0);
+
+	/* load additional data */
+	result = load_extra_segments(info, text_offset, image_size, ULONG_MAX);
+
+exit:
+	if (result)
+		fprintf(stderr, "kexec: load failed.\n");
+	return result;
+}
+
+void zImage_riscv_usage(void)
+{
+	printf(
+" An RICS-V zImage, compressed, big or little endian.\n"
+" Typically an Image.gz or Image.lzma file.\n\n");
+}
diff -ruN kexec_orig/kexec/kexec.c kexec_new/kexec/kexec.c
--- kexec_orig/kexec/kexec.c	2024-09-21 04:33:19.962930418 +0800
+++ kexec_new/kexec/kexec.c	2024-09-21 04:34:46.596691847 +0800
@@ -650,7 +650,6 @@
 		return;
 	}
 	arch_update_purgatory(info);
-
 	if (info->skip_checks) {
 		unsigned int tmp = 1;
 
@@ -1302,16 +1301,17 @@
 	kernel_buf = slurp_decompress_file(kernel, &kernel_size);
 
 	for (i = 0; i < file_types; i++) {
-#ifdef __aarch64__
+#if defined(__aarch64__) || defined(__riscv__) || defined(__riscv)
 		/* handle Image.gz like cases */
 		if (is_zlib_file(kernel, &kernel_size)) {
 			if ((ret = file_type[i].probe(kernel, kernel_size)) >= 0) {
 				kernel_fd = ret;
 				break;
 			}
-		} else
+		} else {
 			if (file_type[i].probe(kernel_buf, kernel_size) >= 0)
 				break;
+		}
 #else
 		if (file_type[i].probe(kernel_buf, kernel_size) >= 0)
 			break;