diff --git a/src/elfmerge/elf_link_elf.c b/src/elfmerge/elf_link_elf.c index 7d363dc3883192289040fba5b1ce91b291ce5fdd..7ccd53328bd6d5135312d47b1d171855866f6809 100644 --- a/src/elfmerge/elf_link_elf.c +++ b/src/elfmerge/elf_link_elf.c @@ -1327,6 +1327,7 @@ static void modify_elf_header(elf_link_t *elf_link) elf_set_hugepage(elf_link); } +#ifdef CONFIG_SYSBOOST_STAGING /* debug modify start */ #include @@ -1503,9 +1504,6 @@ int dwarf_modify_di_abbrev(Dwarf_Die die, void *di_ptr, struct dwarf_bias_info * break; case DW_FORM_strp: *dst_ptr += bias_info->debug_str; - // printf("offset: %lx, *abbrev_ptr: %x *dst_ptr: %x\n", - // (abbrev_ptr - di_base), - // *(uint32_t *)abbrev_ptr, *dst_ptr); break; case DW_FORM_data1: case DW_FORM_data2: @@ -1676,6 +1674,7 @@ static void modify_debug(elf_link_t *elf_link) } /* debug modify end */ +#endif /* CONFIG_SYSBOOST_STAGING*/ // .init_array first func is frame_dummy, frame_dummy call register_tm_clones // .fini_array first func is __do_global_dtors_aux, __do_global_dtors_aux call deregister_tm_clones @@ -1867,8 +1866,6 @@ int elf_link_write(elf_link_t *elf_link) // .rela.init .rela.text .rela.rodata .rela.tdata .rela.init_array .rela.data modify_local_call(elf_link); - modify_debug(elf_link); - // modify ELF header and write sections modify_elf_header(elf_link); diff --git a/src/elfmerge/meson.build b/src/elfmerge/meson.build index d425113b4bf0c84b8823a39ab89646ce634a8937..1e0d0a973a70b4f593c770eb42d5389391ea7fc3 100644 --- a/src/elfmerge/meson.build +++ b/src/elfmerge/meson.build @@ -18,11 +18,6 @@ core_sources = files( cflags += ['-fpic', '-pie'] -default_ldflags += ['/usr/lib64/libdwarf.a'] - -includes += '/usr/src/debug/libdwarf-0.7.0-1.oe2309.aarch64/src/lib/libdwarf/' -includes += '/usr/include/libdwarf-0/' - executable( 'elfmerge', core_sources, install: true, diff --git a/src/sysboost_loader/binfmt_rto.c b/src/sysboost_loader/binfmt_rto.c index dd7abe35efc90d7817a2de28c9487cbe424236d2..570a3936ec61e4e6c0a3aa6d40bc791ab1bd820c 100644 --- a/src/sysboost_loader/binfmt_rto.c +++ b/src/sysboost_loader/binfmt_rto.c @@ -10,6 +10,7 @@ * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com). */ +#include #include #include #include @@ -19,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -36,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -51,8 +54,6 @@ #include #ifdef CONFIG_X86 #include -/* x86, 22.03 LTS map_vdso is undefine */ -extern int map_vdso(const struct vdso_image *image, unsigned long addr); #endif #include "main.h" #include "binfmt_rto.h" @@ -62,11 +63,7 @@ extern int map_vdso(const struct vdso_image *image, unsigned long addr); #endif #ifdef CONFIG_ELF_SYSBOOST -#include "../elf_ext.h" - -/* compat 22.03 LTS, 22.03 LTS SP2 */ -#ifndef MM_SAVED_AUXV -#define MM_SAVED_AUXV(mm) mm->saved_auxv +#include "../elfmerge/elf_ext.h" #endif #define proc_symbol(SYM) typeof(SYM) *(SYM) @@ -98,6 +95,7 @@ static struct global_symbols { proc_symbol(task_cputime); proc_symbol(thread_group_cputime); proc_symbol(do_mm_populate); + proc_symbol(get_mm_exe_file); } rto_sym; #define proc_symbol_char(x) #x @@ -128,6 +126,7 @@ static char *global_symbol_names[] = { proc_symbol_char(task_cputime), proc_symbol_char(thread_group_cputime), proc_symbol_char(do_mm_populate), + proc_symbol_char(get_mm_exe_file), }; static int init_symbols(void) @@ -142,20 +141,6 @@ static int init_symbols(void) return 0; } -#ifdef ELF_HWCAP -#undef ELF_HWCAP -#define ELF_HWCAP (__cpu_get_elf_hwcap()) -static inline unsigned long __cpu_get_elf_hwcap(void) -{ -#ifdef CONFIG_ARM64 - return rto_sym.cpu_get_elf_hwcap(); -#else - // x86 boot_cpu_data is export - return (boot_cpu_data.x86_capability[CPUID_1_EDX]); -#endif -} -#endif - #ifdef ELF_HWCAP2 #undef ELF_HWCAP2 #define ELF_HWCAP2 (__cpu_get_elf_hwcap2()) @@ -170,6 +155,38 @@ static inline unsigned long __cpu_get_elf_hwcap2(void) } #endif +#ifdef CONFIG_ARM64 +#ifdef start_thread +#undef start_thread +#endif + +#define start_thread ___start_thread + +// arm64 start_thread is inline function, so copy it +static inline void ___start_thread(struct pt_regs *regs, unsigned long pc, + unsigned long sp) +{ + start_thread_common(regs, pc); + regs->pstate = PSR_MODE_EL0t; + rto_sym.spectre_v4_enable_task_mitigation(current); + regs->sp = sp; +} +#endif /* CONFIG_ARM64 */ + +#ifdef ELF_HWCAP +#undef ELF_HWCAP +#define ELF_HWCAP (__cpu_get_elf_hwcap()) +static inline unsigned long __cpu_get_elf_hwcap(void) +{ +#ifdef CONFIG_ARM64 + return rto_sym.cpu_get_elf_hwcap(); +#else + // x86 boot_cpu_data is export + return (boot_cpu_data.x86_capability[CPUID_1_EDX]); +#endif +} +#endif + #ifdef ARCH_DLINFO #undef ARCH_DLINFO #endif @@ -194,12 +211,6 @@ do { \ // TODO: vdso layout for ARM64 #define __arch_setup_additional_pages(bprm, uses_interp, load_bias, is_rto_format) (rto_sym.arch_setup_additional_pages(bprm, uses_interp)) -#ifdef arch_elf_adjust_prot -#undef arch_elf_adjust_prot -#endif - -#define arch_elf_adjust_prot rto_sym.arch_elf_adjust_prot - #else // x86 #ifdef get_sigframe_size @@ -244,6 +255,21 @@ int __arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp, un #endif + +#ifdef CONFIG_ELF_SYSBOOST + +#define AT_RSEQ_FEATURE_SIZE 27 /* rseq supported feature size */ +#define AT_RSEQ_ALIGN 28 /* rseq allocation alignment */ + +/* compat 22.03 LTS, 22.03 LTS SP2 */ +#ifndef MM_SAVED_AUXV +#define MM_SAVED_AUXV(mm) mm->saved_auxv +#endif + +#ifdef CONFIG_X86 +extern int map_vdso(const struct vdso_image *image, unsigned long addr); +#endif + #endif /* CONFIG_ELF_SYSBOOST */ #ifndef ELF_COMPAT @@ -280,6 +306,12 @@ static int elf_core_dump(struct coredump_params *cprm); #define elf_core_dump NULL #endif +#if ELF_EXEC_PAGESIZE > PAGE_SIZE +#define ELF_MIN_ALIGN ELF_EXEC_PAGESIZE +#else +#define ELF_MIN_ALIGN PAGE_SIZE +#endif + #ifndef ELF_CORE_EFLAGS #define ELF_CORE_EFLAGS 0 #endif @@ -288,8 +320,10 @@ static struct linux_binfmt elf_format = { .module = THIS_MODULE, .load_binary = load_rto_binary, .load_shlib = load_elf_library, +#ifdef CONFIG_COREDUMP .core_dump = elf_core_dump, .min_coredump = ELF_EXEC_PAGESIZE, +#endif }; #define BAD_ADDR(x) (unlikely((unsigned long)(x) >= TASK_SIZE)) @@ -389,6 +423,7 @@ create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec, unsigned char k_rand_bytes[16]; int items; elf_addr_t *elf_info; + elf_addr_t flags = 0; int ei_index; const struct cred *cred = current_cred(); struct vm_area_struct *vma; @@ -560,15 +595,25 @@ create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec, return 0; } +#ifdef CONFIG_ELF_SYSBOOST static unsigned long elf_map(struct file *filep, unsigned long addr, const struct elf_phdr *eppnt, int prot, int type, unsigned long total_size, bool use_pmd_mapping, bool is_exec_seg) +#else +static unsigned long elf_map(struct file *filep, unsigned long addr, + const struct elf_phdr *eppnt, int prot, int type, + unsigned long total_size) +#endif { - unsigned long map_addr, size, off; + unsigned long map_addr; + +#ifdef CONFIG_ELF_SYSBOOST + unsigned long size, off; if (use_pmd_mapping) { size = eppnt->p_filesz + ELF_HPAGEOFFSET(eppnt->p_vaddr); off = eppnt->p_offset - ELF_HPAGEOFFSET(eppnt->p_vaddr); + pr_info("vm_mmap, addr: %lx, size: %lx, off: %lx", addr, size, off); addr = ELF_HPAGESTART(addr); if (is_exec_seg) size = ELF_HPAGEALIGN(size); @@ -580,6 +625,12 @@ static unsigned long elf_map(struct file *filep, unsigned long addr, addr = ELF_PAGESTART(addr); size = ELF_PAGEALIGN(size); } +#else + unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr); + unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr); + addr = ELF_PAGESTART(addr); + size = ELF_PAGEALIGN(size); +#endif /* mmap() will return -EINVAL if given a zero size, but a * segment with zero filesize is perfectly valid */ @@ -595,21 +646,21 @@ static unsigned long elf_map(struct file *filep, unsigned long addr, * the end. (which unmap is needed for ELF images with holes.) */ if (total_size) { - if (use_pmd_mapping) - total_size = ELF_HPAGEALIGN(total_size); - else - total_size = ELF_PAGEALIGN(total_size); + total_size = ELF_PAGEALIGN(total_size); if (debug) pr_info("vm_mmap, addr: %lx, total_size: %lx, off: %lx", addr, total_size, off); map_addr = vm_mmap(filep, addr, total_size, prot, type, off); - if (!BAD_ADDR(map_addr)) + if (!BAD_ADDR(map_addr)) { vm_munmap(map_addr+size, total_size-size); + pr_info("vm_mmap total_size, map_addr: %lx", map_addr); + } } else { if (debug) pr_info("vm_mmap, addr: %lx, size: %lx, off: %lx", addr, size, off); map_addr = vm_mmap(filep, addr, size, prot, type, off); + pr_info("vm_mmap size, map_addr: %lx", map_addr); } if ((type & MAP_FIXED_NOREPLACE) && @@ -805,7 +856,7 @@ static inline int make_prot(u32 p_flags, struct arch_elf_state *arch_state, if (p_flags & PF_X) prot |= PROT_EXEC; - return arch_elf_adjust_prot(prot, arch_state, has_interp, is_interp); + return rto_sym.arch_elf_adjust_prot(prot, arch_state, has_interp, is_interp); } /* This is much more generalized than the library routine read function, @@ -813,10 +864,17 @@ static inline int make_prot(u32 p_flags, struct arch_elf_state *arch_state, is only provided so that we can read a.out libraries that have an ELF header */ +#ifdef CONFIG_ELF_SYSBOOST static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, struct file *interpreter, unsigned long no_base, struct elf_phdr *interp_elf_phdata, struct arch_elf_state *arch_state, bool is_rto_format) +#else +static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, + struct file *interpreter, + unsigned long no_base, struct elf_phdr *interp_elf_phdata, + struct arch_elf_state *arch_state) +#endif { struct elf_phdr *eppnt; unsigned long load_addr = 0; @@ -872,9 +930,13 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, else if (no_base && interp_elf_ex->e_type == ET_DYN) load_addr = -vaddr; +#ifdef CONFIG_ELF_SYSBOOST map_addr = elf_map(interpreter, load_addr + vaddr, eppnt, elf_prot, elf_type, total_size, false, false); - +#else + map_addr = elf_map(interpreter, load_addr + vaddr, + eppnt, elf_prot, elf_type, total_size); +#endif total_size = 0; error = map_addr; if (BAD_ADDR(map_addr)) @@ -1060,41 +1122,6 @@ static int parse_elf_properties(struct file *f, const struct elf_phdr *phdr, } #ifdef CONFIG_ELF_SYSBOOST -struct file *try_get_rto_file(struct file *file) -{ - char *buffer, *rto_path; - struct file *rto_file; - - buffer = kmalloc(PATH_MAX, GFP_KERNEL); - rto_path = file_path(file, buffer, PATH_MAX - 5); - strcat(rto_path, ".rto"); - rto_file = open_exec(rto_path); - - kfree(buffer); - return rto_file; -} - -void *load_bprm_buf(struct file *file) -{ - ssize_t ret; - char *buffer; - loff_t pos = 0; - - buffer = kmalloc(BINPRM_BUF_SIZE, GFP_KERNEL); - if (!buffer) - return ERR_PTR(-ENOMEM); - - ret = kernel_read(file, buffer, BINPRM_BUF_SIZE, &pos); - if (ret != BINPRM_BUF_SIZE) { - kfree(buffer); - if (ret < 0) - return ERR_PTR(ret); - return ERR_PTR(-EIO); - } - - return buffer; -} - static int prepare_rto(struct linux_binprm *bprm) { void *buffer; @@ -1114,14 +1141,17 @@ static inline int try_replace_file(struct linux_binprm *bprm) int ret; rto_file = try_get_rto_file(bprm->file); - if (IS_ERR(rto_file)) + if (IS_ERR(rto_file)) { + pr_info("try_get_rto_file fail %ld\n", PTR_ERR(rto_file)); return PTR_ERR(rto_file); + } original_file = bprm->file; bprm->file = rto_file; ret = prepare_rto(bprm); if (ret) { bprm->file = original_file; + pr_info("prepare_rto fail %d\n", ret); return ret; } @@ -1129,24 +1159,6 @@ static inline int try_replace_file(struct linux_binprm *bprm) return 0; } -#ifdef CONFIG_ARM64 -#ifdef start_thread -#undef start_thread -#endif - -#define start_thread ___start_thread - -// arm64 start_thread is inline function, so copy it -static inline void ___start_thread(struct pt_regs *regs, unsigned long pc, - unsigned long sp) -{ - start_thread_common(regs, pc); - regs->pstate = PSR_MODE_EL0t; - rto_sym.spectre_v4_enable_task_mitigation(current); - regs->sp = sp; -} -#endif /* CONFIG_ARM64 */ - #endif /* CONFIG_ELF_SYSBOOST */ void print_vma(struct mm_struct *mm) @@ -1177,7 +1189,7 @@ static int load_rto_binary(struct linux_binprm *bprm) unsigned long start_code, end_code, start_data, end_data; unsigned long reloc_func_desc __maybe_unused = 0; int executable_stack = EXSTACK_DEFAULT; - struct elfhdr *elf_ex; + struct elfhdr *elf_ex = (struct elfhdr *)bprm->buf; struct elfhdr *interp_elf_ex = NULL; struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE; struct mm_struct *mm; @@ -1190,36 +1202,46 @@ static int load_rto_binary(struct linux_binprm *bprm) struct loaded_rto *loaded_rto = NULL; struct list_head *preload_seg_pos = NULL; struct loaded_seg *loaded_seg; + bool using_hpage = false; +#endif + // TODO check rto inode! -load_rto: - elf_ex = (struct elfhdr *)bprm->buf; - is_rto_format = elf_ex->e_flags & OS_SPECIFIC_FLAG_RTO; - is_rto_symbolic_link = IS_SYSBOOST_RTO_SYMBOLIC_LINK(bprm->file->f_inode); retval = -ENOEXEC; +#ifdef CONFIG_ELF_SYSBOOST /* close feature to rmmod this ko */ if (!use_rto) { goto out; } + +load_rto: + is_rto_symbolic_link = IS_SYSBOOST_RTO_SYMBOLIC_LINK(bprm->file->f_inode); + elf_ex = (struct elfhdr *)bprm->buf; + is_rto_format = elf_ex->e_flags & OS_SPECIFIC_FLAG_RTO; if (!is_rto_format && !is_rto_symbolic_link) { goto out; } /* replace app.rto file, then use binfmt */ - if (is_rto_symbolic_link) { - // struct inode *inode = bprm->file->f_inode; + if (is_rto_symbolic_link && !is_rto_format) { + struct inode *inode = bprm->file->f_inode; int ret; - if (use_hpage) + + if (use_hpage) { loaded_rto = find_loaded_rto(bprm->file->f_inode); + using_hpage = true; + } ret = try_replace_file(bprm); if (ret) { /* limit print */ - printk("replace rto file fail, %d\n", ret); + pr_info("replace rto file fail, %d\n", ret); goto out; } - // pr_info("replace rto file success, loaded_rto: 0x%lx, inode: 0x%lx\n", - // loaded_rto, inode); + if (debug) { + pr_info("replace rto success, loaded_rto: %pK, inode: %pK\n", + loaded_rto, inode); + } goto load_rto; } @@ -1300,7 +1322,7 @@ load_rto: interp_elf_ex = kmalloc(sizeof(*interp_elf_ex), GFP_KERNEL); if (!interp_elf_ex) { retval = -ENOMEM; - goto out_free_ph; + goto out_free_file; } /* Get the exec headers */ @@ -1409,7 +1431,7 @@ out_free_interp: executable_stack); if (retval < 0) goto out_free_dentry; - + elf_bss = 0; elf_brk = 0; @@ -1418,25 +1440,29 @@ out_free_interp: start_data = 0; end_data = 0; +#ifdef CONFIG_ELF_SYSBOOST if (loaded_rto) preload_seg_pos = &loaded_rto->segs; +#endif /* Now we do a little grungy work by mmapping the ELF image into the correct location in memory. */ for(i = 0, elf_ppnt = elf_phdata; i < elf_ex->e_phnum; i++, elf_ppnt++) { - bool is_exec_seg = elf_ppnt->p_flags & PF_X; int elf_prot, elf_flags; unsigned long k, vaddr; unsigned long total_size = 0; unsigned long alignment; +#ifdef CONFIG_ELF_SYSBOOST unsigned long size, off; + bool is_exec_seg = !(elf_ppnt->p_flags & PF_W); +#endif if (elf_ppnt->p_type != PT_LOAD) continue; if (unlikely (elf_brk > elf_bss)) { unsigned long nbyte; - + /* There was a PT_LOAD segment with p_memsz > p_filesz before this one. Map anonymous pages, if needed, and clear the area. */ @@ -1522,11 +1548,33 @@ out_free_interp: * ELF vaddrs will be correctly offset. The result * is then page aligned. */ - // if (use_hpage) +#ifdef CONFIG_ELF_SYSBOOST + if (using_hpage) load_bias = ELF_HPAGESTART(load_bias - vaddr); - // else - // load_bias = ELF_PAGESTART(load_bias - vaddr); + else + load_bias = ELF_PAGESTART(load_bias - vaddr); +#else + load_bias = ELF_PAGESTART(load_bias - vaddr); +#endif + /* + * Calculate the entire size of the ELF mapping + * (total_size), used for the initial mapping, + * due to load_addr_set which is set to true later + * once the initial mapping is performed. + * + * Note that this is only sensible when the LOAD + * segments are contiguous (or overlapping). If + * used for LOADs that are far apart, this would + * cause the holes between LOADs to be mapped, + * running the risk of having the mapping fail, + * as it would be larger than the ELF file itself. + * + * As a result, only ET_DYN does this, since + * some ET_EXEC (e.g. ia64) may have large virtual + * memory holes between LOADs. + * + */ total_size = total_mapping_size(elf_phdata, elf_ex->e_phnum); if (!total_size) { @@ -1535,17 +1583,24 @@ out_free_interp: } } +#ifdef CONFIG_ELF_SYSBOOST + error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt, + elf_prot, elf_flags, total_size, using_hpage, is_exec_seg); +#else error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt, - elf_prot, elf_flags, total_size, true, is_exec_seg); - // elf_prot, elf_flags, 0, use_hpage); + elf_prot, elf_flags, total_size); +#endif if (BAD_ADDR(error)) { +#ifdef CONFIG_ELF_SYSBOOST if (debug) - pr_info("lyt elf_map error: %ld\n", PTR_ERR((void*)error)); - retval = IS_ERR((void *)error) ? + pr_info("elf_map error: %ld\n", PTR_ERR((void*)error)); +#endif + retval = IS_ERR_VALUE(error) ? PTR_ERR((void*)error) : -EINVAL; goto out_free_dentry; } - if (use_hpage && preload_seg_pos) { +#ifdef CONFIG_ELF_SYSBOOST + if (using_hpage && preload_seg_pos) { preload_seg_pos = preload_seg_pos->next; BUG_ON(preload_seg_pos == &loaded_rto->segs); loaded_seg = list_entry(preload_seg_pos, @@ -1553,20 +1608,34 @@ out_free_interp: size = elf_ppnt->p_filesz + ELF_HPAGEOFFSET(elf_ppnt->p_vaddr); off = elf_ppnt->p_offset - ELF_HPAGEOFFSET(elf_ppnt->p_vaddr); size = ELF_HPAGEALIGN(size); - if (debug) - pr_info("lyt vaddr: 0x%lx, off: 0x%lx, size: 0x%lx\n", + if (debug){ + pr_info("loaded_seg: %pK\n", loaded_seg); + pr_info("elf_map vaddr: 0x%lx, off: 0x%lx, size: 0x%lx\n", error, off, size); + } if (is_exec_seg) rto_populate(bprm->file, error, off, size, loaded_seg); } +#endif if (!load_addr_set) { load_addr_set = 1; load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset); if (elf_ex->e_type == ET_DYN) { +#ifdef CONFIG_ELF_SYSBOOST + if (using_hpage) { + load_bias += error - + ELF_HPAGESTART(load_bias + vaddr); + } else { + load_bias += error - + ELF_PAGESTART(load_bias + vaddr); + } + load_addr += load_bias; +#else load_bias += error - - ELF_HPAGESTART(load_bias + vaddr); + ELF_PAGESTART(load_bias + vaddr); load_addr += load_bias; +#endif reloc_func_desc = load_bias; } } @@ -1615,6 +1684,7 @@ out_free_interp: } } + pr_info("load success\n"); e_entry = elf_ex->e_entry + load_bias; phdr_addr += load_bias; elf_bss += load_bias; @@ -1638,10 +1708,17 @@ out_free_interp: } if (interpreter) { +#ifdef CONFIG_ELF_SYSBOOST elf_entry = load_elf_interp(interp_elf_ex, interpreter, load_bias, interp_elf_phdata, &arch_state, is_rto_format); +#else + elf_entry = load_elf_interp(interp_elf_ex, + interpreter, + load_bias, interp_elf_phdata, + &arch_state); +#endif if (!IS_ERR((void *)elf_entry)) { rto_layout_start_addr = elf_entry; /* @@ -1676,9 +1753,13 @@ out_free_interp: set_binfmt(&elf_format); #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES +#ifdef CONFIG_ELF_SYSBOOST retval = __arch_setup_additional_pages(bprm, !!interpreter, load_bias, is_rto_format); +#else + retval = rto_sym.arch_setup_additional_pages(bprm, !!interpreter); if (retval < 0) goto out; +#endif #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */ #ifdef CONFIG_ELF_SYSBOOST @@ -1765,6 +1846,7 @@ out: out_free_dentry: kfree(interp_elf_ex); kfree(interp_elf_phdata); +out_free_file: allow_write_access(interpreter); if (interpreter) fput(interpreter); @@ -1983,7 +2065,7 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p, { const struct cred *cred; unsigned int i, len; - + /* first copy the parameters from user space */ memset(psinfo, 0, sizeof(struct elf_prpsinfo)); @@ -2593,6 +2675,21 @@ static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum, shdr4extnum->sh_info = segs; } +static int set_crash_info(struct file *exe_file, struct crash_info *msg) +{ + char *path; + char *buf = kmalloc(PATH_MAX, GFP_KERNEL); + if (!buf) { + pr_info("send binary path to sysboostd failed: kmalloc buf failed.\n"); + return -ENOMEM; + } + path = file_path(exe_file, buf, PATH_MAX); + msg->len = strlen(path); + strncpy(msg->path, path, msg->len); + kfree(buf); + return msg->len; +} + /* * Actual dumper * @@ -2611,7 +2708,39 @@ static int elf_core_dump(struct coredump_params *cprm) struct elf_shdr *shdr4extnum = NULL; Elf_Half e_phnum; elf_addr_t e_shoff; + struct crash_info *msg; + struct file *exe_file; + struct mm_struct *mm; + + pr_info("test crash\n"); + /* send binary path to sysboostd */ + msg = kmalloc(sizeof(struct crash_info), GFP_KERNEL); + if (!msg) { + pr_info("send binary path to sysboostd failed: kmalloc struct crash_info failed.\n"); + goto core_dump; + } + mm = get_task_mm(current); + if (!mm) { + pr_info("send binary path to sysboostd failed: get task mm failed.\n"); + goto mm_error; + } + exe_file = rto_sym.get_mm_exe_file(mm); + mmput(mm); + if (!exe_file) { + pr_info("send binary path to sysboostd failed: get mm exe file failed.\n"); + goto mm_error; + } + if (set_crash_info(exe_file, msg) <= 0) { + pr_info("send binary path to sysboostd failed: set_crash_info failed.\n"); + goto crashinfo_error; + } + send_to_user(msg); +crashinfo_error: + fput(exe_file); +mm_error: + kfree(msg); +core_dump: /* * The number of segs are recored into ELF header as 16bit value. * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here. @@ -2635,7 +2764,7 @@ static int elf_core_dump(struct coredump_params *cprm) has_dumped = 1; - offset += sizeof(elf); /* Elf header */ + offset += sizeof(elf); /* ELF header */ offset += segs * sizeof(struct elf_phdr); /* Program headers */ /* Write notes phdr entry */ @@ -2701,7 +2830,7 @@ static int elf_core_dump(struct coredump_params *cprm) if (!elf_core_write_extra_phdrs(cprm, offset)) goto end_coredump; - /* write out the notes section */ + /* write out the notes section */ if (!write_note_info(&info, cprm)) goto end_coredump; diff --git a/src/sysboost_loader/rto_populate.c b/src/sysboost_loader/rto_populate.c index 77a771509016ecf423c9a4b6699817be96783892..ede3b75e23af2542299284ccc9f90fc3de841ad4 100644 --- a/src/sysboost_loader/rto_populate.c +++ b/src/sysboost_loader/rto_populate.c @@ -77,41 +77,6 @@ static char *global_symbol_names[] = { #endif }; -#ifdef CONFIG_X86 -// p4d_alloc -> __p4d_alloc -#define p4d_alloc rto_p4d_alloc -static inline p4d_t *rto_p4d_alloc(struct mm_struct *mm, pgd_t *pgd, - unsigned long address) -{ - return (unlikely(pgd_none(*pgd)) && ppl_sym.__p4d_alloc(mm, pgd, address)) ? - NULL : p4d_offset(pgd, address); -} - -// pud_trans_unstable() -// pud_none_or_trans_huge_or_dev_or_clear_bad() -// pud_clear_bad() -#define pud_trans_unstable rto_pud_trans_unstable - -static inline int rto_pud_none_or_trans_huge_or_dev_or_clear_bad(pud_t *pud) -{ - pud_t pudval = READ_ONCE(*pud); - - if (pud_none(pudval) || pud_trans_huge(pudval) || pud_devmap(pudval)) - return 1; - if (unlikely(pud_bad(pudval))) { - ppl_sym.pud_clear_bad(pud); - return 1; - } - return 0; -} - -static inline int rto_pud_trans_unstable(pud_t *pud) -{ - return rto_pud_none_or_trans_huge_or_dev_or_clear_bad(pud); -} - -#endif - static int init_symbols(void) { int ret; @@ -128,98 +93,15 @@ static vm_fault_t __rto_do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page, gfp_t gfp) { struct vm_area_struct *vma = vmf->vma; - // pgtable_t pgtable; - unsigned long haddr = vmf->address & HPAGE_PMD_MASK; vm_fault_t ret = 0; -// vma_set_anonymous(vma); - VM_BUG_ON_PAGE(!PageCompound(page), page); - // pr_info("enter __rto_do_huge_pmd_anonymous_page\n"); if (debug) pr_info("vma->vm_start: %lx, vma->vm_end: %lx, vma->vm_pgoff: %lx\n", vma->vm_start, vma->vm_end, vma->vm_pgoff); ret = ppl_sym.do_set_pmd(vmf, page); - // pr_info("__rto_do_huge_pmd_anonymous_page return %d\n", ret); return ret; - - // if (mem_cgroup_charge(page, vma->vm_mm, gfp)) { - // put_page(page); - // count_vm_event(THP_FAULT_FALLBACK); - // count_vm_event(THP_FAULT_FALLBACK_CHARGE); - // return VM_FAULT_FALLBACK; - // } - // cgroup_throttle_swaprate(page, gfp); - - // pgtable = pte_alloc_one(vma->vm_mm); - // if (unlikely(!pgtable)) { - // ret = VM_FAULT_OOM; - // goto release; - // } - - // clear_huge_page(page, vmf->address, HPAGE_PMD_NR); - /* - * The memory barrier inside __SetPageUptodate makes sure that - * clear_huge_page writes become visible before the set_pmd_at() - * write. - */ - // __SetPageUptodate(page); - - vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); - if (unlikely(!pmd_none(*vmf->pmd))) { - goto unlock_release; - } else { - pmd_t entry; - - ret = check_stable_address_space(vma->vm_mm); - if (ret) - goto unlock_release; - - /* Deliver the page fault to userland */ - // if (userfaultfd_missing(vma)) { - // vm_fault_t ret2; - - // spin_unlock(vmf->ptl); - // put_page(page); - // pte_free(vma->vm_mm, pgtable); - // ret2 = handle_userfault(vmf, VM_UFFD_MISSING); - // VM_BUG_ON(ret2 & VM_FAULT_FALLBACK); - // return ret2; - // } - - entry = mk_huge_pmd(page, vma->vm_page_prot); - // we don't need write access for text segment. - // entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); - - // we don't need LRU. - // page_add_new_anon_rmap(page, vma, haddr, true); - // lru_cache_add_inactive_or_unevictable(page, vma); - - // we won't split thp, no need to deposit - // pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable); - - set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry); - // pr_info("set_pmd_at entry: 0x%pK, entry_size: %d\n", - // entry, sizeof(entry)); - // add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR); - // reliable_page_counter(page, vma->vm_mm, HPAGE_PMD_NR); - mm_inc_nr_ptes(vma->vm_mm); - spin_unlock(vmf->ptl); - - // count_vm_event(THP_FAULT_ALLOC); - // count_memcg_event_mm(vma->vm_mm, THP_FAULT_ALLOC); - } - - return 0; -unlock_release: - spin_unlock(vmf->ptl); -// release: - // if (pgtable) - // pte_free(vma->vm_mm, pgtable); - // put_page(page); - return ret; - } static inline int rto_anon_vma_prepare(struct vm_area_struct *vma) @@ -234,73 +116,16 @@ vm_fault_t rto_do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *hpa { struct vm_area_struct *vma = vmf->vma; gfp_t gfp; - // struct page *page; - // unsigned long haddr = vmf->address & HPAGE_PMD_MASK; - // we have checked boader outside, no need to double check - // if (!transhuge_vma_suitable(vma, haddr)) - // return VM_FAULT_FALLBACK; if (unlikely(rto_anon_vma_prepare(vma))) return VM_FAULT_OOM; - // if (unlikely(khugepaged_enter(vma, vma->vm_flags))) - // return VM_FAULT_OOM; - // if (!(vmf->flags & FAULT_FLAG_WRITE) && - // !mm_forbids_zeropage(vma->vm_mm) && - // transparent_hugepage_use_zero_page()) { - // pgtable_t pgtable; - // struct page *zero_page; - // vm_fault_t ret; - // pgtable = pte_alloc_one(vma->vm_mm); - // if (unlikely(!pgtable)) - // return VM_FAULT_OOM; - // zero_page = mm_get_huge_zero_page(vma->vm_mm); - // if (unlikely(!zero_page)) { - // pte_free(vma->vm_mm, pgtable); - // count_vm_event(THP_FAULT_FALLBACK); - // return VM_FAULT_FALLBACK; - // } - // vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); - // ret = 0; - // if (pmd_none(*vmf->pmd)) { - // ret = check_stable_address_space(vma->vm_mm); - // if (ret) { - // spin_unlock(vmf->ptl); - // pte_free(vma->vm_mm, pgtable); - // // } else if (userfaultfd_missing(vma)) { - // // spin_unlock(vmf->ptl); - // // pte_free(vma->vm_mm, pgtable); - // // ret = handle_userfault(vmf, VM_UFFD_MISSING); - // // VM_BUG_ON(ret & VM_FAULT_FALLBACK); - // } else { - // // set_huge_zero_page(pgtable, vma->vm_mm, vma, - // // haddr, vmf->pmd, zero_page); - // spin_unlock(vmf->ptl); - // } - // } else { - // spin_unlock(vmf->ptl); - // pte_free(vma->vm_mm, pgtable); - // } - // return ret; - // } - // gfp = alloc_hugepage_direct_gfpmask(vma); - // TODO - // page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER); - // if (unlikely(!page)) { - // count_vm_event(THP_FAULT_FALLBACK); - // return VM_FAULT_FALLBACK; - // } - // prep_transhuge_page(page); return __rto_do_huge_pmd_anonymous_page(vmf, hpage, gfp); } static inline vm_fault_t create_huge_pmd(struct vm_fault *vmf, struct page *hpage) { - // if (vma_is_anonymous(vmf->vma)) - return rto_do_huge_pmd_anonymous_page(vmf, hpage); - // if (vmf->vma->vm_ops->huge_fault) - // return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PMD); - // return VM_FAULT_FALLBACK; + return rto_do_huge_pmd_anonymous_page(vmf, hpage); } static inline pud_t *rto_pud_alloc(struct mm_struct *mm, p4d_t *p4d, @@ -330,9 +155,7 @@ static vm_fault_t __rto_handle_mm_fault(struct vm_area_struct *vma, .address = address & PAGE_MASK, .flags = flags, .pgoff = linear_page_index(vma, address), - // .gfp_mask = __get_fault_gfp_mask(vma), }; - // unsigned int dirty = flags & FAULT_FLAG_WRITE; struct mm_struct *mm = vma->vm_mm; pgd_t *pgd; p4d_t *p4d; @@ -348,42 +171,9 @@ static vm_fault_t __rto_handle_mm_fault(struct vm_area_struct *vma, if (!vmf.pud) return VM_FAULT_OOM; retry_pud: - // if (pud_none(*vmf.pud) && __transparent_hugepage_enabled(vma)) { - // ret = create_huge_pud(&vmf); - // if (!(ret & VM_FAULT_FALLBACK)) - // return ret; - // } else { - // pud_t orig_pud = *vmf.pud; - - // barrier(); - // if (pud_trans_huge(orig_pud) || pud_devmap(orig_pud)) { - - // /* NUMA case for anonymous PUDs would go here */ - - // if (dirty && !pud_write(orig_pud)) { - // ret = wp_huge_pud(&vmf, orig_pud); - // if (!(ret & VM_FAULT_FALLBACK)) - // return ret; - // } else { - // huge_pud_set_accessed(&vmf, orig_pud); - // return 0; - // } - // } - // } - - pmd = pmd_offset(vmf.pud, address); - // if (pmd) - // pr_info("pmd: %pK\n", pmd); - // else - // pr_info("pmd is null\n"); vmf.pmd = rto_pmd_alloc(mm, vmf.pud, address); if (!vmf.pmd) return VM_FAULT_OOM; - - if (!pmd_none(*vmf.pmd)) { - // pr_info("vmf.pmd: %pK, value: 0x%lx, return\n", vmf.pmd, pmd_val(*vmf.pmd)); - return VM_FAULT_OOM; - } /* Huge pud page fault raced with pmd_alloc? */ if (pud_trans_unstable(vmf.pud)) @@ -391,21 +181,12 @@ retry_pud: // if (pmd_none(*vmf.pmd) && __transparent_hugepage_enabled(vma)) { ret = create_huge_pmd(&vmf, hpage); - if (debug) { - if (vmf.pmd) { - pr_info("vmf.pmd: %pK, value: 0x%llx, pmd_trans_huge: 0x%d\n", - vmf.pmd, pmd_val(*vmf.pmd), pmd_trans_huge(*pmd)); - } else { - pr_info("vmf.pmd is null\n"); - } - } if (!(ret & VM_FAULT_FALLBACK)) return ret; // } - BUG(); - return 0; + return -ENOMEM; } /* @@ -426,12 +207,6 @@ static vm_fault_t rto_handle_mm_fault(struct vm_area_struct *vma, unsigned long // count_memcg_event_mm(vma->vm_mm, PGFAULT); /* do counter updates before entering really critical section. */ - // check_sync_rss_stat(current); - - // if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE, - // flags & FAULT_FLAG_INSTRUCTION, - // flags & FAULT_FLAG_REMOTE)) - // return VM_FAULT_SIGSEGV; /* * Enable the memcg OOM handling for faults triggered in user @@ -440,25 +215,14 @@ static vm_fault_t rto_handle_mm_fault(struct vm_area_struct *vma, unsigned long if (flags & FAULT_FLAG_USER) mem_cgroup_enter_user_fault(); - // if (unlikely(is_vm_hugetlb_page(vma))) - // ret = hugetlb_fault(vma->vm_mm, vma, address, flags); - // else - ret = __rto_handle_mm_fault(vma, address, flags, hpage); + ret = __rto_handle_mm_fault(vma, address, flags, hpage); - if (flags & FAULT_FLAG_USER) { - mem_cgroup_exit_user_fault(); /* * The task may have entered a memcg OOM situation but * if the allocation error was handled gracefully (no * VM_FAULT_OOM), there is no need to kill anything. * Just clean up the OOM state peacefully. */ - // TODO don't consider oom now - // if (task_in_memcg_oom(current) && !(ret & VM_FAULT_OOM)) - // mem_cgroup_oom_synchronize(false); - } - - // mm_account_fault(regs, address, flags, ret); return ret; } @@ -469,7 +233,7 @@ static vm_fault_t rto_handle_mm_fault(struct vm_area_struct *vma, unsigned long * is, *@locked will be set to 0 and -EBUSY returned. */ static int rto_faultin_page(struct vm_area_struct *vma, - unsigned long address, unsigned int *flags, int *locked, struct page *hpage) + unsigned long address, unsigned int *flags, bool unshare, int *locked, struct page *hpage) { unsigned int fault_flags = 0; vm_fault_t ret; @@ -517,8 +281,6 @@ static int rto_faultin_page(struct vm_area_struct *vma, * which a read fault here might prevent (a readonly page might get * reCOWed by userspace write). */ - if ((ret & VM_FAULT_WRITE) && !(vma->vm_flags & VM_WRITE)) - *flags |= FOLL_COW; return 0; } @@ -589,12 +351,8 @@ static long rto_get_user_pages(struct mm_struct *mm, { long ret = 0, i = 0; struct vm_area_struct *vma = NULL; - // struct follow_page_context ctx = { NULL }; struct list_head *hpage_pos = hpages; - // pr_info("start rto_get_user_pages, start: 0x%lx, nr_pages: 0x%lx\n", - // start, nr_pages); - if (!nr_pages) return 0; @@ -607,11 +365,9 @@ static long rto_get_user_pages(struct mm_struct *mm, * fault information is unrelated to the reference behaviour of a task * using the address space */ - if (!(gup_flags & FOLL_FORCE)) - gup_flags |= FOLL_NUMA; do { - struct page *page, *hpage, *new_hpage; + struct page *page = NULL, *hpage, *new_hpage; unsigned int foll_flags = gup_flags; unsigned int page_increm; @@ -626,39 +382,14 @@ static long rto_get_user_pages(struct mm_struct *mm, /* first iteration or cross vma bound */ if (!vma || start >= vma->vm_end) { vma = find_extend_vma(mm, start); - // if (!vma && in_gate_area(mm, start)) { - // ret = get_gate_page(mm, start & PAGE_MASK, - // gup_flags, &vma, - // pages ? &pages[i] : NULL); - // if (ret) - // goto out; - // ctx.page_mask = 0; - // goto next_page; - // } - - // if (!vma || ppl_sym.check_vma_flags(vma, gup_flags)) { - // ret = -EFAULT; - // goto out; - // } - - // if (is_vm_hugetlb_page(vma)) { - // i = follow_hugetlb_page(mm, vma, pages, vmas, - // &start, &nr_pages, i, - // gup_flags, locked); - // if (locked && *locked == 0) { - // /* - // * We've got a VM_FAULT_RETRY - // * and we've lost mmap_lock. - // * We must stop here. - // */ - // BUG_ON(gup_flags & FOLL_NOWAIT); - // BUG_ON(ret != 0); - // goto out; - // } - // continue; - // } + + if (!vma) { + ret = -EFAULT; + goto out; + } + } -// retry: + /* * If we have a pending SIGKILL, don't keep faulting pages and * potentially allocating memory. @@ -670,70 +401,31 @@ static long rto_get_user_pages(struct mm_struct *mm, cond_resched(); /* TODO try comment here to increase efficiency */ - // page = ppl_sym.follow_page_mask(vma, start, foll_flags, &ctx); hpage = list_entry(hpage_pos, struct page, lru); - if (TestPageNeedCopy(hpage)) { - int i; - // pr_info("alloc new_hpage for page: 0x%pK\n", hpage); - new_hpage = alloc_pages(GFP_KERNEL | __GFP_ZERO | __GFP_COMP, - HUGETLB_PAGE_ORDER); - if (!new_hpage) - BUG(); - for (i = 0; i < 1000; i++) { - get_page(new_hpage); - } - memcpy(page_to_virt(new_hpage), page_to_virt(hpage), HPAGE_SIZE); - hpage = new_hpage; - } else { - get_page(hpage); - } + get_page(hpage); if (debug) pr_info("consume hpage 0x%pK, page: 0x%pK\n", hpage, page); if (!page) { - ret = rto_faultin_page(vma, start, &foll_flags, locked, hpage); + ret = rto_faultin_page(vma, start, &foll_flags, PTR_ERR(page) == -EMLINK, locked, hpage); switch (ret) { case 0: // pr_info("retry\n"); goto next_page; // goto retry; case -EBUSY: + case -EAGAIN: ret = 0; fallthrough; case -EFAULT: case -ENOMEM: case -EHWPOISON: goto out; - case -ENOENT: - goto next_page; - } BUG(); - } else if (PTR_ERR(page) == -EEXIST) { - /* - * Proper page table entry exists, but no corresponding - * struct page. - */ - BUG(); - goto next_page; - } else if (IS_ERR(page)) { - ret = PTR_ERR(page); - goto out; + } } - // if (pages) { - // pages[i] = page; - // flush_anon_page(vma, page, start); - // flush_dcache_page(page); - // ctx.page_mask = 0; - // } next_page: - // if (vmas) { - // vmas[i] = vma; - // ctx.page_mask = 0; - // } page_increm = 0x200; - // page_increm = 1 + (~(start >> PAGE_SHIFT) & ctx.page_mask); - // // pr_info("page_increm: %d, ctx.page_mask: 0x%x, i: %ld, nr_pages: %ld", - // page_increm, ctx.page_mask, i, nr_pages); if (page_increm > nr_pages) page_increm = nr_pages; i += page_increm; @@ -741,8 +433,6 @@ next_page: nr_pages -= page_increm; } while (nr_pages); out: - // if (ctx.pgmap) - // put_dev_pagemap(ctx.pgmap); return i ? i : ret; } @@ -766,22 +456,29 @@ out: * If @locked is non-NULL, it must held for read only and may be * released. If it's released, *@locked will be set to 0. */ -static long rto_populate_vma_page_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end, int *locked, struct list_head *hpages) +long rto_populate_vma_page_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end, int *locked, struct list_head *hpages) { struct mm_struct *mm = vma->vm_mm; unsigned long nr_pages = (end - start) / PAGE_SIZE; + int local_locked = 1; int gup_flags; + long ret; - VM_BUG_ON(start & ~PAGE_MASK); - VM_BUG_ON(end & ~PAGE_MASK); + VM_BUG_ON(!PAGE_ALIGNED(start)); + VM_BUG_ON(!PAGE_ALIGNED(end)); VM_BUG_ON_VMA(start < vma->vm_start, vma); VM_BUG_ON_VMA(end > vma->vm_end, vma); mmap_assert_locked(mm); - gup_flags = FOLL_TOUCH | FOLL_POPULATE | FOLL_MLOCK; + /* + * Rightly or wrongly, the VM_LOCKONFAULT case has never used + * faultin_page() to break COW, so it has no work to do here. + */ if (vma->vm_flags & VM_LOCKONFAULT) - gup_flags &= ~FOLL_POPULATE; + return nr_pages; + + gup_flags = FOLL_TOUCH; /* * We want to touch writable mappings with a write fault in order * to break COW, except for shared mappings because these don't COW @@ -801,15 +498,15 @@ static long rto_populate_vma_page_range(struct vm_area_struct *vma, * We made sure addr is within a VMA, so the following will * not result in a stack expansion that recurses back here. */ - return rto_get_user_pages(mm, start, nr_pages, gup_flags, - NULL, NULL, locked, hpages); + ret = rto_get_user_pages(mm, start, nr_pages, gup_flags, + NULL, NULL, locked ? locked : &local_locked, hpages); + return ret; } int rto_populate(struct file *file, unsigned long vaddr, unsigned long offset, unsigned long size, struct loaded_seg *loaded_seg) { struct mm_struct *mm = current->mm; - // struct inode *inode = file->f_inode; struct vm_area_struct *vma; int ret, locked = 1; diff --git a/src/sysboostd/Cargo.toml b/src/sysboostd/Cargo.toml index d763b90b889e89c8ce8602501a17fc4442def85d..8191b19e081afeec812338d1d818c593927597a4 100644 --- a/src/sysboostd/Cargo.toml +++ b/src/sysboostd/Cargo.toml @@ -28,4 +28,7 @@ rust-ini = "0.20.0" version = "3.2.0" [dependencies.libc] -version = "0.2" \ No newline at end of file +version = "0.2" + +[dependencies] +chrono = "0.4" \ No newline at end of file diff --git a/src/sysboostd/bolt.rs b/src/sysboostd/bolt.rs index d1efc3df56122bdd1023ceed9720e0a9bd3c5736..56f5ae99aad9a74ed2f2d0a9482eedf01aa6fa49 100644 --- a/src/sysboostd/bolt.rs +++ b/src/sysboostd/bolt.rs @@ -11,8 +11,9 @@ use crate::common::is_arch_x86_64; use crate::config::{RtoConfig, INIT_CONF}; +use crate::daemon::db_add_link; use crate::lib::process_ext::run_child; -use crate::aot::set_rto_link_flag; +use crate::aot::{set_rto_link_flag, set_app_link_flag}; use std::fs; use std::path::Path; @@ -72,6 +73,8 @@ fn bolt_optimize_bin(conf: &RtoConfig) -> i32 { return ret; } ret = set_rto_link_flag(&rto_path.to_str().unwrap().to_string(), true); + ret = set_app_link_flag(&conf.elf_path, true); + ret = db_add_link(&conf); return ret; } diff --git a/src/sysboostd/coredump_monitor.rs b/src/sysboostd/coredump_monitor.rs index c96bda7bf317684feb0bc600db1c23f9ed34c19a..011a3f3296789e4f5c0a66a2330a8ec6992b1048 100644 --- a/src/sysboostd/coredump_monitor.rs +++ b/src/sysboostd/coredump_monitor.rs @@ -37,10 +37,10 @@ lazy_static! { } // 设置SYSBOOST_LOG_PATH的权限仅root可写 -fn set_mode() { +pub fn set_mode(path: &str) { let mut set_mod: Vec = Vec::new(); set_mod.push("644".to_string()); - set_mod.push(SYSBOOST_LOG_PATH.to_string()); + set_mod.push(path.to_string()); let _ = run_child("/usr/bin/chmod", &set_mod); } @@ -82,7 +82,7 @@ fn record_crashed_path(path: String) { let exist = Path::new(&SYSBOOST_LOG_PATH).exists(); if !exist { let _ = std::fs::File::create(SYSBOOST_LOG_PATH.to_string()); - set_mode(); + set_mode(SYSBOOST_LOG_PATH); } let file_name = Path::new(&SYSBOOST_LOG_PATH); let mut file = match OpenOptions::new().append(true).open(file_name) { @@ -116,10 +116,7 @@ fn do_rollback(path: &String) -> i32 { } // remove link let link_path = format!("{}{}.link", SYSBOOST_DB_PATH, binary_name); - let exist = Path::new(&link_path).exists(); - if exist { - daemon::db_remove_link(&link_path); - } + daemon::db_remove_link(&link_path); // remove bash.rto let exist = Path::new(&rto_path).exists(); diff --git a/src/sysboostd/daemon.rs b/src/sysboostd/daemon.rs index 06a138b3f73815e1229185a36710536008dabcbf..6664fa80de222e5addf9e8606a20b35c8b3f35f1 100644 --- a/src/sysboostd/daemon.rs +++ b/src/sysboostd/daemon.rs @@ -26,7 +26,7 @@ use inotify::{EventMask, Inotify, WatchMask}; use log::{self}; use std::fs; use std::os::unix::fs as UnixFs; -use std::path::{Path}; +use std::path::Path; use std::thread; use std::time::Duration; @@ -116,7 +116,7 @@ fn clean_last_rto() { let real_path = match fs::canonicalize(&p) { Ok(p) => p, Err(e) => { - log::error!("get realpath failed: {}", e); + log::warn!("get realpath failed: {}", e); continue; } }; diff --git a/src/sysboostd/interface.rs b/src/sysboostd/interface.rs new file mode 100644 index 0000000000000000000000000000000000000000..c2474815e8d0375893942739cee64c2578c215dd --- /dev/null +++ b/src/sysboostd/interface.rs @@ -0,0 +1,160 @@ +use std::fs; +use std::path::Path; +use std::os::unix::fs as UnixFs; +use std::fs::OpenOptions; +use std::io::{Write, Read}; +use std::io::BufRead; + +use crate::aot::{set_rto_link_flag, set_app_link_flag}; +use crate::coredump_monitor::set_mode; +use crate::lib::process_ext::run_child; +use crate::daemon::{SYSBOOST_DB_PATH, self}; + +use chrono::Utc; +use chrono::TimeZone; + +pub const OPTIMIZED_ELF_LOG: &str = "/etc/sysboost.d/.optimized.log"; + +pub fn write_back_config(name: &str) -> i32 { + let exist = Path::new(&OPTIMIZED_ELF_LOG).exists(); + if !exist { + let _ = std::fs::File::create(OPTIMIZED_ELF_LOG.to_string()); + set_mode(OPTIMIZED_ELF_LOG); + } + let file_name = Path::new(&OPTIMIZED_ELF_LOG); + let mut file = match OpenOptions::new().append(true).open(file_name) { + Ok(f) => {f} + Err(e) => { + log::error!("open {} failed: {}", OPTIMIZED_ELF_LOG, e); + return -1; + } + }; + let now = Utc::now(); + let content = format!("{} optimized elf: {}\n", now.format("%Y-%m-%d %H:%M:%S"), name); + match file.write_all(content.as_bytes()) { + Ok(_) => {return 0;} + Err(e) => { + log::error!("write {} failed: {}", OPTIMIZED_ELF_LOG, e); + return -1; + } + } +} +pub fn delete_one_record(name: &str) -> i32 { + let exist = Path::new(&OPTIMIZED_ELF_LOG).exists(); + if !exist { + return 0; + } + let file_name = Path::new(&OPTIMIZED_ELF_LOG); + let rfile = match OpenOptions::new().read(true).open(file_name) { + Ok(f) => {f} + Err(e) => { + log::error!("open {} failed: {}", OPTIMIZED_ELF_LOG, e); + return -1; + } + }; + let mut buf = String::new(); + let reader = std::io::BufReader::new(&rfile); + for line in reader.lines() { + if line.as_ref().unwrap().contains(name){ + continue; + } + buf.push_str(line.as_ref().unwrap()); + buf.push_str("\n") + } + let mut wfile = match OpenOptions::new().truncate(true).write(true).open(file_name) { + Ok(f) => {f} + Err(e) => { + log::error!("open {} failed: {}", OPTIMIZED_ELF_LOG, e); + return -1; + } + }; + match wfile.write_all(buf.as_bytes()) { + Ok(_) => {return 0;} + Err(e) => { + log::error!("write {} failed: {}", OPTIMIZED_ELF_LOG, e); + return -1; + } + } + +} +pub fn bolt_add_link(file_name: &str) -> i32 { + // symlink app.link to app, different modes correspond to different directories + let names: Vec<&str> = file_name.split("/").collect(); + let binary_name = names[names.len() - 1]; + let link_path = format!("{}{}.link", SYSBOOST_DB_PATH, binary_name); + let ret_e = UnixFs::symlink(&binary_name, &link_path); + match ret_e { + Ok(_) => log::info!("symlink sucess {}", link_path), + Err(_) => { + log::error!("symlink fail {}", link_path); + return -1; + } + }; + 0 +} + +pub fn gen_bolt_optimize_bin(name: &str, bolt_option: &str, profile_path: &str) -> i32 { + let mut args: Vec = Vec::new(); + if bolt_option.is_empty() { + args.push("-reorder-blocks=ext-tsp".to_string()); + args.push("-reorder-functions=hfsort".to_string()); + args.push("-split-functions".to_string()); + args.push("-split-all-cold".to_string()); + args.push("-split-eh".to_string()); + args.push("-dyno-stats".to_string()); + } else { + let options: Vec<&str> = bolt_option.split(" ").collect(); + for option in options{ + args.push(option.to_string()); + } + } + let elf_path = Path::new(name); + let elf_path = match fs::canonicalize(elf_path) { + Ok(p) => p, + Err(e) => { + log::error!("bolt_optimize_bin: get realpath failed: {}", e); + return -1; + } + }; + let rto_path = elf_path.with_extension("rto"); + args.push(name.to_string()); + args.push("-o".to_string()); + args.push(rto_path.to_str().unwrap().to_string()); + args.push(format!("-data={}", profile_path)); + let mut ret = run_child("/usr/bin/llvm-bolt", &args); + if ret != 0 { + return ret; + } + ret = set_rto_link_flag(&rto_path.to_str().unwrap().to_string(), true); + ret = set_app_link_flag(&name.to_string(), true); + ret = bolt_add_link(name); + return ret; + +} + +pub fn stop_one_elf(path: &str) -> i32 { + let names: Vec<&str> = path.split("/").collect(); + let binary_name = names[names.len() - 1]; + let rto_path = format!("{}.rto", path); + // unset flag + let ret = set_app_link_flag(&path.to_string(), false); + if ret != 0 { + log::error!("Failed to unset link flag for {}", path); + return ret; + } + // remove link + let link_path = format!("{}{}.link", SYSBOOST_DB_PATH, binary_name); + daemon::db_remove_link(&link_path); + + // remove xx.rto + let exist = Path::new(&rto_path).exists(); + if exist { + match fs::remove_file(&rto_path) { + Ok(_) => {} + Err(e) => { + log::error!("remove file failed: {}", e); + } + } + } + 0 +} diff --git a/src/sysboostd/main.rs b/src/sysboostd/main.rs index 91fbdb8d789ce6617fb9239b62648e489d9e3196..d33c0abedd4bc3e1a6756009c1e5b9bfd57dc578 100644 --- a/src/sysboostd/main.rs +++ b/src/sysboostd/main.rs @@ -18,11 +18,16 @@ mod daemon; mod kmod_util; mod lib; mod netlink_client; +mod interface; use crate::config::parse_sysinit_config; use crate::coredump_monitor::coredump_monitor_loop; use crate::coredump_monitor::parse_crashed_log; use crate::daemon::daemon_loop; +use crate::interface::delete_one_record; +use crate::interface::gen_bolt_optimize_bin; +use crate::interface::stop_one_elf; +use crate::interface::write_back_config; use crate::kmod_util::test_kmod; use crate::bolt::gen_profile; use crate::config::INIT_CONF; @@ -35,6 +40,7 @@ use std::thread; const APP_NAME: &str = "sysboostd"; const DEFAULT_TIMEOUT: u32 = 10; +const PROFILE_PATH_DEFAULT: &str = "/usr/lib/sysboost.d/profile/mysqld.profile"; fn parameter_wrong_exit() { println!("parameter is wrong"); @@ -49,6 +55,13 @@ fn main() { let mut timeout = DEFAULT_TIMEOUT; let mut name = ""; + let mut is_bolt = false; + let mut bolt_option = ""; + let mut profile_path = ""; + let mut bolt_elf_name = ""; + + let mut is_stop = false; + let mut stop_elf_name = ""; // arg0 is program name, parameter is from arg1 for i in 1..args.len() { if args[i].contains("--gen-profile=") { @@ -67,7 +80,35 @@ fn main() { } continue; } - + if args[i].contains("--gen-bolt=") { + if let Some(index) = args[i].find('=') { + is_bolt = true; + bolt_elf_name = &args[i][index + 1..]; + } + continue; + } + if args[i].contains("--bolt-option=") { + if let Some(index) = args[i].find('=') { + bolt_option = &args[i][index + 1..]; + } + continue; + } + if args[i].contains("--profile-path=") { + if let Some(index) = args[i].find('=') { + profile_path = &args[i][index + 1..]; + } + continue; + } + if args[i].contains("--stop=") { + if let Some(index) = args[i].find('=') { + is_stop = true; + stop_elf_name = &args[i][index + 1..]; + } + if stop_elf_name.is_empty() { + parameter_wrong_exit(); + } + continue; + } match args[i].as_str() { "--debug" => { is_debug = true; @@ -93,6 +134,26 @@ fn main() { // 配置文件解析 parse_sysinit_config(); parse_crashed_log(); + //sysboostd --gen-bolt="/path/to/mysqld" --bolt-option="xxx" --profile-path="/path/to/mysqld.profile" + if is_bolt { + if profile_path.is_empty() { + profile_path = PROFILE_PATH_DEFAULT; + } + let ret = gen_bolt_optimize_bin(bolt_elf_name, bolt_option, profile_path); + if ret < 0 { + std::process::exit(-1); + } + std::process::exit(write_back_config(bolt_elf_name)); + } + //sysboostd --stop=/path/to/mysqld + if is_stop { + logger::init_log_to_console(APP_NAME, log::LevelFilter::Debug); + let ret = stop_one_elf(stop_elf_name); + if ret < 0 { + std::process::exit(-1); + } + std::process::exit(delete_one_record(stop_elf_name)); + } if is_gen_porfile { logger::init_log_to_console(APP_NAME, log::LevelFilter::Debug); std::process::exit(gen_profile(name, timeout));