diff --git a/src/binfmt_rto/binfmt_rto.c b/src/binfmt_rto/binfmt_rto.c
index 6d1c58ea54c9e6f531487282ba1454782cf9f2ba..3a329fc0ec433a73192537e69dc7b5b7e5d0291d 100644
--- a/src/binfmt_rto/binfmt_rto.c
+++ b/src/binfmt_rto/binfmt_rto.c
@@ -297,13 +297,13 @@ static struct linux_binfmt elf_format = {
 static int set_brk(unsigned long start, unsigned long end, int prot)
 {
 	// pr_info("enter set_brk, start: 0x%lx, end: 0x%lx\n", start, end);
-	if (use_hpage) {
-		start = ELF_HPAGEALIGN(start);
-		end = ELF_HPAGEALIGN(end);
-	} else {
+	// if (use_hpage) {
+	// 	start = ELF_HPAGEALIGN(start);
+	// 	end = ELF_HPAGEALIGN(end);
+	// } else {
 		start = ELF_PAGEALIGN(start);
 		end = ELF_PAGEALIGN(end);
-	}
+	// }
 	if (end > start) {
 		/*
 		 * Map the last of the bss segment.
@@ -314,7 +314,8 @@ static int set_brk(unsigned long start, unsigned long end, int prot)
 			prot & PROT_EXEC ? VM_EXEC : 0);
 		if (error)
 			return error;
-		// pr_info("set_brk: 0x%lx-0x%lx\n", start, end);
+		if (debug)
+			pr_info("set_brk: 0x%lx-0x%lx\n", start, end);
 	}
 	current->mm->start_brk = current->mm->brk = end;
 	return 0;
@@ -329,15 +330,15 @@ static int padzero(unsigned long elf_bss)
 {
 	unsigned long nbyte;
 
-	if (use_hpage)
-		nbyte = ELF_HPAGEOFFSET(elf_bss);
-	else
+	// if (use_hpage)
+	// 	nbyte = ELF_HPAGEOFFSET(elf_bss);
+	// else
 		nbyte = ELF_PAGEOFFSET(elf_bss);
 	if (nbyte) {
-		if (use_hpage)
-			nbyte = HPAGE_SIZE - nbyte;
-		else
+		// if (use_hpage)
+		// 	nbyte = HPAGE_SIZE - nbyte;
+		// else
 			nbyte = ELF_MIN_ALIGN - nbyte;
 		// pr_info("padzero: 0x%lx-0x%lx\n", elf_bss, elf_bss + nbyte);
 		if (clear_user((void __user *) elf_bss, nbyte))
@@ -561,7 +562,7 @@ create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec,
 
 static unsigned long elf_map(struct file *filep, unsigned long addr,
 		const struct elf_phdr *eppnt, int prot, int type,
-		unsigned long total_size, bool use_pmd_mapping)
+		unsigned long total_size, bool use_pmd_mapping, bool is_exec_seg)
 {
 	unsigned long map_addr, size, off;
 
@@ -569,7 +570,10 @@ static unsigned long elf_map(struct file *filep, unsigned long addr,
 		size = eppnt->p_filesz + ELF_HPAGEOFFSET(eppnt->p_vaddr);
 		off = eppnt->p_offset - ELF_HPAGEOFFSET(eppnt->p_vaddr);
 		addr = ELF_HPAGESTART(addr);
-		size = ELF_HPAGEALIGN(size);
+		if (is_exec_seg)
+			size = ELF_HPAGEALIGN(size);
+		else
+			size = ELF_PAGEALIGN(size);
 	} else {
 		size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
 		off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
@@ -595,15 +599,17 @@ static unsigned long elf_map(struct file *filep, unsigned long addr,
 			total_size = ELF_HPAGEALIGN(total_size);
 		else
 			total_size = ELF_PAGEALIGN(total_size);
-		// pr_info("vm_mmap, addr: %lx, total_size: %lx, off: %lx",
-		// 	addr, total_size, off);
+		if (debug)
+			pr_info("vm_mmap, addr: %lx, total_size: %lx, off: %lx",
+				addr, total_size, off);
 		map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
 		if (!BAD_ADDR(map_addr))
 			vm_munmap(map_addr+size, total_size-size);
 	} else {
+		if (debug)
+			pr_info("vm_mmap, addr: %lx, size: %lx, off: %lx",
+				addr, size, off);
 		map_addr = vm_mmap(filep, addr, size, prot, type, off);
-		// pr_info("vm_mmap, addr: %lx, size: %lx, off: %lx",
-		// 	addr, size, off);
 	}
 
 	if ((type & MAP_FIXED_NOREPLACE) &&
@@ -628,10 +634,10 @@ static unsigned long total_mapping_size(const struct elf_phdr *cmds, int nr)
 	if (first_idx == -1)
 		return 0;
 
-	if (use_hpage)
-		return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
-			ELF_HPAGESTART(cmds[first_idx].p_vaddr);
-	else
+	// if (use_hpage)
+	// 	return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
+	// 		ELF_HPAGESTART(cmds[first_idx].p_vaddr);
+	// else
 		return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
 			ELF_PAGESTART(cmds[first_idx].p_vaddr);
 }
@@ -867,7 +873,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
 			load_addr = -vaddr;
 
 		map_addr = elf_map(interpreter, load_addr + vaddr,
-				eppnt, elf_prot, elf_type, total_size, false);
+				eppnt, elf_prot, elf_type, total_size, false, false);
 		total_size = 0;
 		error = map_addr;
@@ -1201,7 +1207,7 @@ load_rto:
 	}
 
 	/* replace app.rto file, then use binfmt */
-	if (is_rto_symbolic_link && !is_rto_format) {
+	if (is_rto_symbolic_link) {
 		// struct inode *inode = bprm->file->f_inode;
 		int ret;
 		if (use_hpage)
@@ -1219,7 +1225,8 @@ load_rto:
 
 	/* loading rto from now on */
 	if (debug) {
-		printk("exec in rto mode, is_rto_format %d\n", is_rto_format);
+		printk("exec in rto mode, filename: %s, is_rto_symbolic_link: %d, is_rto_format: %d\n",
+			bprm->file->f_path.dentry->d_iname, is_rto_symbolic_link, is_rto_format);
 	}
 #endif
@@ -1413,6 +1420,7 @@ out_free_interp:
 	   the correct location in memory. */
 	for(i = 0, elf_ppnt = elf_phdata;
 	    i < elf_ex->e_phnum; i++, elf_ppnt++) {
+		bool is_exec_seg = elf_ppnt->p_flags & PF_X;
 		int elf_prot, elf_flags;
 		unsigned long k, vaddr;
 		unsigned long total_size = 0;
@@ -1510,10 +1518,10 @@ out_free_interp:
 			 * ELF vaddrs will be correctly offset. The result
 			 * is then page aligned.
 			 */
-			if (use_hpage)
+			// if (use_hpage)
 				load_bias = ELF_HPAGESTART(load_bias - vaddr);
-			else
-				load_bias = ELF_PAGESTART(load_bias - vaddr);
+			// else
+			// 	load_bias = ELF_PAGESTART(load_bias - vaddr);
 
 			total_size = total_mapping_size(elf_phdata,
 							elf_ex->e_phnum);
@@ -1524,7 +1532,8 @@ out_free_interp:
 		}
 
 		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
-				elf_prot, elf_flags, 0, use_hpage);
+				elf_prot, elf_flags, total_size, true, is_exec_seg);
+				// elf_prot, elf_flags, 0, use_hpage);
 		if (BAD_ADDR(error)) {
 			if (debug)
 				pr_info("lyt elf_map error: %ld\n", PTR_ERR((void*)error));
@@ -1543,7 +1552,8 @@ out_free_interp:
 			if (debug)
 				pr_info("lyt vaddr: 0x%lx, off: 0x%lx, size: 0x%lx\n",
 					error, off, size);
-			rto_populate(bprm->file, error, off, size, loaded_seg);
+			if (is_exec_seg)
+				rto_populate(bprm->file, error, off, size, loaded_seg);
 		}
 
 		if (!load_addr_set) {
@@ -1551,7 +1561,7 @@ out_free_interp:
 			load_addr_set = 1;
 			load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
 			if (elf_ex->e_type == ET_DYN) {
 				load_bias += error -
-					     ELF_PAGESTART(load_bias + vaddr);
+					     ELF_HPAGESTART(load_bias + vaddr);
 				load_addr += load_bias;
 				reloc_func_desc = load_bias;
 			}
@@ -1737,10 +1747,11 @@ out_free_interp:
 	finalize_exec(bprm);
 	start_thread(regs, elf_entry, bprm->p);
-	if (debug)
-		pr_info("rto load successful, e_entry: %lx, elf_bss: %lx\n",
-			e_entry, elf_bss);
-	print_vma(current->mm);
+	if (debug) {
+		pr_info("rto load successful, e_entry: %lx, elf_bss: %lx, mm->brk: %lx\n",
+			e_entry, elf_bss, mm->brk);
+		print_vma(current->mm);
+	}
 	retval = 0;
 out:
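Note on the alignment helpers the hunks above rely on: ELF_HPAGESTART, ELF_HPAGEOFFSET, and ELF_HPAGEALIGN are huge-page analogues of the classic ELF_PAGESTART/ELF_PAGEOFFSET/ELF_PAGEALIGN trio from fs/binfmt_elf.c. A minimal sketch of what they are assumed to expand to, taking HPAGE_SIZE as the 2 MiB PMD size (the module's real definitions live in its headers and may differ):

	#define ELF_HPAGESTART(_v)	((_v) & ~(unsigned long)(HPAGE_SIZE - 1))
	#define ELF_HPAGEOFFSET(_v)	((_v) & (HPAGE_SIZE - 1))
	#define ELF_HPAGEALIGN(_v)	(((_v) + HPAGE_SIZE - 1) & \
					 ~(unsigned long)(HPAGE_SIZE - 1))

With the new is_exec_seg flag, elf_map() keeps 2 MiB alignment only for PF_X segments and falls back to 4 KiB alignment for data segments, which matches the change above that calls rto_populate() for the executable segment only.
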
diff --git a/src/binfmt_rto/loader_device.c b/src/binfmt_rto/loader_device.c
index 6a111c2ceb012c4b92d9ff3a3900cd405ecd3349..528b066ff985ade31c6b72e58858a3ea2600cfd6 100644
--- a/src/binfmt_rto/loader_device.c
+++ b/src/binfmt_rto/loader_device.c
@@ -46,6 +46,7 @@ static int load_seg(struct file *file, struct loaded_rto *loaded_rto,
 	loff_t pos = offset, end = offset + size;
 	ssize_t bytes;
 	struct inode *inode = file->f_inode;
+	int i;
 
 	loaded_seg = loaded_seg_alloc(inode);
 	if (!loaded_seg)
@@ -57,6 +58,9 @@ static int load_seg(struct file *file, struct loaded_rto *loaded_rto,
 			ret = -ENOMEM;
 			goto error;
 		}
+		for (i = 0; i < 100000; i++) {
+			get_page(page);
+		}
 
 		bytes = kernel_read(file, page_to_virt(page), HPAGE_SIZE, &pos);
 		if (bytes < 0) {
@@ -75,9 +79,12 @@ static int load_seg(struct file *file, struct loaded_rto *loaded_rto,
 		} else {
 			get_page(page);
 		}
-		list_add_tail(&page->lru, &loaded_seg->hpages);
-		// pr_info("load_seg: load 1 hpage: 0x%lx, compound_order(page): %d\n",
-		// 	page, compound_order(page));
+		// if (loaded_rto->segs.next == &loaded_rto->segs ||
+		//     loaded_seg->hpages.next == &loaded_seg->hpages) {
+			list_add_tail(&page->lru, &loaded_seg->hpages);
+			pr_info("load_seg: load 1 hpage: %pK, compound_order(page): %d\n",
+				page, compound_order(page));
+		// }
 	}
 	list_add_tail(&loaded_seg->list, &loaded_rto->segs);
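For reference, the bookkeeping load_seg() fills in is assumed to look roughly like this (field names inferred from their use in this patch; the real definitions live in the loader-device headers):

	struct loaded_seg {
		struct list_head list;		/* node in loaded_rto->segs */
		struct list_head hpages;	/* 2 MiB compound pages, chained via page->lru */
	};

	struct loaded_rto {
		struct list_head list;		/* node in the device's global rto list */
		struct inode *inode;		/* the .rto file these pages cache */
		struct list_head segs;
	};

Each loop iteration reads one HPAGE_SIZE chunk of the segment into a freshly allocated compound page; the 100000-iteration get_page() loop deliberately inflates the refcount so later put_page() calls from unmap paths can never drop it to zero and free the cached contents.
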
diff --git a/src/binfmt_rto/rto_populate.c b/src/binfmt_rto/rto_populate.c
index 77d17e7f9e6762a817122b708394d4f6c5e0f0e8..77a771509016ecf423c9a4b6699817be96783892 100644
--- a/src/binfmt_rto/rto_populate.c
+++ b/src/binfmt_rto/rto_populate.c
@@ -132,10 +132,14 @@ static vm_fault_t __rto_do_huge_pmd_anonymous_page(struct vm_fault *vmf,
 	unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
 	vm_fault_t ret = 0;
 
+// 	vma_set_anonymous(vma);
+
 	VM_BUG_ON_PAGE(!PageCompound(page), page);
 	// pr_info("enter __rto_do_huge_pmd_anonymous_page\n");
-
+	if (debug)
+		pr_info("vma->vm_start: %lx, vma->vm_end: %lx, vma->vm_pgoff: %lx\n",
+			vma->vm_start, vma->vm_end, vma->vm_pgoff);
 	ret = ppl_sym.do_set_pmd(vmf, page);
 	// pr_info("__rto_do_huge_pmd_anonymous_page return %d\n", ret);
 	return ret;
@@ -373,10 +377,6 @@ retry_pud:
 	// else
 	// 	pr_info("pmd is null\n");
 	vmf.pmd = rto_pmd_alloc(mm, vmf.pud, address);
-	// if (vmf.pmd)
-	// 	pr_info("vmf.pmd: %pK, value: 0x%lx\n", vmf.pmd, pmd_val(*vmf.pmd));
-	// else
-	// 	pr_info("vmf.pmd is null\n");
 	if (!vmf.pmd)
 		return VM_FAULT_OOM;
@@ -391,9 +391,18 @@ retry_pud:
 	// if (pmd_none(*vmf.pmd) && __transparent_hugepage_enabled(vma)) {
 		ret = create_huge_pmd(&vmf, hpage);
+		if (debug) {
+			if (vmf.pmd) {
+				pr_info("vmf.pmd: %pK, value: 0x%llx, pmd_trans_huge: %d\n",
+					vmf.pmd, pmd_val(*vmf.pmd), pmd_trans_huge(*vmf.pmd));
+			} else {
+				pr_info("vmf.pmd is null\n");
+			}
+		}
 		if (!(ret & VM_FAULT_FALLBACK))
 			return ret;
 	// }
+	BUG();
 
 	return 0;
@@ -608,7 +617,11 @@ static long rto_get_user_pages(struct mm_struct *mm,
 		hpage_pos = hpage_pos->next;
 		// pr_info("hpage_pos: 0x%pK, addr: 0x%lx\n", hpage_pos, start);
-		BUG_ON(hpage_pos == hpages);
+		if (hpage_pos == hpages) {
+			if (debug)
+				pr_info("hpage used up\n");
+			return 0;
+		}
 
 		/* first iteration or cross vma bound */
 		if (!vma || start >= vma->vm_end) {
@@ -645,7 +658,7 @@ static long rto_get_user_pages(struct mm_struct *mm,
 			// 	continue;
 			// }
 		}
-retry:
+// retry:
 		/*
 		 * If we have a pending SIGKILL, don't keep faulting pages and
 		 * potentially allocating memory.
@@ -660,11 +673,15 @@ retry:
 		// page = ppl_sym.follow_page_mask(vma, start, foll_flags, &ctx);
 		hpage = list_entry(hpage_pos, struct page, lru);
 		if (TestPageNeedCopy(hpage)) {
+			int i;
 			// pr_info("alloc new_hpage for page: 0x%pK\n", hpage);
 			new_hpage = alloc_pages(GFP_KERNEL | __GFP_ZERO | __GFP_COMP,
 						HUGETLB_PAGE_ORDER);
 			if (!new_hpage)
 				BUG();
+			for (i = 0; i < 1000; i++) {
+				get_page(new_hpage);
+			}
 			memcpy(page_to_virt(new_hpage), page_to_virt(hpage), HPAGE_SIZE);
 			hpage = new_hpage;
 		} else {
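Taken together, rto_populate() walks the mapped range 2 MiB at a time and installs each cached compound page directly at PMD level. Stripped of locking, pmd_none() checks, mm counters, and rmap handling, the kernel helper that ppl_sym.do_set_pmd points at reduces to something like this sketch (simplified, not the module's code):

	unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
	pmd_t entry = pmd_mkhuge(mk_pmd(page, vma->vm_page_prot));

	set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry);	/* one entry maps 2 MiB */
	update_mmu_cache_pmd(vma, haddr, vmf->pmd);

One PMD entry then covers an entire 2 MiB slice of the preloaded text, so the .rto fast path costs a single TLB entry per huge page instead of 512 separate 4 KiB mappings.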