diff --git a/Makefile b/Makefile
index aa5252eac5dc38c266c4e94b5bed0a9f7b15d47b..7e9d8b99991b6f424848aff7affcfd1b567e09b2 100644
--- a/Makefile
+++ b/Makefile
@@ -18,14 +18,17 @@ binfmt_rto:
 	make -C src/binfmt_rto
 
 release:
+	rm -rf Cargo.lock
 	rm -rf build
 	meson build
 
 debug:
+	rm -rf Cargo.lock
 	rm -rf build
 	meson build --buildtype=debug
 
 clean:
+	rm -rf Cargo.lock
 	ninja -C build clean
 	cargo clean
 
diff --git a/src/elf_link_common.c b/src/elf_link_common.c
index 91d8fc40ccb69bc50fe2e5ef0f49db30d9f27482..9239a048439adbd2e8491d1fe34444a462c1a3f4 100644
--- a/src/elf_link_common.c
+++ b/src/elf_link_common.c
@@ -125,7 +125,7 @@ void show_symbol_mapping(elf_link_t *elf_link)
 
 static void append_symbol_mapping_by_name(elf_link_t *elf_link, char *key, elf_file_t *ef, char *sym_name)
 {
-	unsigned long old_sym_addr = find_sym_old_addr(ef, sym_name);
+	unsigned long old_sym_addr = elf_find_symbol_addr_by_name(ef, sym_name);
 	unsigned long new_sym_addr = get_new_addr_by_old_addr(elf_link, ef, old_sym_addr);
 	append_symbol_mapping(elf_link, key, new_sym_addr);
 }
@@ -582,6 +582,7 @@ static unsigned long _get_new_addr_by_sym_name(elf_link_t *elf_link, char *sym_n
 	Elf64_Sym *sym = NULL;
 	int sym_count;
 
+	// find in all ELF symtab
 	for (int i = 1; i < count; i++) {
 		ef = &elf_link->in_efs[i];
 		sym_count = ef->symtab_sec->sh_size / sizeof(Elf64_Sym);
@@ -595,6 +596,7 @@ static unsigned long _get_new_addr_by_sym_name(elf_link_t *elf_link, char *sym_n
 		}
 	}
 
+	// find in template elf
 	ef = get_template_ef(elf_link);
 	sym_count = ef->symtab_sec->sh_size / sizeof(Elf64_Sym);
 	Elf64_Sym *syms = (Elf64_Sym *)(((void *)ef->hdr) + ef->symtab_sec->sh_offset);
@@ -606,6 +608,7 @@ static unsigned long _get_new_addr_by_sym_name(elf_link_t *elf_link, char *sym_n
 		}
 	}
 
+	// static mode need find symbol
 	if (is_share_mode(elf_link) == false) {
 		si_log_set_global_level(SI_LOG_LEVEL_DEBUG);
 		show_symbol_mapping(elf_link);
@@ -744,27 +747,6 @@ static unsigned long get_ifunc_new_addr(elf_link_t *elf_link, elf_file_t *ef, El
 	return ret;
 }
 
-unsigned long find_sym_old_addr(elf_file_t *ef, char *sym_name)
-{
-	int sym_count = ef->symtab_sec->sh_size / sizeof(Elf64_Sym);
-	Elf64_Sym *syms = (Elf64_Sym *)(((void *)ef->hdr) + ef->symtab_sec->sh_offset);
-	for (int j = 0; j < sym_count; j++) {
-		Elf64_Sym *sym = &syms[j];
-		char *name = elf_get_symbol_name(ef, sym);
-		if (elf_is_same_symbol_name(sym_name, name) && sym->st_shndx != SHN_UNDEF) {
-			return sym->st_value;
-		}
-	}
-	si_panic("can not find sym, %s %s\n", ef->file_name, sym_name);
-	return 0;
-}
-
-unsigned long find_sym_new_addr(elf_link_t *elf_link, elf_file_t *ef, char *sym_name)
-{
-	unsigned long old_addr = find_sym_old_addr(ef, sym_name);
-	return get_new_addr_by_old_addr(elf_link, ef, old_addr);
-}
-
 static unsigned long _get_new_addr_by_sym(elf_link_t *elf_link, elf_file_t *ef, Elf64_Sym *sym,
 					  bool is_dynsym)
 {
diff --git a/src/elf_link_common.h b/src/elf_link_common.h
index cd4abc9c090ce5fc239cd630d8e5e6bb345b40d4..bb587941d7b7bbed993680381b8b34bacffe7648 100644
--- a/src/elf_link_common.h
+++ b/src/elf_link_common.h
@@ -204,6 +204,11 @@ static inline elf_file_t *get_out_ef(elf_link_t *elf_link)
 	return &elf_link->out_ef;
 }
 
+static inline elf_file_t *get_libc_ef(elf_link_t *elf_link)
+{
+	return elf_link->libc_ef;
+}
+
 static inline int elf_read_s32(elf_file_t *ef, unsigned long offset)
 {
 	void *addr = ((void *)ef->hdr + (unsigned long)offset);
@@ -226,12 +231,18 @@ static inline unsigned elf_read_u32_va(elf_file_t *ef, unsigned long va)
 	return elf_read_u32(ef, elf_va_to_offset(ef, va));
 }
 
-static inline unsigned long elf_read_u64(elf_file_t *ef, unsigned long addr_)
+static inline unsigned long elf_read_u64(elf_file_t *ef, unsigned long offset)
 {
-	void *addr = ((void *)ef->hdr + (unsigned long)addr_);
+	void *addr = ((void *)ef->hdr + (unsigned long)offset);
 	return *(unsigned long *)addr;
 }
 
+// read the 8 bytes stored at virtual address va; returns unsigned long so the value is not truncated
+static inline unsigned long elf_read_u64_va(elf_file_t *ef, unsigned long va)
+{
+	return elf_read_u64(ef, elf_va_to_offset(ef, va));
+}
+
 static inline void elf_write_u64(elf_file_t *ef, unsigned long addr_, unsigned long value)
 {
 	unsigned long *addr = ((void *)ef->hdr + (unsigned long)addr_);
@@ -275,8 +286,6 @@ char *elf_get_tmp_section_name(elf_link_t *elf_link, Elf64_Shdr *shdr);
 Elf64_Shdr *find_tmp_section_by_name(elf_link_t *elf_link, const char *sec_name);
 Elf64_Shdr *find_tmp_section_by_src(elf_link_t *elf_link, Elf64_Shdr *shdr);
 
-unsigned long find_sym_old_addr(elf_file_t *ef, char *sym_name);
-unsigned long find_sym_new_addr(elf_link_t *elf_link, elf_file_t *ef, char *sym_name);
 unsigned long get_new_addr_by_sym_ok(elf_link_t *elf_link, elf_file_t *ef, Elf64_Sym *sym);
 unsigned long get_new_addr_by_sym(elf_link_t *elf_link, elf_file_t *ef, Elf64_Sym *sym);
 unsigned long get_new_addr_by_dynsym(elf_link_t *elf_link, elf_file_t *ef, Elf64_Sym *sym);
diff --git a/src/elf_link_elf.c b/src/elf_link_elf.c
index 469747c81b83c27440e67e69208b2ecf1e7b4f47..c21688ac9ce09fb45f4a0c4761725d696efb221e 100644
--- a/src/elf_link_elf.c
+++ b/src/elf_link_elf.c
@@ -163,13 +163,10 @@ elf_file_t *elf_link_add_infile(elf_link_t *elf_link, char *path)
 	}
 	elf_link->in_ef_nr++;
 
-	// TODO: clean code, do not use libc_ef
 	if (strncmp("libc.so", si_basename(path), sizeof("libc.so") - 1) == 0) {
 		elf_link->libc_ef = ef;
 	}
 
-	// TODO: feature, zk--- recursion add dep lib
-
 	return ef;
 }
 
@@ -696,7 +693,7 @@ static void write_sysboost_section(elf_link_t *elf_link)
 	}
 
 	elf_file_t *main_ef = get_main_ef(elf_link);
-	unsigned long old_sym_addr = find_sym_old_addr(main_ef, "main");
+	unsigned long old_sym_addr = elf_find_symbol_addr_by_name(main_ef, "main");
 	unsigned long new_sym_addr = get_new_addr_by_old_addr(elf_link, main_ef, old_sym_addr);
 	elf_link->sysboost_data->entry_addr = new_sym_addr;
 }*/
diff --git a/src/elf_read_elf.c b/src/elf_read_elf.c
index fd7b68cca7ead280fe3f70286a0baf55dacdf391..b78464d31d184dc4f578dad7b67eae42382f5515 100644
--- a/src/elf_read_elf.c
+++ b/src/elf_read_elf.c
@@ -38,6 +38,27 @@
 #define DEBUG_SEC_PRE_NAME ".debug_"
 #define BUILD_ID_LEN 40
 
+bool elf_is_copy_symbol(elf_file_t *ef, Elf64_Sym *sym, bool is_dynsym)
+{
+	char *sym_name = NULL;
+	if (is_dynsym) {
+		sym_name = elf_get_dynsym_name(ef, sym);
+		// stdout@GLIBC_2.2.5 (2)
+		// TODO: dynsym detection is not implemented yet, so this branch
+		// always falls through to "return false" below
+
+	} else {
+		sym_name = elf_get_symbol_name(ef, sym);
+		// symtab name have @LIBC
+		char *c = index(sym_name, '@');
+		if (c) {
+			return true;
+		}
+	}
+
+	return false;
+}
+
 // cmp symbol name without sym version
 bool elf_is_same_symbol_name(const char *a, const char *b)
 {
@@ -153,6 +174,17 @@ Elf64_Sym *elf_find_symbol_by_name(elf_file_t *ef, const char *sym_name)
 	return &syms[i];
 }
 
+// return st_value of the defined symbol sym_name in ef, panic when it is missing or undefined
+unsigned long elf_find_symbol_addr_by_name(elf_file_t *ef, char *sym_name)
+{
+	Elf64_Sym *sym = elf_find_symbol_by_name(ef, sym_name);
+	if (sym == NULL || sym->st_shndx == SHN_UNDEF) {
+		si_panic("can not find sym, %s %s\n", ef->file_name, sym_name);
+		return 0;
+	}
+	return sym->st_value;
+}
+
 unsigned long elf_va_to_offset(elf_file_t *ef, unsigned long va)
 {
 	Elf64_Shdr *sechdrs = ef->sechdrs;
diff --git a/src/elf_read_elf.h b/src/elf_read_elf.h
index 11f51a790d6bb1707a7c6891f4a1adfaebf751a3..18f6c9a79e2dab7cefa4badfa6d6f0f1fa8ede97 100644
--- a/src/elf_read_elf.h
+++ b/src/elf_read_elf.h
@@ -86,9 +86,11 @@ int elf_find_func_range_by_name(elf_file_t *ef, const char *func_name,
 // symbol
 unsigned elf_find_symbol_index_by_name(elf_file_t *ef, const char *name);
 Elf64_Sym *elf_find_symbol_by_name(elf_file_t *ef, const char *sym_name);
+unsigned long elf_find_symbol_addr_by_name(elf_file_t *ef, char *sym_name);
 bool elf_is_same_symbol_name(const char *a, const char *b);
 char *get_sym_name_dynsym(elf_file_t *ef, unsigned int index);
 int find_dynsym_index_by_name(elf_file_t *ef, const char *name, bool clear);
+bool elf_is_copy_symbol(elf_file_t *ef, Elf64_Sym *sym, bool is_dynsym);
 
 // section
 Elf64_Shdr *elf_find_section_by_tls_offset(elf_file_t *ef, unsigned long obj_tls_offset);
diff --git a/src/elf_relocation.c b/src/elf_relocation.c
index 6d2a368bb2b00519c76d809b268adc260e36eda0..66992b98f554cd7307b6e513445b75c5fb85ff4b 100644
--- a/src/elf_relocation.c
+++ b/src/elf_relocation.c
@@ -108,6 +108,19 @@ void modify_local_call(elf_link_t *elf_link)
 	}
 }
 
+static void rela_change_to_relative(Elf64_Rela *dst_rela, unsigned long addend)
+{
+	dst_rela->r_addend = addend;
+
+#ifdef __aarch64__
+	dst_rela->r_info = ELF64_R_INFO(0, ELF64_R_TYPE(R_AARCH64_RELATIVE));
+#else
+	dst_rela->r_info = ELF64_R_INFO(0, ELF64_R_TYPE(R_X86_64_RELATIVE));
+#endif
+
+	// offset modify by caller
+}
+
 // The __stack_chk_guard and __stack_chk_fail symbols are normally supplied by a GCC library called libssp
 // we can not change code to direct access the symbol, some code use 2 insn to point symbol, the adrp insn may be shared
 static void modify_rela_to_RELATIVE(elf_link_t *elf_link, elf_file_t *src_ef, Elf64_Rela *src_rela, Elf64_Rela *dst_rela)
@@ -125,15 +138,37 @@ static void modify_rela_to_RELATIVE(elf_link_t *elf_link, elf_file_t *src_ef, El
 		// do nothing
 		return;
 	}
-	dst_rela->r_addend = ret;
 
-#ifdef __aarch64__
-	dst_rela->r_info = ELF64_R_INFO(0, ELF64_R_TYPE(R_AARCH64_RELATIVE));
-#else
-	dst_rela->r_info = ELF64_R_INFO(0, ELF64_R_TYPE(R_X86_64_RELATIVE));
-#endif
+	rela_change_to_relative(dst_rela, ret);
+}
 
-	// offset modify by caller
+static void rela_use_relative(elf_link_t *elf_link, elf_file_t *src_ef, Elf64_Rela *src_rela, Elf64_Rela *dst_rela)
+{
+	// 000000000012dd60 000001b900000005 R_X86_64_COPY 000000000012dd60 stdout@GLIBC_2.2.5 + 0
+	// 441: 000000000012dd60 8 OBJECT GLOBAL DEFAULT 36 stdout@GLIBC_2.2.5 (2)
+	// libc: 1407: 00000000001ed688 8 OBJECT GLOBAL DEFAULT 36 stdout@@GLIBC_2.2.5
+	// copy symbol data to bss area
+
+	Elf64_Sym *sym = elf_get_dynsym_by_rela(src_ef, src_rela);
+	// NOTE(review): elf_is_copy_symbol() has no dynsym implementation yet, so this check always panics - confirm
+	if (elf_is_copy_symbol(src_ef, sym, true) == false) {
+		si_panic("%s %lx\n", src_ef->file_name, src_rela->r_offset);
+		return;
+	}
+
+	char *sym_name = elf_get_dynsym_name(src_ef, sym);
+	elf_file_t *libc_ef = get_libc_ef(elf_link);
+	unsigned long old_sym_addr = elf_find_symbol_addr_by_name(libc_ef, sym_name);
+	unsigned long new_sym_addr = get_new_addr_by_old_addr(elf_link, libc_ef, old_sym_addr);
+	if (new_sym_addr == NOT_FOUND) {
+		si_panic("%s %lx\n", src_ef->file_name, src_rela->r_offset);
+		return;
+	}
+
+	// TODO: check copy size
+	// NOTE(review): new_sym_addr is an output address but is read through libc_ef - confirm the address space
+	unsigned long data = elf_read_u64_va(libc_ef, new_sym_addr);
+	rela_change_to_relative(dst_rela, data);
 }
 
 void modify_rela_dyn_item(elf_link_t *elf_link, elf_file_t *src_ef, Elf64_Rela *src_rela, Elf64_Rela *dst_rela)
@@ -204,10 +239,7 @@ void modify_rela_dyn_item(elf_link_t *elf_link, elf_file_t *src_ef, Elf64_Rela *
 		dst_rela->r_addend = elf_get_new_tls_offset(elf_link, src_ef, src_rela->r_addend);
 		break;
 	case R_X86_64_COPY:
-		// 000000000012dd60 000001b900000005 R_X86_64_COPY 000000000012dd60 stdout@GLIBC_2.2.5 + 0
-		// 441: 000000000012dd60 8 OBJECT GLOBAL DEFAULT 36 stdout@GLIBC_2.2.5 (2)
-		// copy addr of sym to bss var, dyn sym need fix sym.value, see modify_symbol()
-		// nothing need to do here
+		rela_use_relative(elf_link, src_ef, src_rela, dst_rela);
 		break;
 	case R_AARCH64_COPY:
 		// Variables in the bss section, some from glibc, some declared by the application
diff --git a/src/elf_relocation_aarch64.c b/src/elf_relocation_aarch64.c
index a8fd63a4a68d9b2003eff4426858d89084eaf056..754f45252464323d4ad74a9de25c5f71b44155b1 100644
--- a/src/elf_relocation_aarch64.c
+++ b/src/elf_relocation_aarch64.c
@@ -711,7 +711,7 @@ static void modify_branch_insn(elf_link_t *elf_link, elf_file_t *ef, Elf64_Rela
 	char *name = elf_get_symbol_name(ef, sym);
 	if (unlikely(elf_is_same_symbol_name(name, "main"))) {
 		elf_file_t *main_ef = get_main_ef(elf_link);
-		old_sym_addr = find_sym_old_addr(main_ef, "main");
+		old_sym_addr = elf_find_symbol_addr_by_name(main_ef, "main");
 		new_sym_addr = get_new_addr_by_old_addr(elf_link, main_ef, old_sym_addr);
 		goto out;
 	}
@@ -720,7 +720,7 @@ static void modify_branch_insn(elf_link_t *elf_link, elf_file_t *ef, Elf64_Rela
 	// exit process, and directly calls the _Exit function to end the process.
 	if (!is_share_mode(elf_link) && unlikely(elf_is_same_symbol_name(name, "exit"))) {
 		elf_file_t *template_ef = get_template_ef(elf_link);
-		old_sym_addr = find_sym_old_addr(template_ef, "_exit");
+		old_sym_addr = elf_find_symbol_addr_by_name(template_ef, "_exit");
 		new_sym_addr = get_new_addr_by_old_addr(elf_link, template_ef, old_sym_addr);
 		goto out;
 	}
diff --git a/src/elf_relocation_x86_64.c b/src/elf_relocation_x86_64.c
index b530e8ad24b9047b03b5a5a052765f1cbebeec36..b127c89b686d15927a44d04d8b360c342b4d0bd3 100644
--- a/src/elf_relocation_x86_64.c
+++ b/src/elf_relocation_x86_64.c
@@ -300,17 +300,6 @@ static void fix_main_for_static_mode(elf_link_t *elf_link, elf_file_t *ef, Elf64
 	}
 }
 
-static bool is_indirect_point_to_symbol(elf_file_t *ef, Elf64_Sym *sym)
-{
-	char *sym_name = elf_get_symbol_name(ef, sym);
-	char *c = index(sym_name, '@');
-	if (c) {
-		return true;
-	}
-
-	return false;
-}
-
 static void modify_insn_for_pc32(elf_link_t *elf_link, elf_file_t *ef, Elf64_Rela *rela, Elf64_Sym *sym)
 {
 	// STT_FUNC no need reloc
@@ -328,7 +317,7 @@ static void modify_insn_for_pc32(elf_link_t *elf_link, elf_file_t *ef, Elf64_Rel
 	// libc environ is weak, so other ELF have the some var
 	// feature: 48 89 05 96 f8 0d 00 mov %rax,0xdf896(%rip)
 	// 952: 00000000001f4ce0 8 OBJECT WEAK DEFAULT 44 environ@@GLIBC_2.2.5
-	if (is_direct_point_var_optimize(elf_link) && is_indirect_point_to_symbol(ef, sym)) {
+	if (is_direct_point_var_optimize(elf_link) && elf_is_copy_symbol(ef, sym, false)) {
 		unsigned long sym_addr = get_new_addr_by_sym(elf_link, ef, sym);
 		if (sym_addr != NOT_FOUND) {
 			unsigned char *insn = get_insn_begin_by_offset(elf_link, ef, rela);