From 48061b1ca2783a5df0638e8d7f6299dbca3d2452 Mon Sep 17 00:00:00 2001 From: Zhou Kang Date: Sat, 22 Jul 2023 09:09:08 +0000 Subject: [PATCH] fix search libs order --- src/elf_link_common.c | 88 +++++++++++++++++++++++++++----- src/elf_link_common.h | 22 +++++--- src/elf_link_elf.c | 44 ++++++++++------ src/elf_read_elf.c | 72 +++++++++++++++++--------- src/elf_read_elf.h | 36 +++++++++++-- src/elf_relocation.c | 97 ++++++++++++++++++++++++++---------- src/elf_relocation_aarch64.c | 11 +++- src/elf_relocation_x86_64.c | 39 +++------------ src/elf_write_elf.h | 7 +++ tests/bash/.gitignore | 4 +- tests/bash/Makefile | 8 ++- 11 files changed, 304 insertions(+), 124 deletions(-) diff --git a/src/elf_link_common.c b/src/elf_link_common.c index 4881bc6..30d7bb6 100644 --- a/src/elf_link_common.c +++ b/src/elf_link_common.c @@ -559,7 +559,6 @@ Elf64_Shdr *find_tmp_section_by_name(elf_link_t *elf_link, const char *sec_name) return NULL; } -// addr != offset from RELRO segment static unsigned long _get_new_elf_addr(elf_link_t *elf_link, elf_file_t *src_ef, unsigned long addr) { int len = elf_link->sec_mapping_arr->len; @@ -697,13 +696,17 @@ static unsigned long get_ifunc_new_addr(elf_link_t *elf_link, elf_file_t *ef, El static unsigned long _get_new_addr_by_sym_name(elf_link_t *elf_link, char *sym_name) { - int count = elf_link->in_ef_nr; + int in_ef_nr = elf_link->in_ef_nr; elf_file_t *ef = NULL; Elf64_Sym *sym = NULL; int sym_count; - // find in all ELF symtab - for (int i = 1; i < count; i++) { + // find in all ELF symtab, find template elf after + int i = 0; + if (is_static_nolibc_mode(elf_link)) { + i = 1; + } + for (; i < in_ef_nr; i++) { ef = &elf_link->in_efs[i]; sym_count = ef->symtab_sec->sh_size / sizeof(Elf64_Sym); Elf64_Sym *syms = (Elf64_Sym *)(((void *)ef->hdr) + ef->symtab_sec->sh_offset); @@ -743,6 +746,70 @@ out: return get_new_addr_by_old_addr(elf_link, ef, sym->st_value); } +// lookup symbol in order +// scope: /usr/bin/bash /usr/lib64/libtinfo.so.6 /usr/lib64/libc.so.6 /lib64/ld-linux-x86-64.so.2 +static unsigned long get_new_addr_by_lookup(elf_link_t *elf_link, elf_file_t *src_ef, Elf64_Sym *sym) +{ + char *sym_name = elf_get_sym_name(src_ef, sym); + + if (sym->st_shndx == SHN_UNDEF) { + goto out; + } + + // find in main ELF + elf_file_t *ef = get_main_ef(elf_link); + Elf64_Sym *lookup_sym = elf_find_dynsym_by_name(ef, sym_name); + if ((lookup_sym != NULL) && (lookup_sym->st_shndx != SHN_UNDEF)) { + return get_new_addr_by_old_addr(elf_link, ef, lookup_sym->st_value); + } + + // use self ELF sym + return get_new_addr_by_old_addr(elf_link, src_ef, sym->st_value); + +out: + // find sym in other merge ELF + return _get_new_addr_by_sym_name(elf_link, sym_name); +} + +Elf64_Sym *elf_lookup_symbol_by_rela(elf_link_t *elf_link, elf_file_t *src_ef, Elf64_Rela *src_rela, elf_file_t **lookup_ef) +{ + Elf64_Sym *sym = elf_get_dynsym_by_rela(src_ef, src_rela); + char *sym_name = elf_get_sym_name(src_ef, sym); + + int type = ELF64_R_TYPE(src_rela->r_info); + if (type != R_X86_64_COPY) { + si_panic("type wrong %s %lx\n", src_ef->file_name, src_rela->r_offset); + return NULL; + } + + // feature: find order need deps lib + int in_ef_nr = elf_link->in_ef_nr; + elf_file_t *ef = NULL; + Elf64_Sym *syms = NULL; + int sym_count; + + for (int i = 1; i < in_ef_nr; i++) { + ef = &elf_link->in_efs[i]; + if (ef == src_ef) { + // dont find src ELF, src ELF is main ELF + continue; + } + + syms = elf_get_dynsym_array(ef); + sym_count = elf_get_dynsym_count(ef); + for (int j = 0; j < sym_count; j++) { + sym = &syms[j]; + char *name = elf_get_sym_name(ef, sym); + if (elf_is_same_symbol_name(sym_name, name) && sym->st_shndx != SHN_UNDEF) { + *lookup_ef = ef; + return sym; + } + } + } + + return NULL; +} + static char *get_ifunc_nice_name(char *sym_name) { if (sym_name == NULL) { @@ -861,8 +928,8 @@ static unsigned long get_ifunc_new_addr(elf_link_t *elf_link, elf_file_t *ef, El // use ifunc return value ret = _get_ifunc_new_addr_by_dl(elf_link, ef, sym, nice_sym_name); } - append_symbol_mapping(elf_link, nice_sym_name, ret); - SI_LOG_DEBUG("ifunc %s %16lx\n", nice_sym_name, ret); + append_symbol_mapping(elf_link, sym_name, ret); + SI_LOG_DEBUG("ifunc %-30s %16lx\n", sym_name, ret); return ret; } @@ -889,13 +956,8 @@ static unsigned long _get_new_addr_by_sym(elf_link_t *elf_link, elf_file_t *ef, return get_ifunc_new_addr(elf_link, ef, sym, sym_name); } - // When the shndx != SHN_UNDEF, the symbol in this ELF. - if (sym->st_shndx != SHN_UNDEF) { - return get_new_addr_by_old_addr(elf_link, ef, sym->st_value); - } - - // find sym in other merge ELF - return _get_new_addr_by_sym_name(elf_link, sym_name); + // lookup order + return get_new_addr_by_lookup(elf_link, ef, sym); } unsigned long get_new_addr_by_symobj_ok(elf_link_t *elf_link, elf_file_t *ef, Elf64_Sym *sym) diff --git a/src/elf_link_common.h b/src/elf_link_common.h index 0bf5a3f..ca99ebe 100644 --- a/src/elf_link_common.h +++ b/src/elf_link_common.h @@ -272,26 +272,35 @@ static inline void modify_elf_file(elf_link_t *elf_link, unsigned long loc, void memcpy(dst, val, len); } + +// symbol bool is_symbol_maybe_undefined(const char *name); bool is_gnu_weak_symbol(Elf64_Sym *sym); -bool is_section_needed(elf_link_t *elf_link, elf_file_t *ef, Elf64_Shdr *sec); +int get_new_sym_index_no_clear(elf_link_t *elf_link, elf_file_t *src_ef, unsigned int old_index); +int get_new_sym_index(elf_link_t *elf_link, elf_file_t *src_ef, unsigned int old_index); +Elf64_Sym *elf_lookup_symbol_by_rela(elf_link_t *elf_link, elf_file_t *src_ef, Elf64_Rela *src_rela, elf_file_t **lookup_ef); +// addr unsigned long get_new_addr_by_old_addr(elf_link_t *elf_link, elf_file_t *src_ef, unsigned long addr); unsigned long get_new_addr_by_old_addr_ok(elf_link_t *elf_link, elf_file_t *src_ef, unsigned long addr); -unsigned long get_new_offset_by_old_offset(elf_link_t *elf_link, elf_file_t *src_ef, unsigned long offset); unsigned long get_new_addr_by_symobj_ok(elf_link_t *elf_link, elf_file_t *ef, Elf64_Sym *sym); unsigned long get_new_addr_by_symobj(elf_link_t *elf_link, elf_file_t *ef, Elf64_Sym *sym); +unsigned long get_new_offset_by_old_offset(elf_link_t *elf_link, elf_file_t *src_ef, unsigned long offset); +// tls +unsigned long elf_get_new_tls_offset(elf_link_t *elf_link, elf_file_t *ef, unsigned long obj_tls_offset); + +// section +bool is_section_needed(elf_link_t *elf_link, elf_file_t *ef, Elf64_Shdr *sec); int get_new_section_index(elf_link_t *elf_link, elf_file_t *src_ef, unsigned int sec_index); unsigned long get_new_name_offset(elf_link_t *elf_link, elf_file_t *src_ef, Elf64_Shdr *src_sec, unsigned long offset); -int get_new_sym_index_no_clear(elf_link_t *elf_link, elf_file_t *src_ef, unsigned int old_index); -int get_new_sym_index(elf_link_t *elf_link, elf_file_t *src_ef, unsigned int old_index); -// for temp sections +// temp sections char *elf_get_tmp_section_name(elf_link_t *elf_link, Elf64_Shdr *shdr); Elf64_Shdr *find_tmp_section_by_name(elf_link_t *elf_link, const char *sec_name); Elf64_Shdr *find_tmp_section_by_src(elf_link_t *elf_link, Elf64_Shdr *shdr); +// section map void show_sec_mapping(elf_link_t *elf_link); void append_sec_mapping(elf_link_t *elf_link, elf_file_t *ef, Elf64_Shdr *sec, Elf64_Shdr *dst_sec); void append_obj_mapping(elf_link_t *elf_link, elf_file_t *ef, Elf64_Shdr *sec, void *src_obj, void *dst_obj); @@ -299,10 +308,9 @@ elf_obj_mapping_t *elf_get_mapping_by_dst(elf_link_t *elf_link, void *dst_obj); elf_sec_mapping_t *elf_find_sec_mapping_by_dst(elf_link_t *elf_link, void *_dst_offset); elf_sec_mapping_t *elf_find_sec_mapping_by_srcsec(elf_link_t *elf_link, Elf64_Shdr *src_sec); +// symbol map void append_symbol_mapping(elf_link_t *elf_link, char *symbol_name, unsigned long symbol_addr); unsigned long get_new_addr_by_symbol_mapping(elf_link_t *elf_link, char *symbol_name); void init_symbol_mapping(elf_link_t *elf_link); -unsigned long elf_get_new_tls_offset(elf_link_t *elf_link, elf_file_t *ef, unsigned long obj_tls_offset); - #endif /* _ELF_LINK_COMMON_H */ diff --git a/src/elf_link_elf.c b/src/elf_link_elf.c index e893b5c..71f889e 100644 --- a/src/elf_link_elf.c +++ b/src/elf_link_elf.c @@ -1227,7 +1227,7 @@ static inline Elf64_Sym *get_src_sym_by_dst(elf_link_t *elf_link, Elf64_Sym *dst static void modify_symbol(elf_link_t *elf_link, Elf64_Shdr *sec) { int len = sec->sh_size / sizeof(Elf64_Sym); - Elf64_Sym *base = ((void *)elf_link->out_ef.hdr) + sec->sh_offset; + Elf64_Sym *base = elf_get_section_data(&elf_link->out_ef, sec); for (int i = 0; i < len; i++) { Elf64_Sym *dst_sym = &base[i]; @@ -1247,7 +1247,7 @@ static void modify_symbol(elf_link_t *elf_link, Elf64_Shdr *sec) static Elf64_Sym *find_defined_symbol(elf_file_t *ef, Elf64_Shdr *sec, char *sym_name) { int count = sec->sh_size / sizeof(Elf64_Sym); - Elf64_Sym *base = ((void *)ef->hdr) + sec->sh_offset; + Elf64_Sym *base = elf_get_section_data(ef, sec); for (int i = 0; i < count; i++) { Elf64_Sym *dst_sym = &base[i]; @@ -1266,7 +1266,7 @@ static Elf64_Sym *find_defined_symbol(elf_file_t *ef, Elf64_Shdr *sec, char *sym static void delete_undefined_symbol(elf_file_t *ef, Elf64_Shdr *sec) { int count = sec->sh_size / sizeof(Elf64_Sym); - Elf64_Sym *base = ((void *)ef->hdr) + sec->sh_offset; + Elf64_Sym *base = elf_get_section_data(ef, sec); for (int i = 0; i < count; i++) { Elf64_Sym *dst_sym = &base[i]; @@ -1284,7 +1284,7 @@ static void delete_undefined_symbol(elf_file_t *ef, Elf64_Shdr *sec) static void sort_symbol_table(elf_file_t *ef, Elf64_Shdr *sec) { int count = sec->sh_size / sizeof(Elf64_Sym); - void *base = ((void *)ef->hdr) + sec->sh_offset; + void *base = elf_get_section_data(ef, sec); qsort(base, count, sizeof(Elf64_Sym), sym_cmp_func); } @@ -1463,27 +1463,25 @@ char *disabled_funcs[] = { "frame_dummy", "__do_global_dtors_aux", }; + #define DISABLED_FUNCS_LEN (sizeof(disabled_funcs) / sizeof(disabled_funcs[0])) #define AARCH64_INSN_RET 0xD65F03C0U #define X86_64_INSN_RET 0xC3 -static void modify_init_and_fini(elf_link_t *elf_link) + +static void modify_init_and_fini_ef(elf_link_t *elf_link, elf_file_t *ef) { - if (is_share_mode(elf_link) == true) { - return; - } Elf64_Ehdr *hdr = elf_link->out_ef.hdr; - if (hdr->e_machine != EM_AARCH64 && hdr->e_machine != EM_X86_64) { - si_panic("e_machine not support\n"); - } - elf_file_t *out_ef = &elf_link->out_ef; // In .init_array and .fini_array, static-pie mode the EXEC ELF no need run // so we need to disable such functions in EXEC ELF - elf_file_t *ef = get_main_ef(elf_link); for (unsigned j = 0; j < DISABLED_FUNCS_LEN; j++) { Elf64_Sym *sym = elf_find_symbol_by_name(ef, disabled_funcs[j]); - unsigned long addr = get_new_addr_by_symobj_ok(elf_link, ef, sym); + if (sym == NULL) { + // do nothing + continue; + } + unsigned long addr = get_new_addr_by_symobj(elf_link, ef, sym); if (hdr->e_machine == EM_AARCH64) { elf_write_u32(out_ef, addr, AARCH64_INSN_RET); } else { @@ -1492,6 +1490,24 @@ static void modify_init_and_fini(elf_link_t *elf_link) } } +static void modify_init_and_fini(elf_link_t *elf_link) +{ + if (is_share_mode(elf_link) == true) { + return; + } + Elf64_Ehdr *hdr = elf_link->out_ef.hdr; + if (hdr->e_machine != EM_AARCH64 && hdr->e_machine != EM_X86_64) { + si_panic("e_machine not support\n"); + } + + elf_file_t *ef; + int in_ef_nr = elf_link->in_ef_nr; + for (int i = 0; i < in_ef_nr; i++) { + ef = &elf_link->in_efs[i]; + modify_init_and_fini_ef(elf_link, ef); + } +} + static void do_special_adapts(elf_link_t *elf_link) { modify_init_and_fini(elf_link); diff --git a/src/elf_read_elf.c b/src/elf_read_elf.c index 22c6943..c1a10ed 100644 --- a/src/elf_read_elf.c +++ b/src/elf_read_elf.c @@ -41,6 +41,23 @@ #define ELF_VERSION_NR_LOCAL 0 #define ELF_VERSION_NR_GLOBAL 1 +Elf64_Rela *elf_get_rela_by_addr(elf_file_t *ef, unsigned long addr) +{ + Elf64_Shdr *sec = elf_find_section_by_name(ef, ".rela.dyn"); + Elf64_Rela *relas = (Elf64_Rela *)elf_get_section_data(ef, sec); + int count = sec->sh_size / sizeof(Elf64_Rela); + Elf64_Rela *rela = NULL; + + for (int i = 0; i < count; i++) { + rela = &relas[i]; + if (rela->r_offset == addr) { + return rela; + } + } + + return NULL; +} + static inline char *elf_get_version_name(elf_file_t *ef, Elf64_Vernaux *vernaux) { return ef->dynstr_data + vernaux->vna_name; @@ -92,7 +109,7 @@ char *elf_get_dynsym_version_name(elf_file_t *ef, Elf64_Sym *sym) return elf_get_version_name(ef, vernaux); } -bool elf_is_copy_symbol(elf_file_t *ef, Elf64_Sym *sym) +bool elf_is_symbol_at_libc(elf_file_t *ef, Elf64_Sym *sym) { char *sym_name = elf_get_sym_name(ef, sym); bool is_dynsym = elf_is_dynsym(ef, sym); @@ -140,31 +157,36 @@ bool elf_is_same_symbol_name(const char *a, const char *b) return true; } -int find_dynsym_index_by_name(elf_file_t *ef, const char *name, bool clear) +Elf64_Sym *elf_find_dynsym_by_name(elf_file_t *ef, const char *name) { Elf64_Sym *syms = elf_get_dynsym_array(ef); int count = elf_get_dynsym_count(ef); - int found_index = -1; - Elf64_Sym *sym = NULL; char *sym_name = NULL; + for (int i = 0; i < count; i++) { sym = &syms[i]; - sym_name = elf_get_sym_name(ef, sym); + sym_name = elf_get_dynsym_name(ef, sym); if (elf_is_same_symbol_name(sym_name, name)) { - if (clear && sym->st_shndx != 0) { - return NEED_CLEAR_RELA; - } - found_index = i; - break; + return sym; } } - if (found_index == -1) { + return NULL; +} + +int find_dynsym_index_by_name(elf_file_t *ef, const char *name, bool clear) +{ + Elf64_Sym *sym = elf_find_dynsym_by_name(ef, name); + if (sym == NULL) { si_panic("%s\n", name); } - return found_index; + if (clear && sym->st_shndx != 0) { + return NEED_CLEAR_RELA; + } + + return elf_get_dynsym_index(ef, sym); } int elf_find_func_range_by_name(elf_file_t *ef, const char *func_name, @@ -176,9 +198,8 @@ int elf_find_func_range_by_name(elf_file_t *ef, const char *func_name, } *start = sym->st_value; - Elf64_Shdr *sec = ef->symtab_sec; - Elf64_Sym *syms = (Elf64_Sym *)(((void *)ef->hdr) + sec->sh_offset); - unsigned count = sec->sh_size / sizeof(Elf64_Sym); + Elf64_Sym *syms = elf_get_symtab_array(ef); + unsigned count = elf_get_symtab_count(ef); *end = ~0UL; for (unsigned i = 0; i < count; i++) { @@ -198,9 +219,8 @@ int elf_find_func_range_by_name(elf_file_t *ef, const char *func_name, unsigned elf_find_symbol_index_by_name(elf_file_t *ef, const char *name) { - Elf64_Shdr *sec = ef->symtab_sec; - Elf64_Sym *syms = (Elf64_Sym *)(((void *)ef->hdr) + sec->sh_offset); - int count = sec->sh_size / sizeof(Elf64_Sym); + Elf64_Sym *syms = elf_get_symtab_array(ef); + int count = elf_get_symtab_count(ef); for (int i = 0; i < count; i++) { Elf64_Sym *sym = &syms[i]; @@ -211,22 +231,28 @@ unsigned elf_find_symbol_index_by_name(elf_file_t *ef, const char *name) } } - si_panic("find symbol fail %s %s\n", ef->file_name, name); - return ~0U; /* unreachable */ + return NOT_FOUND_SYM; } Elf64_Sym *elf_find_symbol_by_name(elf_file_t *ef, const char *sym_name) { - Elf64_Shdr *sec = ef->symtab_sec; - Elf64_Sym *syms = (Elf64_Sym *)(((void *)ef->hdr) + sec->sh_offset); + Elf64_Sym *syms = elf_get_symtab_array(ef); + unsigned i = elf_find_symbol_index_by_name(ef, sym_name); + if (i == NOT_FOUND_SYM) { + return NULL; + } + return &syms[i]; } unsigned long elf_find_symbol_addr_by_name(elf_file_t *ef, char *sym_name) { Elf64_Sym *sym = elf_find_symbol_by_name(ef, sym_name); - return sym->st_value; + if (sym) { + return sym->st_value; + } + si_panic("can not find sym, %s %s\n", ef->file_name, sym_name); return 0; } diff --git a/src/elf_read_elf.h b/src/elf_read_elf.h index a5cf7b1..a641ed3 100644 --- a/src/elf_read_elf.h +++ b/src/elf_read_elf.h @@ -16,6 +16,8 @@ #include #include +#define NOT_FOUND_SYM (-1U) + #define NEED_CLEAR_RELA (-2) #define RELOCATION_ROOT_DIR "/usr/lib/relocation" @@ -83,6 +85,16 @@ static inline char *elf_get_symbol_name(elf_file_t *ef, Elf64_Sym *sym) return ef->strtab_data + sym->st_name; } +static inline Elf64_Sym *elf_get_symtab_array(elf_file_t *ef) +{ + return (Elf64_Sym *)elf_get_section_data(ef, ef->symtab_sec); +} + +static inline int elf_get_symtab_count(elf_file_t *ef) +{ + return ef->symtab_sec->sh_size / sizeof(Elf64_Sym); +} + static inline Elf64_Sym *elf_get_dynsym_array(elf_file_t *ef) { return (Elf64_Sym *)elf_get_section_data(ef, ef->dynsym_sec); @@ -129,6 +141,20 @@ static inline char *elf_get_sym_name(elf_file_t *ef, Elf64_Sym *sym) return sym_name; } +static inline int elf_get_symbol_index(Elf64_Rela *rela) +{ + return ELF64_R_SYM(rela->r_info); +} + +static inline bool elf_is_rela_symbol_null(Elf64_Rela *rela) +{ + int index = elf_get_symbol_index(rela); + if (index == 0) { + return true; + } + return false; +} + static inline Elf64_Sym *elf_get_symtab_by_rela(elf_file_t *ef, Elf64_Rela *rela) { return (Elf64_Sym *)((void *)ef->hdr + ef->symtab_sec->sh_offset) + ELF64_R_SYM(rela->r_info); @@ -144,13 +170,17 @@ int elf_find_func_range_by_name(elf_file_t *ef, const char *func_name, unsigned long *start, unsigned long *end); // symbol +bool elf_is_same_symbol_name(const char *a, const char *b); +bool elf_is_symbol_at_libc(elf_file_t *ef, Elf64_Sym *sym); unsigned elf_find_symbol_index_by_name(elf_file_t *ef, const char *name); Elf64_Sym *elf_find_symbol_by_name(elf_file_t *ef, const char *sym_name); unsigned long elf_find_symbol_addr_by_name(elf_file_t *ef, char *sym_name); -bool elf_is_same_symbol_name(const char *a, const char *b); -char *elf_get_dynsym_name_by_index(elf_file_t *ef, unsigned int index); +Elf64_Sym *elf_find_dynsym_by_name(elf_file_t *ef, const char *name); int find_dynsym_index_by_name(elf_file_t *ef, const char *name, bool clear); -bool elf_is_copy_symbol(elf_file_t *ef, Elf64_Sym *sym); +char *elf_get_dynsym_name_by_index(elf_file_t *ef, unsigned int index); + +// rela +Elf64_Rela *elf_get_rela_by_addr(elf_file_t *ef, unsigned long addr); // section Elf64_Shdr *elf_find_section_by_tls_offset(elf_file_t *ef, unsigned long obj_tls_offset); diff --git a/src/elf_relocation.c b/src/elf_relocation.c index 9f7b2e4..3abc8bc 100644 --- a/src/elf_relocation.c +++ b/src/elf_relocation.c @@ -25,6 +25,7 @@ #include "elf_link_common.h" #include "elf_relocation.h" +#include "elf_write_elf.h" #define BYTES_NOP1 0x90 @@ -142,42 +143,81 @@ static void modify_rela_to_RELATIVE(elf_link_t *elf_link, elf_file_t *src_ef, El rela_change_to_relative(dst_rela, ret); } -static void rela_use_relative(elf_link_t *elf_link, elf_file_t *src_ef, Elf64_Rela *src_rela, Elf64_Rela *dst_rela) +// data not add base +static unsigned long get_data_after_relocation(elf_file_t *ef, unsigned long addr) { - // 000000000012dd60 000001b900000005 R_X86_64_COPY 000000000012dd60 stdout@GLIBC_2.2.5 + 0 - // 441: 000000000012dd60 8 OBJECT GLOBAL DEFAULT 36 stdout@GLIBC_2.2.5 (2) - // libc: 1407: 00000000001ed688 8 OBJECT GLOBAL DEFAULT 36 stdout@@GLIBC_2.2.5 - // copy symbol data to bss area + // data may be modify by .rela.dyn, so get data from sym.value - elf_file_t *ef = get_libc_ef(elf_link); - Elf64_Sym *sym = elf_get_dynsym_by_rela(src_ef, src_rela); - if (elf_is_copy_symbol(src_ef, sym) == false) { - // use local symbol addr - ef = src_ef; + Elf64_Rela *rela = elf_get_rela_by_addr(ef, addr); + if (rela == NULL) { + // var in bss is set to zero, no rela + // 718: 00000000001f4ce0 8 OBJECT GLOBAL DEFAULT 44 __environ@@GLIBC_2.2.5 + return NOT_FOUND; } - char *sym_name = elf_get_sym_name(src_ef, sym); - unsigned long old_sym_addr = elf_find_symbol_addr_by_name(ef, sym_name); - unsigned long new_sym_addr = get_new_addr_by_old_addr(elf_link, ef, old_sym_addr); - if (new_sym_addr == NOT_FOUND) { - si_panic("%s %lx\n", src_ef->file_name, src_rela->r_offset); + Elf64_Sym *sym = elf_get_dynsym_by_rela(ef, rela); + if (sym->st_value == 0UL) { + si_panic("%s %lx\n", ef->file_name, addr); + } + // (sym->st_value + base) will set in addr rela->r_offset + return sym->st_value; +} + +// relocation bash, R_X86_64_COPY type lookup from deps lib +// 56775: symbol=stdout; lookup in file=/usr/lib64/libtinfo.so.6 [0] +// 56775: symbol=stdout; lookup in file=/usr/lib64/libc.so.6 [0] +// 56775: binding file /usr/bin/bash [0] to /usr/lib64/libc.so.6 [0]: normal symbol `stdout' [GLIBC_2.2.5] +// 56775: symbol=stdin; lookup in file=/usr/lib64/libtinfo.so.6 [0] +// 56775: symbol=stdin; lookup in file=/usr/lib64/libc.so.6 [0] +// 56775: binding file /usr/bin/bash [0] to /usr/lib64/libc.so.6 [0]: normal symbol `stdin' [GLIBC_2.2.5] +// 56775: symbol=UP; lookup in file=/usr/lib64/libtinfo.so.6 [0] +// 56775: binding file /usr/bin/bash [0] to /usr/lib64/libtinfo.so.6 [0]: normal symbol `UP' +// 56775: symbol=__environ; lookup in file=/usr/lib64/libtinfo.so.6 [0] +// 56775: symbol=__environ; lookup in file=/usr/lib64/libc.so.6 [0] +// 56775: binding file /usr/bin/bash [0] to /usr/lib64/libc.so.6 [0]: normal symbol `__environ' [GLIBC_2.2.5] +// 56775: symbol=PC; lookup in file=/usr/lib64/libtinfo.so.6 [0] +// 56775: binding file /usr/bin/bash [0] to /usr/lib64/libtinfo.so.6 [0]: normal symbol `PC' +// 56775: symbol=BC; lookup in file=/usr/lib64/libtinfo.so.6 [0] +// 56775: binding file /usr/bin/bash [0] to /usr/lib64/libtinfo.so.6 [0]: normal symbol `BC' +// 56775: symbol=stderr; lookup in file=/usr/lib64/libtinfo.so.6 [0] +// 56775: symbol=stderr; lookup in file=/usr/lib64/libc.so.6 [0] +// 56775: binding file /usr/bin/bash [0] to /usr/lib64/libc.so.6 [0]: normal symbol `stderr' [GLIBC_2.2.5] + +// timeline +// relocation libc, (00000000001ed688 libc.stdout) <= _IO_2_1_stdout_ addr +// relocation bash, (000000000012dd60 bash.stdout) <= (00000000001ed688 libc.stdout) data COPY +// bash: +// 000000000012dd60 000001b900000005 R_X86_64_COPY 000000000012dd60 stdout@GLIBC_2.2.5 + 0 +// 441: 000000000012dd60 8 OBJECT GLOBAL DEFAULT 36 stdout@GLIBC_2.2.5 (2) +// libc: +// 00000000001ed688 0000026600000001 R_X86_64_64 00000000001ed5a0 _IO_2_1_stdout_@@GLIBC_2.2.5 + 0 +// 1407: 00000000001ed688 8 OBJECT GLOBAL DEFAULT 36 stdout@@GLIBC_2.2.5 +// 614: 00000000001ed5a0 224 OBJECT GLOBAL DEFAULT 36 _IO_2_1_stdout_@@GLIBC_2.2.5 +static void rela_use_relative(elf_link_t *elf_link, elf_file_t *src_ef, Elf64_Rela *src_rela, Elf64_Rela *dst_rela) +{ + // copy symbol data to app bss area + + elf_file_t *lookup_ef = NULL; + Elf64_Sym *lookup_sym = elf_lookup_symbol_by_rela(elf_link, src_ef, src_rela, &lookup_ef); + + // data will be modify by .rela.dyn, really data need add base + unsigned long old_addr_in_data = get_data_after_relocation(lookup_ef, lookup_sym->st_value); + if (old_addr_in_data == NOT_FOUND) { + // 511: 000000000012dd80 8 OBJECT GLOBAL DEFAULT 36 __environ@GLIBC_2.2.5 (2) + // var is allready 0, do nothing + elf_clear_rela(dst_rela); return; } - unsigned long data = elf_read_u64_va(ef, new_sym_addr); // check copy size - if (sym->st_size == sizeof(unsigned long)) { - // 8 byte - } else if (sym->st_size == sizeof(unsigned char)) { - // NOTE: target mem is bss, so relative type write 7 zero is OK - // 1 byte - data = (unsigned char)data; - } else { + Elf64_Sym *sym = elf_get_dynsym_by_rela(src_ef, src_rela); + if (sym->st_size != sizeof(unsigned long)) { si_panic("size wrong %s %lx\n", src_ef->file_name, src_rela->r_offset); return; } - rela_change_to_relative(dst_rela, data); + unsigned long new_addr_in_data = get_new_addr_by_old_addr(elf_link, lookup_ef, old_addr_in_data); + rela_change_to_relative(dst_rela, new_addr_in_data); } void modify_rela_dyn_item(elf_link_t *elf_link, elf_file_t *src_ef, Elf64_Rela *src_rela, Elf64_Rela *dst_rela) @@ -187,6 +227,7 @@ void modify_rela_dyn_item(elf_link_t *elf_link, elf_file_t *src_ef, Elf64_Rela * // modify offset dst_rela->r_offset = get_new_addr_by_old_addr(elf_link, src_ef, src_rela->r_offset); + // old sym index to new index of .dynsym unsigned int old_index = ELF64_R_SYM(src_rela->r_info); int new_index = get_new_sym_index_no_clear(elf_link, src_ef, old_index); @@ -233,6 +274,9 @@ void modify_rela_dyn_item(elf_link_t *elf_link, elf_file_t *src_ef, Elf64_Rela * fallthrough; case R_X86_64_RELATIVE: case R_AARCH64_RELATIVE: + if (!elf_is_rela_symbol_null(src_rela)) { + si_panic("%s %lx\n", src_ef->file_name, src_rela->r_offset); + } dst_rela->r_addend = get_new_addr_by_old_addr(elf_link, src_ef, src_rela->r_addend); break; case R_AARCH64_TLS_TPREL: @@ -257,7 +301,7 @@ void modify_rela_dyn_item(elf_link_t *elf_link, elf_file_t *src_ef, Elf64_Rela * break; default: SI_LOG_ERR("%s %lx\n", src_ef->file_name, src_rela->r_offset); - si_panic("error not supported modify_rela_dyn type %d\n", type); + si_panic("error not supported type %d\n", type); } SI_LOG_DEBUG("old r_offset %016lx r_info %016lx r_addend %016lx -> new r_offset %016lx r_info %016lx r_addend %016lx\n", @@ -299,6 +343,9 @@ void modify_got(elf_link_t *elf_link) // 2006: 00000000003ffbd8 0 OBJECT LOCAL DEFAULT ABS _GLOBAL_OFFSET_TABLE_ elf_file_t *template_ef = get_template_ef(elf_link); Elf64_Sym *sym = elf_find_symbol_by_name(template_ef, "_GLOBAL_OFFSET_TABLE_"); + if (sym == NULL) { + si_panic("find sym fail\n"); + } unsigned long new_addr = get_new_addr_by_old_addr(elf_link, template_ef, sym->st_value); elf_file_t *out_ef = &elf_link->out_ef; elf_write_u64(out_ef, new_addr, find_sec->sh_addr); diff --git a/src/elf_relocation_aarch64.c b/src/elf_relocation_aarch64.c index 59dd86f..ed179fa 100644 --- a/src/elf_relocation_aarch64.c +++ b/src/elf_relocation_aarch64.c @@ -476,6 +476,9 @@ static void modify_new_adrp(elf_link_t *elf_link, elf_file_t *ef, Elf64_Rela *re old_sym_addr = get_adrp_addr(old_insn, old_offset); /* make sure old_sym_addr locate in .got */ sym = elf_find_symbol_by_name(ef, "_GLOBAL_OFFSET_TABLE_"); + if (sym == NULL) { + si_panic("find sym fail\n"); + } if (old_sym_addr < sym->st_value) { old_sym_addr = sym->st_value; } @@ -1010,7 +1013,7 @@ void correct_stop_libc_atexit(elf_link_t *elf_link) int ret = elf_find_func_range_by_name(template_ef, "__run_exit_handlers", &start, &end); if (ret) { - si_panic("%s: elf_find_func_range_by_name fail\n", __func__); + si_panic("elf_find_func_range_by_name fail\n"); } /* find ldr with __stop___libc_atexit rela in __run_exit_handlers() */ @@ -1018,6 +1021,9 @@ void correct_stop_libc_atexit(elf_link_t *elf_link) Elf64_Rela *relas = ((void *)template_ef->hdr) + sec->sh_offset; unsigned len = sec->sh_size / sec->sh_entsize; unsigned sym_id = elf_find_symbol_index_by_name(template_ef, "__stop___libc_atexit"); + if (sym_id == NOT_FOUND_SYM) { + si_panic("find sym fail\n"); + } unsigned long old_ldr_addr = 0; for (unsigned i = 0; i < len; i++) { @@ -1077,6 +1083,9 @@ void correct_stop_libc_atexit(elf_link_t *elf_link) continue; } Elf64_Sym *sym = elf_find_symbol_by_name(out_ef, "__stop___libc_atexit"); + if (sym == NULL) { + si_panic("find sym fail\n"); + } rela->r_addend = sym->st_value; SI_LOG_DEBUG("change .rela.dyn 0x%lx's value to 0x%lx\n", rela->r_offset, sym->st_value); diff --git a/src/elf_relocation_x86_64.c b/src/elf_relocation_x86_64.c index cce2625..4159e1b 100644 --- a/src/elf_relocation_x86_64.c +++ b/src/elf_relocation_x86_64.c @@ -25,6 +25,7 @@ #include "elf_link_common.h" #include "elf_instruction.h" +#include "elf_write_elf.h" #define BYTES_NOP1 0x90 @@ -262,8 +263,8 @@ static void modify_insn_func_offset(elf_link_t *elf_link, elf_file_t *ef, Elf64_ // This is where to make the change unsigned long loc = get_new_addr_by_old_addr(elf_link, ef, rela->r_offset); - unsigned long sym_addr = get_new_addr_by_symobj_ok(elf_link, ef, sym); - if (sym_addr == 0) { + unsigned long sym_addr = get_new_addr_by_symobj(elf_link, ef, sym); + if (sym_addr == NOT_FOUND) { // share mode libc func is use plt, no need change if (is_share_mode(elf_link)) { return; @@ -271,14 +272,15 @@ static void modify_insn_func_offset(elf_link_t *elf_link, elf_file_t *ef, Elf64_ const char *sym_name = elf_get_sym_name(ef, sym); if (is_symbol_maybe_undefined(sym_name)) { + sym_addr = 0UL; goto out; } si_panic("find func fail %s %016lx\n", sym_name, rela->r_offset); return; } - val = (long)sym_addr - (long)loc + rela->r_addend; out: + val = (long)sym_addr - (long)loc + rela->r_addend; modify_elf_file(elf_link, loc, &val, sizeof(int)); } @@ -306,29 +308,6 @@ static void modify_insn_for_pc32(elf_link_t *elf_link, elf_file_t *ef, Elf64_Rel return; } - // stdout symbol is in libc, point addr in bash bss - // 000000000012dd60 000001b900000005 R_X86_64_COPY 000000000012dd60 stdout@GLIBC_2.2.5 + 0 - // 441: 000000000012dd60 8 OBJECT GLOBAL DEFAULT 36 stdout@GLIBC_2.2.5 (2) - // 000000000004758e 0000066a00000002 R_X86_64_PC32 000000000012dd60 stdout@GLIBC_2.2.5 - 4 - // 1642: 000000000012dd60 8 OBJECT GLOBAL DEFAULT 36 stdout@GLIBC_2.2.5 - // 4758b: 48 8b 3d ce 67 0e 00 mov 0xe67ce(%rip),%rdi # 12dd60 - // libc environ is weak, so other ELF have the some var - // feature: 48 89 05 96 f8 0d 00 mov %rax,0xdf896(%rip) - // 952: 00000000001f4ce0 8 OBJECT WEAK DEFAULT 44 environ@@GLIBC_2.2.5 - if (is_direct_point_var_optimize(elf_link) && elf_is_copy_symbol(ef, sym)) { - unsigned long sym_addr = get_new_addr_by_symobj(elf_link, ef, sym); - if (sym_addr != NOT_FOUND) { - unsigned char *insn = get_insn_begin_by_offset(elf_link, ef, rela); - int ret = elf_insn_change_mov_to_lea(insn); - if (ret != 0) { - si_panic("%s %lx\n", ef->file_name, rela->r_offset); - } - unsigned long loc = get_new_addr_by_old_addr(elf_link, ef, rela->r_offset); - modify_insn_offset(elf_link, loc, sym_addr, rela->r_addend); - return; - } - } - // feature: if layout not random, use imm value, do not use lea // data is use offset, STT_OBJECT @@ -434,12 +413,6 @@ int modify_local_call_rela(elf_link_t *elf_link, elf_file_t *ef, Elf64_Rela *rel return 0; } -static void clear_rela(Elf64_Rela *dst_rela) -{ - (void)memset(dst_rela, 0, sizeof(*dst_rela)); - // TODO: bug, R_X86_64_NONE can not in .rela.plt -} - #define ADDRESS_OF_FOUR_BYTES 4 #define ADDRESS_OF_SIX_BYTES 6 void modify_rela_plt(elf_link_t *elf_link, si_array_t *arr) @@ -465,7 +438,7 @@ void modify_rela_plt(elf_link_t *elf_link, si_array_t *arr) int new_index = get_new_sym_index_no_clear(elf_link, obj_rel->src_ef, old_index); // func in this ELF need clear rela if (new_index == NEED_CLEAR_RELA) { - clear_rela(dst_rela); + elf_clear_rela(dst_rela); continue; } dst_rela->r_info = ELF64_R_INFO(new_index, ELF64_R_TYPE(src_rela->r_info)); diff --git a/src/elf_write_elf.h b/src/elf_write_elf.h index f3ffd2d..6c9fc57 100644 --- a/src/elf_write_elf.h +++ b/src/elf_write_elf.h @@ -15,9 +15,16 @@ #include #include +#include #include "elf_link_common.h" +// old ld.so can not compat to R_X86_64_NONE in .rela.plt +static inline void elf_clear_rela(Elf64_Rela *dst_rela) +{ + (void)memset(dst_rela, 0, sizeof(*dst_rela)); +} + void elf_modify_file_zero(elf_link_t *elf_link, unsigned long offset, unsigned long len); void elf_modify_section_zero(elf_link_t *elf_link, char *secname); diff --git a/tests/bash/.gitignore b/tests/bash/.gitignore index 856977a..397b4a7 100644 --- a/tests/bash/.gitignore +++ b/tests/bash/.gitignore @@ -1,3 +1 @@ -bash -bash.001 -bash.002 +*.log diff --git a/tests/bash/Makefile b/tests/bash/Makefile index caccf9f..ac1c8a0 100644 --- a/tests/bash/Makefile +++ b/tests/bash/Makefile @@ -84,7 +84,11 @@ env: objdump -d $(BASH) > bash.asm readelf -W -a $(LIBC_R) > libc.so.elf objdump -d $(LIBC_R) > libc.so.asm - readelf -W -a $(LIBTINFO_R) > libtinfo.so.elf - objdump -d $(LIBTINFO_R) > libtinfo.so.asm + readelf -W -a $(LIBTINFO) > libtinfo.so.elf + objdump -d $(LIBTINFO) > libtinfo.so.asm readelf -W -a $(LIBLD) > ld.so.elf objdump -d $(LIBLD) > ld.so.asm + LD_DEBUG=all $(BASH) -c "echo 1" &> bash.ld.log + +clean: + $(RM) *.o *.ro *.old *.so *.asm *.elf *.rto *.out *.log -- Gitee