From 8d17db28a113b3f2c59e18114081b60f2fe7b2e4 Mon Sep 17 00:00:00 2001 From: Zhou Kang Date: Thu, 10 Aug 2023 15:37:16 +0000 Subject: [PATCH] fix ld.so init --- src/elf_check_elf.c | 8 ++++ src/elf_link_common.h | 19 --------- src/elf_link_elf.c | 17 ++++++-- src/elf_read_elf.c | 2 +- src/elf_read_elf.h | 9 +++++ src/elf_relocation.c | 93 ++++++++++++++++++++++++++++--------------- src/elf_write_elf.c | 4 +- tests/bash/Makefile | 2 + 8 files changed, 96 insertions(+), 58 deletions(-) diff --git a/src/elf_check_elf.c b/src/elf_check_elf.c index 12a06aa..4eeaed7 100644 --- a/src/elf_check_elf.c +++ b/src/elf_check_elf.c @@ -77,6 +77,11 @@ static void check_rela_dyn(elf_link_t *elf_link, elf_file_t *out_ef) if (ELF64_R_SYM(rela->r_info) == 0) { continue; } + + if (elf_rela_is_relative(rela) == false) { + SI_LOG_EMERG("rela is not relative, offset %lx info %lx\n", rela->r_offset, rela->r_info); + } + Elf64_Sym *sym = elf_get_dynsym_by_rela(out_ef, rela); const char *sym_name = elf_get_sym_name(out_ef, sym); if (is_dynsym_valid(sym, sym_name) == false @@ -113,6 +118,9 @@ static void check_dynamic(elf_link_t *elf_link, elf_file_t *out_ef) static void check_func(elf_file_t *out_ef, unsigned long func_point) { Elf64_Rela *rela = elf_get_rela_by_addr(out_ef, func_point); + if (rela == NULL) { + si_panic("not fonud r_offset in .rely.dyn, %lx\n", func_point); + } // 00000000001222d0 0000000000000008 R_X86_64_RELATIVE 30720 // rela must be type R_X86_64_RELATIVE diff --git a/src/elf_link_common.h b/src/elf_link_common.h index befe125..342a0c8 100644 --- a/src/elf_link_common.h +++ b/src/elf_link_common.h @@ -221,25 +221,6 @@ static inline elf_file_t *get_libc_ef(elf_link_t *elf_link) return elf_link->libc_ef; } - -static inline bool is_rela_plt_name(const char *name) -{ - if (strcmp(name, ".rela.plt") == 0) { - return true; - } - - return false; -} - -static inline bool is_rela_dyn_name(const char *name) -{ - if (strcmp(name, ".rela.dyn") == 0) { - return true; - } - - return 
false; -} - static inline bool is_init_name(const char *name) { if (strcmp(name, ".init_array") == 0) { diff --git a/src/elf_link_elf.c b/src/elf_link_elf.c index 459be9e..6a8dc30 100644 --- a/src/elf_link_elf.c +++ b/src/elf_link_elf.c @@ -707,6 +707,7 @@ static void dynamic_copy_dyn(elf_link_t *elf_link, elf_file_t *src_ef, Elf64_Dyn case DT_VERNEED: case DT_VERSYM: case DT_GNU_HASH: + case DT_STRTAB: case DT_SYMTAB: dst_dyn->d_un.d_val = get_new_addr_by_old_addr(elf_link, src_ef, src_dyn->d_un.d_val); break; @@ -740,6 +741,9 @@ static Elf64_Dyn *dynamic_copy_dyn_by_type(elf_link_t *elf_link, elf_file_t *src // 0x000000006ffffffe (VERNEED) 0x23f70 // 0x000000006fffffff (VERNEEDNUM) 1 // 0x000000006ffffff0 (VERSYM) 0x222de +// 0x000000006ffffef5 (GNU_HASH) 0x42e0 +// 0x0000000000000005 (STRTAB) 0x1a470 +// 0x0000000000000006 (SYMTAB) 0x89c8 static unsigned long libc_dt_arr[] = { DT_NEEDED, DT_SONAME, @@ -749,6 +753,7 @@ static unsigned long libc_dt_arr[] = { DT_VERNEEDNUM, DT_VERSYM, DT_GNU_HASH, + DT_STRTAB, DT_SYMTAB, }; #define LIBC_DT_ARR_LEN (sizeof(libc_dt_arr) / sizeof(libc_dt_arr[0])) @@ -829,6 +834,7 @@ static int dynamic_copy_obj(elf_link_t *elf_link, Elf64_Dyn *begin_dyn, int len) new_d_val = get_new_name_offset(elf_link, ef, ef->dynstr_sec, dyn->d_un.d_val); break; case DT_GNU_HASH: + case DT_STRTAB: case DT_SYMTAB: if (is_static_nold_mode(elf_link)) { // have done before @@ -837,7 +843,6 @@ static int dynamic_copy_obj(elf_link_t *elf_link, Elf64_Dyn *begin_dyn, int len) fallthrough; case DT_INIT: case DT_FINI: - case DT_STRTAB: case DT_PLTGOT: case DT_RELA: new_d_val = get_new_addr_by_old_addr(elf_link, ef, dyn->d_un.d_val); @@ -1176,14 +1181,20 @@ static void modify_dynsym(elf_link_t *elf_link) Elf64_Shdr *sec = find_tmp_section_by_name(elf_link, ".dynsym"); modify_symbol(elf_link, sec); + // defined and undefined symbol from elfs all in dynsym // delete undefined symbol, so dlsym can find the addr delete_undefined_symbol(&elf_link->out_ef, sec); - 
sort_symbol_table(&elf_link->out_ef, sec); - // sh_info is STB_LOCAL symbol count sec->sh_info = get_local_symbol_count(&elf_link->out_ef, sec); + // nold mode, libc gnu_hash no change, dynsym no sort + if (is_static_nold_mode(elf_link)) { + return; + } + + sort_symbol_table(&elf_link->out_ef, sec); + Elf64_Shdr *dyn = sec; sec = find_tmp_section_by_name(elf_link, ".gnu.hash"); modify_hash(&elf_link->out_ef, sec, dyn, elf_link->out_ef.dynstr_data); diff --git a/src/elf_read_elf.c b/src/elf_read_elf.c index 17d218c..29cd4a7 100644 --- a/src/elf_read_elf.c +++ b/src/elf_read_elf.c @@ -179,7 +179,7 @@ int elf_find_dynsym_index_by_name(elf_file_t *ef, const char *name) { Elf64_Sym *sym = elf_find_dynsym_by_name(ef, name); if (sym == NULL) { - si_panic("%s\n", name); + si_panic("%s %s\n", ef->file_name, name); } return elf_get_dynsym_index(ef, sym); diff --git a/src/elf_read_elf.h b/src/elf_read_elf.h index 638fa33..28f0e9a 100644 --- a/src/elf_read_elf.h +++ b/src/elf_read_elf.h @@ -239,6 +239,15 @@ Elf64_Dyn *elf_find_dyn_by_type(elf_file_t *ef, unsigned long dt); // section +static inline bool elf_is_rela_dyn_name(const char *name) +{ + if (strcmp(name, ".rela.dyn") == 0) { + return true; + } + + return false; +} + static inline bool elf_is_rela_plt_name(const char *name) { if (strcmp(name, ".rela.plt") == 0) { diff --git a/src/elf_relocation.c b/src/elf_relocation.c index 18633bc..cd86035 100644 --- a/src/elf_relocation.c +++ b/src/elf_relocation.c @@ -122,6 +122,13 @@ static void rela_change_to_relative(Elf64_Rela *dst_rela, unsigned long addend) // offset modify by caller } +static void fix_rela_new_index(elf_link_t *elf_link, elf_file_t *src_ef, Elf64_Rela *src_rela, Elf64_Rela *dst_rela) +{ + unsigned int old_index = ELF64_R_SYM(src_rela->r_info); + int new_index = get_new_sym_index(elf_link, src_ef, old_index); + dst_rela->r_info = ELF64_R_INFO(new_index, ELF64_R_TYPE(src_rela->r_info)); +} + // The __stack_chk_guard and __stack_chk_fail symbols are normally 
supplied by a GCC library called libssp // we can not change code to direct access the symbol, some code use 2 insn to point symbol, the adrp insn may be shared static void modify_rela_to_RELATIVE(elf_link_t *elf_link, elf_file_t *src_ef, Elf64_Rela *src_rela, Elf64_Rela *dst_rela) @@ -134,9 +141,18 @@ static void modify_rela_to_RELATIVE(elf_link_t *elf_link, elf_file_t *src_ef, El // some addr need be 0, use by cmp jump char *name = elf_get_sym_name(src_ef, sym); if (!is_symbol_maybe_undefined(name)) { - si_panic("%s\n", name); + si_panic("symbol can not be UND, %s %s\n", src_ef->file_name, name); + } + + // nold mode dynsym is only libc, clear UND rela + // FEATURE: to support UND symbol + if (is_static_nold_mode(elf_link)) { + elf_clear_rela(dst_rela); + return; } - // do nothing + + // UND symbol need new index + fix_rela_new_index(elf_link, src_ef, src_rela, dst_rela); return; } @@ -220,39 +236,56 @@ static void rela_use_relative(elf_link_t *elf_link, elf_file_t *src_ef, Elf64_Re rela_change_to_relative(dst_rela, new_addr_in_data); } +static void fix_rela_tls_offset(elf_link_t *elf_link, elf_file_t *src_ef, Elf64_Rela *src_rela, Elf64_Rela *dst_rela) +{ + // TODO: static mode, x86, tls offset is imm value, so rela need clear + + // Offset in initial TLS block + // 00000000001f0d78 0000000000000012 R_X86_64_TPOFF64 38 + // TLS type have no sym index + dst_rela->r_addend = elf_get_new_tls_offset(elf_link, src_ef, src_rela->r_addend); + // 00000000001ebf38 0000052e00000012 R_X86_64_TPOFF64 0000000000000040 __libc_dlerror_result@@GLIBC_PRIVATE + 0 + // force clear sym index + dst_rela->r_info = ELF64_R_INFO(0, ELF64_R_TYPE(src_rela->r_info)); + SI_LOG_DEBUG("%s offset %lx info %lx\n", src_ef->file_name, src_rela->r_offset, src_rela->r_info); +} + +static void fix_rela_got_entry(elf_link_t *elf_link, elf_file_t *src_ef, Elf64_Rela *src_rela, Elf64_Rela *dst_rela) +{ + // set addr of so path list + if (elf_link->hook_func) { + // .got var point to ___g_so_path_list 
data area, change point to real addr + // .rela.dyn + // 0000000000003ff0 0000003000000006 R_X86_64_GLOB_DAT 0000000000004000 ___g_so_path_list + 0 + // .rela.text + // 000000000000129d 0000006e0000002a R_X86_64_REX_GOTPCRELX 0000000000004000 ___g_so_path_list - 4 + // 129a: 4c 8b 2d 4f 2d 00 00 mov 0x2d4f(%rip),%r13 # 3ff0 <___g_so_path_list@@Base-0x10> + // 48: 0000000000004000 4096 OBJECT GLOBAL DEFAULT 27 ___g_so_path_list + unsigned int old_index = ELF64_R_SYM(src_rela->r_info); + const char *sym_name = elf_get_dynsym_name_by_index(src_ef, old_index); + if (elf_is_same_symbol_name(sym_name, "___g_so_path_list")) { + // when ELF load, real addr will set + rela_change_to_relative(dst_rela, (unsigned long)elf_link->so_path_struct); + return; + } + } + + // some symbol do not export in .dynsym, change to R_AARCH64_RELATIVE + modify_rela_to_RELATIVE(elf_link, src_ef, src_rela, dst_rela); +} + void modify_rela_dyn_item(elf_link_t *elf_link, elf_file_t *src_ef, Elf64_Rela *src_rela, Elf64_Rela *dst_rela) { - int type; Elf64_Sym *sym = elf_get_dynsym_by_rela(src_ef, src_rela); // modify offset dst_rela->r_offset = get_new_addr_by_old_addr(elf_link, src_ef, src_rela->r_offset); - unsigned int old_index = ELF64_R_SYM(src_rela->r_info); - - type = ELF64_R_TYPE(src_rela->r_info); + int type = ELF64_R_TYPE(src_rela->r_info); switch (type) { case R_X86_64_GLOB_DAT: - // set addr of so path list - if (elf_link->hook_func) { - // .got var point to ___g_so_path_list data area, change point to real addr - // .rela.dyn - // 0000000000003ff0 0000003000000006 R_X86_64_GLOB_DAT 0000000000004000 ___g_so_path_list + 0 - // .rela.text - // 000000000000129d 0000006e0000002a R_X86_64_REX_GOTPCRELX 0000000000004000 ___g_so_path_list - 4 - // 129a: 4c 8b 2d 4f 2d 00 00 mov 0x2d4f(%rip),%r13 # 3ff0 <___g_so_path_list@@Base-0x10> - // 48: 0000000000004000 4096 OBJECT GLOBAL DEFAULT 27 ___g_so_path_list - const char *sym_name = elf_get_dynsym_name_by_index(src_ef, old_index); - if 
(elf_is_same_symbol_name(sym_name, "___g_so_path_list")) { - // when ELF load, real addr will set - rela_change_to_relative(dst_rela, (unsigned long)elf_link->so_path_struct); - break; - } - } - fallthrough; case R_AARCH64_GLOB_DAT: - // some symbol do not export in .dynsym, change to R_AARCH64_RELATIVE - modify_rela_to_RELATIVE(elf_link, src_ef, src_rela, dst_rela); + fix_rela_got_entry(elf_link, src_ef, src_rela, dst_rela); break; case R_X86_64_64: case R_AARCH64_ABS64: @@ -284,10 +317,7 @@ void modify_rela_dyn_item(elf_link_t *elf_link, elf_file_t *src_ef, Elf64_Rela * break; case R_X86_64_TPOFF64: case R_X86_64_TPOFF32: - // Offset in initial TLS block - // 00000000001f0d78 0000000000000012 R_X86_64_TPOFF64 38 - // TLS type have no sym index - dst_rela->r_addend = elf_get_new_tls_offset(elf_link, src_ef, src_rela->r_addend); + fix_rela_tls_offset(elf_link, src_ef, src_rela, dst_rela); break; case R_X86_64_COPY: rela_use_relative(elf_link, src_ef, src_rela, dst_rela); @@ -296,10 +326,7 @@ void modify_rela_dyn_item(elf_link_t *elf_link, elf_file_t *src_ef, Elf64_Rela * // Variables in the bss section, some from glibc, some declared by the application // Redefined in the template file temporarily, so skip here // TODO: is really do nothing? 
- { - int new_index = get_new_sym_index(elf_link, src_ef, old_index); - dst_rela->r_info = ELF64_R_INFO(new_index, ELF64_R_TYPE(src_rela->r_info)); - } + fix_rela_new_index(elf_link, src_ef, src_rela, dst_rela); break; case R_AARCH64_NONE: /* nothing need to do */ diff --git a/src/elf_write_elf.c b/src/elf_write_elf.c index ade6a22..5d6fbb7 100644 --- a/src/elf_write_elf.c +++ b/src/elf_write_elf.c @@ -278,9 +278,9 @@ static void record_rela_arr(elf_link_t *elf_link, elf_file_t *ef, Elf64_Shdr *se void *src = ((void *)ef->hdr) + sec->sh_offset; - if (is_rela_plt_name(name)) { + if (elf_is_rela_plt_name(name)) { arr = elf_link->rela_plt_arr; - } else if (is_rela_dyn_name(name)) { + } else if (elf_is_rela_dyn_name(name)) { arr = elf_link->rela_dyn_arr; } else { return; diff --git a/tests/bash/Makefile b/tests/bash/Makefile index d494bcd..936fb19 100644 --- a/tests/bash/Makefile +++ b/tests/bash/Makefile @@ -48,6 +48,8 @@ bash: $(SYSBOOST) $(MODE) $(BASH) $(LIBTINFO) $(LIBC) readelf -W -a $(BASH).rto > bash.rto.elf objdump -d $(BASH).rto > bash.rto.asm + readelf -W -s $(BASH).rto > bash.rto.dynsym.elf + readelf -W -s $(LIBC_R) > libc.so.dynsym.elf @echo ===run=== $(BASH).rto -c "echo 1" -- Gitee