From a358aa38e205cc78cd904d95fc2746282cf0271d Mon Sep 17 00:00:00 2001 From: Alexander Yermolovich Date: Sun, 30 Jul 2023 17:32:04 -0700 Subject: [PATCH 01/47] [BOLT][DWARF] Update handling of size 1 ranges and fix sub-programs with ranges When the output range is only one entry and the input is low_pc/high_pc, do not convert to ranges. This helps with the size of .debug_ranges/.debug_rnglists. It also helps when either low_pc or high_pc is 0: we no longer generate potentially invalid ranges that result in an LLDB error. Also fixed handling of DW_TAG_subprogram with ranges. Such DIEs can be created with -fbasic-block-sections=all. Reviewed By: maksfb Differential Revision: https://reviews.llvm.org/D156374 --- bolt/include/bolt/Core/DIEBuilder.h | 1 - bolt/include/bolt/Rewrite/DWARFRewriter.h | 4 +- bolt/lib/Rewrite/DWARFRewriter.cpp | 140 +++-- bolt/test/AArch64/go_dwarf.test | 5 +- .../X86/Inputs/debug-fission-simple-convert.s | 446 ++++++++++++++++ ...f-do-no-convert-low-pc-high-pc-to-ranges.s | 501 ++++++++++++++++++ ...4-do-no-convert-low-pc-high-pc-to-ranges.s | 421 +++++++++++++++ .../dwarf4-subprogram-multiple-ranges-main.s | 331 ++++++++++++ .../dwarf4-subprogram-single-gc-ranges-main.s | 310 +++++++++++ .../dwarf4-subprogram-single-ranges-main.s | 309 +++++++++++ ...5-do-no-convert-low-pc-high-pc-to-ranges.s | 478 +++++++++++++++++ .../dwarf5-subprogram-multiple-ranges-main.s | 385 ++++++++++++++ .../dwarf5-subprogram-single-gc-ranges-main.s | 359 +++++++++++++ .../dwarf5-subprogram-single-ranges-main.s | 357 +++++++++++++ bolt/test/X86/debug-fission-single-convert.s | 76 +++ bolt/test/X86/debug-fission-single.s | 17 +- ...o-no-convert-low-pc-high-pc-to-ranges.test | 24 + bolt/test/X86/dwarf4-df-dualcu-loclist.test | 12 +- bolt/test/X86/dwarf4-df-dualcu.test | 2 +- ...o-no-convert-low-pc-high-pc-to-ranges.test | 20 + .../X86/dwarf4-size-0-inlined_subroutine.s | 4 +- .../dwarf4-subprogram-multiple-ranges.test | 23 + .../dwarf4-subprogram-single-gc-ranges.test | 23 + 
.../X86/dwarf4-subprogram-single-ranges.test | 25 + bolt/test/X86/dwarf4-types-dwarf5-types.test | 4 +- bolt/test/X86/dwarf4-types-dwarf5.test | 2 +- bolt/test/X86/dwarf5-df-dualcu-loclist.test | 4 +- bolt/test/X86/dwarf5-df-dualcu.test | 2 +- bolt/test/X86/dwarf5-df-mono-dualcu.test | 2 +- ...o-no-convert-low-pc-high-pc-to-ranges.test | 20 + ...4-gdb-index-types-gdb-generated-gdb11.test | 4 +- ...f4-gdb-index-types-gdb-generated-gdb9.test | 4 +- ...-dwarf4-gdb-index-types-lld-generated.test | 4 +- bolt/test/X86/dwarf5-dwarf4-monolithic.test | 2 +- ...ypes-backward-forward-cross-reference.test | 8 +- ...arf5-ftypes-dwo-mono-input-dwp-output.test | 16 +- ...5-gdb-index-types-gdb-generated-gdb11.test | 4 +- ...f5-gdb-index-types-gdb-generated-gdb9.test | 4 +- .../dwarf5-gdb-index-types-lld-generated.test | 4 +- bolt/test/X86/dwarf5-locaddrx.test | 2 +- bolt/test/X86/dwarf5-locexpr-referrence.test | 4 +- bolt/test/X86/dwarf5-lowpc-highpc-convert.s | 2 +- .../X86/dwarf5-one-loclists-two-bases.test | 2 +- .../X86/dwarf5-rangeoffset-to-rangeindex.s | 2 +- .../X86/dwarf5-split-dwarf4-monolithic.test | 2 +- .../dwarf5-subprogram-multiple-ranges.test | 23 + .../dwarf5-subprogram-single-gc-ranges.test | 23 + .../X86/dwarf5-subprogram-single-ranges.test | 25 + bolt/test/X86/dwarf5-two-loclists.test | 2 +- bolt/test/X86/dwarf5-two-rnglists.test | 2 +- .../dwarf5-types-backward-cross-reference.s | 2 +- .../dwarf5-types-forward-cross-reference.s | 2 +- bolt/test/X86/gdbindex.test | 4 +- 53 files changed, 4344 insertions(+), 110 deletions(-) create mode 100644 bolt/test/X86/Inputs/debug-fission-simple-convert.s create mode 100644 bolt/test/X86/Inputs/dwarf4-df-do-no-convert-low-pc-high-pc-to-ranges.s create mode 100644 bolt/test/X86/Inputs/dwarf4-do-no-convert-low-pc-high-pc-to-ranges.s create mode 100644 bolt/test/X86/Inputs/dwarf4-subprogram-multiple-ranges-main.s create mode 100644 bolt/test/X86/Inputs/dwarf4-subprogram-single-gc-ranges-main.s create mode 100644 
bolt/test/X86/Inputs/dwarf4-subprogram-single-ranges-main.s create mode 100644 bolt/test/X86/Inputs/dwarf5-do-no-convert-low-pc-high-pc-to-ranges.s create mode 100644 bolt/test/X86/Inputs/dwarf5-subprogram-multiple-ranges-main.s create mode 100644 bolt/test/X86/Inputs/dwarf5-subprogram-single-gc-ranges-main.s create mode 100644 bolt/test/X86/Inputs/dwarf5-subprogram-single-ranges-main.s create mode 100644 bolt/test/X86/debug-fission-single-convert.s create mode 100644 bolt/test/X86/dwarf4-df-do-no-convert-low-pc-high-pc-to-ranges.test create mode 100644 bolt/test/X86/dwarf4-do-no-convert-low-pc-high-pc-to-ranges.test create mode 100644 bolt/test/X86/dwarf4-subprogram-multiple-ranges.test create mode 100644 bolt/test/X86/dwarf4-subprogram-single-gc-ranges.test create mode 100644 bolt/test/X86/dwarf4-subprogram-single-ranges.test create mode 100644 bolt/test/X86/dwarf5-do-no-convert-low-pc-high-pc-to-ranges.test create mode 100644 bolt/test/X86/dwarf5-subprogram-multiple-ranges.test create mode 100644 bolt/test/X86/dwarf5-subprogram-single-gc-ranges.test create mode 100644 bolt/test/X86/dwarf5-subprogram-single-ranges.test diff --git a/bolt/include/bolt/Core/DIEBuilder.h b/bolt/include/bolt/Core/DIEBuilder.h index 2eac4ceba4cb..4034cb4ac118 100644 --- a/bolt/include/bolt/Core/DIEBuilder.h +++ b/bolt/include/bolt/Core/DIEBuilder.h @@ -359,7 +359,6 @@ public: return Die->replaceValue(getState().DIEAlloc, Attribute, Form, NewValue); } - template bool deleteValue(DIEValueList *Die, dwarf::Attribute Attribute) { return Die->deleteValue(Attribute); } diff --git a/bolt/include/bolt/Rewrite/DWARFRewriter.h b/bolt/include/bolt/Rewrite/DWARFRewriter.h index 2de9c7139822..c0ec3868041f 100644 --- a/bolt/include/bolt/Rewrite/DWARFRewriter.h +++ b/bolt/include/bolt/Rewrite/DWARFRewriter.h @@ -125,7 +125,7 @@ private: /// attribute. 
void updateDWARFObjectAddressRanges( DWARFUnit &Unit, DIEBuilder &DIEBldr, DIE &Die, - uint64_t DebugRangesOffset, uint64_t LowPCToUse, + uint64_t DebugRangesOffset, std::optional RangesBase = std::nullopt); std::unique_ptr @@ -173,7 +173,7 @@ private: void convertToRangesPatchDebugInfo( DWARFUnit &Unit, DIEBuilder &DIEBldr, DIE &Die, uint64_t RangesSectionOffset, DIEValue &LowPCAttrInfo, - DIEValue &HighPCAttrInfo, uint64_t LowPCToUse, + DIEValue &HighPCAttrInfo, std::optional RangesBase = std::nullopt); /// Adds a \p Str to .debug_str section. diff --git a/bolt/lib/Rewrite/DWARFRewriter.cpp b/bolt/lib/Rewrite/DWARFRewriter.cpp index 85224ddbc803..a2408b75779c 100644 --- a/bolt/lib/Rewrite/DWARFRewriter.cpp +++ b/bolt/lib/Rewrite/DWARFRewriter.cpp @@ -246,6 +246,12 @@ static cl::opt BatchSize( "Specifies the size of batches for processing CUs. Higher number has " "better performance, but more memory usage. Default value is 1."), cl::Hidden, cl::init(1), cl::cat(BoltCategory)); + +static cl::opt AlwaysConvertToRanges( + "always-convert-to-ranges", + cl::desc("This option is for testing purposes only. It forces BOLT to " + "convert low_pc/high_pc to ranges always."), + cl::ReallyHidden, cl::init(false), cl::cat(BoltCategory)); } // namespace opts static bool getLowAndHighPC(const DIE &Die, const DWARFUnit &DU, @@ -693,6 +699,45 @@ void DWARFRewriter::updateUnitDebugInfo( const std::vector> &DIs = DIEBldr.getDIEsByUnit(Unit); + // Either updates or normalizes DW_AT_ranges to DW_AT_low_pc and DW_AT_high_pc. 
+ auto updateLowPCHighPC = [&](DIE *Die, const DIEValue &LowPCVal, + const DIEValue &HighPCVal, uint64_t LowPC, + const uint64_t HighPC) { + dwarf::Attribute AttrLowPC = dwarf::DW_AT_low_pc; + dwarf::Form FormLowPC = dwarf::DW_FORM_addr; + dwarf::Attribute AttrHighPC = dwarf::DW_AT_high_pc; + dwarf::Form FormHighPC = dwarf::DW_FORM_data4; + const uint32_t Size = HighPC - LowPC; + // Whatever was generated is not low_pc/high_pc, so will reset to + // default for size 1. + if (!LowPCVal || !HighPCVal) { + if (Unit.getVersion() >= 5) + FormLowPC = dwarf::DW_FORM_addrx; + else if (Unit.isDWOUnit()) + FormLowPC = dwarf::DW_FORM_GNU_addr_index; + } else { + AttrLowPC = LowPCVal.getAttribute(); + FormLowPC = LowPCVal.getForm(); + AttrHighPC = HighPCVal.getAttribute(); + FormHighPC = HighPCVal.getForm(); + } + + if (FormLowPC == dwarf::DW_FORM_addrx || + FormLowPC == dwarf::DW_FORM_GNU_addr_index) + LowPC = AddrWriter->getIndexFromAddress(LowPC, Unit); + + if (LowPCVal) + DIEBldr.replaceValue(Die, AttrLowPC, FormLowPC, DIEInteger(LowPC)); + else + DIEBldr.addValue(Die, AttrLowPC, FormLowPC, DIEInteger(LowPC)); + if (HighPCVal) { + DIEBldr.replaceValue(Die, AttrHighPC, FormHighPC, DIEInteger(Size)); + } else { + DIEBldr.deleteValue(Die, dwarf::DW_AT_ranges); + DIEBldr.addValue(Die, AttrHighPC, FormHighPC, DIEInteger(Size)); + } + }; + for (const std::unique_ptr &DI : DIs) { DIE *Die = DI->Die; switch (Die->getTag()) { @@ -726,7 +771,7 @@ void DWARFRewriter::updateUnitDebugInfo( ARangesSectionWriter->addCURanges(Unit.getOffset(), std::move(OutputRanges)); updateDWARFObjectAddressRanges(Unit, DIEBldr, *Die, RangesSectionOffset, - 0, RangesBase); + RangesBase); DIEValue StmtListAttrVal = Die->findAttribute(dwarf::DW_AT_stmt_list); if (LineTablePatchMap.count(&Unit)) DIEBldr.replaceValue(Die, dwarf::DW_AT_stmt_list, @@ -737,8 +782,9 @@ void DWARFRewriter::updateUnitDebugInfo( case dwarf::DW_TAG_subprogram: { // Get function address either from ranges or [LowPC, HighPC) pair. 
- uint64_t Address; + uint64_t Address = UINT64_MAX; uint64_t SectionIndex, HighPC; + DebugAddressRangesVector FunctionRanges; if (!getLowAndHighPC(*Die, Unit, Address, HighPC, SectionIndex)) { Expected RangesOrError = getDIEAddressRanges(*Die, Unit); @@ -751,23 +797,41 @@ void DWARFRewriter::updateUnitDebugInfo( if (Ranges.empty()) break; - Address = Ranges.front().LowPC; + for (const DWARFAddressRange &Range : Ranges) { + if (const BinaryFunction *Function = + BC.getBinaryFunctionAtAddress(Range.LowPC)) + FunctionRanges.append(Function->getOutputAddressRanges()); + } + } else { + if (const BinaryFunction *Function = + BC.getBinaryFunctionAtAddress(Address)) + FunctionRanges = Function->getOutputAddressRanges(); } // Clear cached ranges as the new function will have its own set. CachedRanges.clear(); + DIEValue LowPCVal = Die->findAttribute(dwarf::DW_AT_low_pc); + DIEValue HighPCVal = Die->findAttribute(dwarf::DW_AT_high_pc); + if (FunctionRanges.empty()) { + if (LowPCVal && HighPCVal) { + FunctionRanges.push_back({0, HighPCVal.getDIEInteger().getValue()}); + } else { + // I haven't seen this case, but who knows what other compilers + // generate. 
+ FunctionRanges.push_back({0, 1}); + errs() << "BOLT-WARNING: [internal-dwarf-error]: subprogram got GCed " + "by the linker, DW_AT_ranges is used\n"; + } + } - DebugAddressRangesVector FunctionRanges; - if (const BinaryFunction *Function = - BC.getBinaryFunctionAtAddress(Address)) - FunctionRanges = Function->getOutputAddressRanges(); - - if (FunctionRanges.empty()) - FunctionRanges.push_back({0, 0}); + if (FunctionRanges.size() == 1 && !opts::AlwaysConvertToRanges) { + updateLowPCHighPC(Die, LowPCVal, HighPCVal, FunctionRanges.back().LowPC, + FunctionRanges.back().HighPC); + break; + } updateDWARFObjectAddressRanges( - Unit, DIEBldr, *Die, RangesSectionWriter.addRanges(FunctionRanges), - 0); + Unit, DIEBldr, *Die, RangesSectionWriter.addRanges(FunctionRanges)); break; } @@ -783,37 +847,33 @@ void DWARFRewriter::updateUnitDebugInfo( ? BC.getBinaryFunctionContainingAddress( RangesOrError->front().LowPC) : nullptr; - bool ErrorState = false; - std::optional NewLowPC; + DebugAddressRangesVector OutputRanges; if (Function) { - DebugAddressRangesVector OutputRanges = - Function->translateInputToOutputRanges(*RangesOrError); + OutputRanges = Function->translateInputToOutputRanges(*RangesOrError); LLVM_DEBUG(if (OutputRanges.empty() != RangesOrError->empty()) { dbgs() << "BOLT-DEBUG: problem with DIE at 0x" << Twine::utohexstr(Die->getOffset()) << " in CU at 0x" << Twine::utohexstr(Unit.getOffset()) << '\n'; }); - if (!OutputRanges.empty()) - NewLowPC = OutputRanges.front().LowPC; - RangesSectionOffset = RangesSectionWriter.addRanges( - std::move(OutputRanges), CachedRanges); + if (opts::AlwaysConvertToRanges || OutputRanges.size() > 1) { + RangesSectionOffset = RangesSectionWriter.addRanges( + std::move(OutputRanges), CachedRanges); + OutputRanges.clear(); + } else if (OutputRanges.empty()) { + OutputRanges.push_back({RangesOrError.get().front().LowPC, + RangesOrError.get().front().HighPC}); + } } else if (!RangesOrError) { - ErrorState = true; 
consumeError(RangesOrError.takeError()); } - - uint64_t LowPCToUse = 0; - if (!ErrorState && RangesOrError.get().size() == 1 && - RangesOrError.get().begin()->LowPC == - RangesOrError.get().begin()->HighPC) { - if (NewLowPC) - LowPCToUse = NewLowPC.value(); - else - LowPCToUse = RangesOrError.get().begin()->LowPC; + DIEValue LowPCVal = Die->findAttribute(dwarf::DW_AT_low_pc); + DIEValue HighPCVal = Die->findAttribute(dwarf::DW_AT_high_pc); + if (OutputRanges.size() == 1) { + updateLowPCHighPC(Die, LowPCVal, HighPCVal, OutputRanges.back().LowPC, + OutputRanges.back().HighPC); + break; } - - updateDWARFObjectAddressRanges(Unit, DIEBldr, *Die, RangesSectionOffset, - LowPCToUse); + updateDWARFObjectAddressRanges(Unit, DIEBldr, *Die, RangesSectionOffset); break; } case dwarf::DW_TAG_call_site: { @@ -1147,7 +1207,7 @@ void DWARFRewriter::updateUnitDebugInfo( void DWARFRewriter::updateDWARFObjectAddressRanges( DWARFUnit &Unit, DIEBuilder &DIEBldr, DIE &Die, uint64_t DebugRangesOffset, - uint64_t LowPCToUse, std::optional RangesBase) { + std::optional RangesBase) { if (RangesBase) { // If DW_AT_GNU_ranges_base is present, update it. 
No further modifications @@ -1195,7 +1255,7 @@ void DWARFRewriter::updateDWARFObjectAddressRanges( LowPCAttrInfo.getForm() != dwarf::DW_FORM_GNU_addr_index && LowPCAttrInfo.getForm() != dwarf::DW_FORM_addrx) DIEBldr.replaceValue(&Die, dwarf::DW_AT_low_pc, LowPCAttrInfo.getForm(), - DIEInteger(LowPCToUse)); + DIEInteger(0)); return; } @@ -1223,8 +1283,7 @@ void DWARFRewriter::updateDWARFObjectAddressRanges( if (LowPCAttrInfo && HighPCAttrInfo) { convertToRangesPatchDebugInfo(Unit, DIEBldr, Die, DebugRangesOffset, - LowPCAttrInfo, HighPCAttrInfo, LowPCToUse, - RangesBase); + LowPCAttrInfo, HighPCAttrInfo, RangesBase); } else if (!(Unit.isDWOUnit() && Die.getTag() == dwarf::DW_TAG_compile_unit)) { if (opts::Verbosity >= 1) @@ -2086,8 +2145,7 @@ DWARFRewriter::makeFinalLocListsSection(DWARFVersion Version) { void DWARFRewriter::convertToRangesPatchDebugInfo( DWARFUnit &Unit, DIEBuilder &DIEBldr, DIE &Die, uint64_t RangesSectionOffset, DIEValue &LowPCAttrInfo, - DIEValue &HighPCAttrInfo, uint64_t LowPCToUse, - std::optional RangesBase) { + DIEValue &HighPCAttrInfo, std::optional RangesBase) { uint32_t BaseOffset = 0; dwarf::Form LowForm = LowPCAttrInfo.getForm(); dwarf::Attribute RangeBaseAttribute = dwarf::DW_AT_GNU_ranges_base; @@ -2120,12 +2178,12 @@ void DWARFRewriter::convertToRangesPatchDebugInfo( // DW_FORM_addrx. Former is when DW_AT_rnglists_base is present. Latter is // when it's absent. if (LowForm == dwarf::DW_FORM_addrx) { - const uint32_t Index = AddrWriter->getIndexFromAddress(LowPCToUse, Unit); + const uint32_t Index = AddrWriter->getIndexFromAddress(0, Unit); DIEBldr.replaceValue(&Die, LowPCAttrInfo.getAttribute(), LowPCAttrInfo.getForm(), DIEInteger(Index)); } else DIEBldr.replaceValue(&Die, LowPCAttrInfo.getAttribute(), - LowPCAttrInfo.getForm(), DIEInteger(LowPCToUse)); + LowPCAttrInfo.getForm(), DIEInteger(0)); // Original CU didn't have DW_AT_*_base. We converted it's children (or // dwo), so need to insert it into CU. 
diff --git a/bolt/test/AArch64/go_dwarf.test b/bolt/test/AArch64/go_dwarf.test index c901835cf69a..7d2765a1c7fb 100644 --- a/bolt/test/AArch64/go_dwarf.test +++ b/bolt/test/AArch64/go_dwarf.test @@ -49,6 +49,5 @@ CHECK-NEXT: DW_AT_decl_file CHECK-NEXT: DW_AT_decl_line (1) CHECK-NEXT: DW_AT_decl_column (0x05) CHECK-NEXT: DW_AT_type -CHECK-NEXT: DW_AT_low_pc (0x0000000000000000) -CHECK-NEXT: DW_AT_ranges (0x00000030 -CHECK-NEXT: [0x0000000000000660, 0x0000000000000684)) +CHECK-NEXT: DW_AT_low_pc (0x0000000000000660) +CHECK-NEXT: DW_AT_high_pc (0x0000000000000024) diff --git a/bolt/test/X86/Inputs/debug-fission-simple-convert.s b/bolt/test/X86/Inputs/debug-fission-simple-convert.s new file mode 100644 index 000000000000..da434207d5da --- /dev/null +++ b/bolt/test/X86/Inputs/debug-fission-simple-convert.s @@ -0,0 +1,446 @@ + .text + .file "debug-fission-simple.cpp" + .file 1 "" "debug-fission-simple.cpp" + .section .text._Z7doStuffi,"ax",@progbits + .globl _Z7doStuffi # -- Begin function _Z7doStuffi + .p2align 4, 0x90 + .type _Z7doStuffi,@function +_Z7doStuffi: # @_Z7doStuffi +.Lfunc_begin0: + .loc 1 3 0 # debug-fission-simple.cpp:3:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + movl %edi, -4(%rbp) +.Ltmp0: + .loc 1 4 11 prologue_end # debug-fission-simple.cpp:4:11 + cmpl $5, -4(%rbp) +.Ltmp1: + .loc 1 4 7 is_stmt 0 # debug-fission-simple.cpp:4:7 + jne .LBB0_2 +# %bb.1: # %if.then +.Ltmp2: + .loc 1 5 16 is_stmt 1 # debug-fission-simple.cpp:5:16 + movl _ZL3foo, %eax + .loc 1 5 14 is_stmt 0 # debug-fission-simple.cpp:5:14 + addl $1, %eax + .loc 1 5 9 # debug-fission-simple.cpp:5:9 + addl -4(%rbp), %eax + movl %eax, -4(%rbp) + .loc 1 5 5 # debug-fission-simple.cpp:5:5 + jmp .LBB0_3 +.LBB0_2: # %if.else + .loc 1 7 9 is_stmt 1 # debug-fission-simple.cpp:7:9 + movl -4(%rbp), %eax + subl $1, %eax + movl %eax, -4(%rbp) +.Ltmp3: +.LBB0_3: # %if.end + .loc 1 8 10 # 
debug-fission-simple.cpp:8:10 + movl -4(%rbp), %eax + .loc 1 8 3 is_stmt 0 # debug-fission-simple.cpp:8:3 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp4: +.Lfunc_end0: + .size _Z7doStuffi, .Lfunc_end0-_Z7doStuffi + .cfi_endproc + # -- End function + .section .text._Z8doStuff2i,"ax",@progbits + .globl _Z8doStuff2i # -- Begin function _Z8doStuff2i + .p2align 4, 0x90 + .type _Z8doStuff2i,@function +_Z8doStuff2i: # @_Z8doStuff2i +.Lfunc_begin1: + .loc 1 11 0 is_stmt 1 # debug-fission-simple.cpp:11:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + movl %edi, -4(%rbp) +.Ltmp5: + .loc 1 12 14 prologue_end # debug-fission-simple.cpp:12:14 + movl -4(%rbp), %eax + addl $3, %eax + movl %eax, -4(%rbp) + .loc 1 12 3 is_stmt 0 # debug-fission-simple.cpp:12:3 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp6: +.Lfunc_end1: + .size _Z8doStuff2i, .Lfunc_end1-_Z8doStuff2i + .cfi_endproc + # -- End function + .section .text._Z6_startv,"ax",@progbits + .globl _Z6_startv # -- Begin function _Z6_startv + .p2align 4, 0x90 + .type _Z6_startv,@function +_Z6_startv: # @_Z6_startv +.Lfunc_begin2: + .loc 1 15 0 is_stmt 1 # debug-fission-simple.cpp:15:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + subq $16, %rsp +.Ltmp7: + .loc 1 16 7 prologue_end # debug-fission-simple.cpp:16:7 + movl $4, -4(%rbp) + .loc 1 17 18 # debug-fission-simple.cpp:17:18 + movl -4(%rbp), %edi + .loc 1 17 10 is_stmt 0 # debug-fission-simple.cpp:17:10 + callq _Z7doStuffi + .loc 1 17 3 # debug-fission-simple.cpp:17:3 + addq $16, %rsp + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp8: +.Lfunc_end2: + .size _Z6_startv, .Lfunc_end2-_Z6_startv + .cfi_endproc + # -- End function + .type _ZL3foo,@object # @_ZL3foo + .data + .p2align 2 +_ZL3foo: + .long 2 # 0x2 + .size _ZL3foo, 4 + + .section .debug_abbrev,"",@progbits + .byte 1 
# Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 0 # DW_CHILDREN_no + .byte 16 # DW_AT_stmt_list + .byte 23 # DW_FORM_sec_offset + .byte 27 # DW_AT_comp_dir + .byte 14 # DW_FORM_strp + .ascii "\264B" # DW_AT_GNU_pubnames + .byte 25 # DW_FORM_flag_present + .ascii "\260B" # DW_AT_GNU_dwo_name + .byte 14 # DW_FORM_strp + .ascii "\261B" # DW_AT_GNU_dwo_id + .byte 7 # DW_FORM_data8 + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 85 # DW_AT_ranges + .byte 23 # DW_FORM_sec_offset + .ascii "\263B" # DW_AT_GNU_addr_base + .byte 23 # DW_FORM_sec_offset + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit +.Ldebug_info_start0: + .short 4 # DWARF version number + .long .debug_abbrev # Offset Into Abbrev. Section + .byte 8 # Address Size (in bytes) + .byte 1 # Abbrev [1] 0xb:0x25 DW_TAG_compile_unit + .long .Lline_table_start0 # DW_AT_stmt_list + .long .Lskel_string0 # DW_AT_comp_dir + # DW_AT_GNU_pubnames + .long .Lskel_string1 # DW_AT_GNU_dwo_name + .quad 436953012669069206 # DW_AT_GNU_dwo_id + .quad 0 # DW_AT_low_pc + .long .Ldebug_ranges0 # DW_AT_ranges + .long .Laddr_table_base0 # DW_AT_GNU_addr_base +.Ldebug_info_end0: + .section .debug_ranges,"",@progbits +.Ldebug_ranges0: + .quad .Lfunc_begin0 + .quad .Lfunc_end0 + .quad .Lfunc_begin1 + .quad .Lfunc_end1 + .quad .Lfunc_begin2 + .quad .Lfunc_end2 + .quad 0 + .quad 0 + .section .debug_str,"MS",@progbits,1 +.Lskel_string0: + .asciz "" # string offset=0 +.Lskel_string1: + .asciz "debug-fission-simple-convert.dwo" # string offset=47 + .section .debug_str.dwo,"eMS",@progbits,1 +.Linfo_string0: + .asciz "foo" # string offset=0 +.Linfo_string1: + .asciz "int" # string offset=4 +.Linfo_string2: + .asciz "_ZL3foo" # string offset=8 +.Linfo_string3: + .asciz "_Z7doStuffi" # string offset=16 +.Linfo_string4: + .asciz "doStuff" # string offset=28 +.Linfo_string5: + .asciz "_Z8doStuff2i" # 
string offset=36 +.Linfo_string6: + .asciz "doStuff2" # string offset=49 +.Linfo_string7: + .asciz "_Z6_startv" # string offset=58 +.Linfo_string8: + .asciz "_start" # string offset=69 +.Linfo_string9: + .asciz "val" # string offset=76 +.Linfo_string10: + .asciz "clang version 13.0.0" # string offset=80 +.Linfo_string11: + .asciz "debug-fission-simple.cpp" # string offset=214 +.Linfo_string12: + .asciz "debug-fission-simple-convert.dwo" # string offset=239 + .section .debug_str_offsets.dwo,"e",@progbits + .long 0 + .long 4 + .long 8 + .long 16 + .long 28 + .long 36 + .long 49 + .long 58 + .long 69 + .long 76 + .long 80 + .long 214 + .long 239 + .section .debug_info.dwo,"e",@progbits + .long .Ldebug_info_dwo_end0-.Ldebug_info_dwo_start0 # Length of Unit +.Ldebug_info_dwo_start0: + .short 4 # DWARF version number + .long 0 # Offset Into Abbrev. Section + .byte 8 # Address Size (in bytes) + .byte 1 # Abbrev [1] 0xb:0x73 DW_TAG_compile_unit + .byte 10 # DW_AT_producer + .short 4 # DW_AT_language + .byte 11 # DW_AT_name + .byte 12 # DW_AT_GNU_dwo_name + .quad 436953012669069206 # DW_AT_GNU_dwo_id + .byte 2 # Abbrev [2] 0x19:0xc DW_TAG_variable + .byte 0 # DW_AT_name + .long 37 # DW_AT_type + .byte 1 # DW_AT_decl_file + .byte 2 # DW_AT_decl_line + .byte 2 # DW_AT_location + .byte 251 + .byte 0 + .byte 2 # DW_AT_linkage_name + .byte 3 # Abbrev [3] 0x25:0x4 DW_TAG_base_type + .byte 1 # DW_AT_name + .byte 5 # DW_AT_encoding + .byte 4 # DW_AT_byte_size + .byte 4 # Abbrev [4] 0x29:0x1c DW_TAG_subprogram + .byte 1 # DW_AT_low_pc + .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .byte 3 # DW_AT_linkage_name + .byte 4 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 3 # DW_AT_decl_line + .long 37 # DW_AT_type + # DW_AT_external + .byte 5 # Abbrev [5] 0x39:0xb DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 124 + .byte 9 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 3 # DW_AT_decl_line + .long 37 # DW_AT_type + 
.byte 0 # End Of Children Mark + .byte 4 # Abbrev [4] 0x45:0x1c DW_TAG_subprogram + .byte 2 # DW_AT_low_pc + .long .Lfunc_end1-.Lfunc_begin1 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .byte 5 # DW_AT_linkage_name + .byte 6 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 11 # DW_AT_decl_line + .long 37 # DW_AT_type + # DW_AT_external + .byte 5 # Abbrev [5] 0x55:0xb DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 124 + .byte 9 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 11 # DW_AT_decl_line + .long 37 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 4 # Abbrev [4] 0x61:0x1c DW_TAG_subprogram + .byte 3 # DW_AT_low_pc + .long .Lfunc_end2-.Lfunc_begin2 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .byte 7 # DW_AT_linkage_name + .byte 8 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 15 # DW_AT_decl_line + .long 37 # DW_AT_type + # DW_AT_external + .byte 6 # Abbrev [6] 0x71:0xb DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 124 + .byte 9 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 16 # DW_AT_decl_line + .long 37 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark +.Ldebug_info_dwo_end0: + .section .debug_abbrev.dwo,"e",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .ascii "\202>" # DW_FORM_GNU_str_index + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 3 # DW_AT_name + .ascii "\202>" # DW_FORM_GNU_str_index + .ascii "\260B" # DW_AT_GNU_dwo_name + .ascii "\202>" # DW_FORM_GNU_str_index + .ascii "\261B" # DW_AT_GNU_dwo_id + .byte 7 # DW_FORM_data8 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # Abbreviation Code + .byte 52 # DW_TAG_variable + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .ascii "\202>" # DW_FORM_GNU_str_index + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + 
.byte 11 # DW_FORM_data1 + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 110 # DW_AT_linkage_name + .ascii "\202>" # DW_FORM_GNU_str_index + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 3 # Abbreviation Code + .byte 36 # DW_TAG_base_type + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .ascii "\202>" # DW_FORM_GNU_str_index + .byte 62 # DW_AT_encoding + .byte 11 # DW_FORM_data1 + .byte 11 # DW_AT_byte_size + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 4 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 17 # DW_AT_low_pc + .ascii "\201>" # DW_FORM_GNU_addr_index + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 110 # DW_AT_linkage_name + .ascii "\202>" # DW_FORM_GNU_str_index + .byte 3 # DW_AT_name + .ascii "\202>" # DW_FORM_GNU_str_index + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 5 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .ascii "\202>" # DW_FORM_GNU_str_index + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 6 # Abbreviation Code + .byte 52 # DW_TAG_variable + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .ascii "\202>" # DW_FORM_GNU_str_index + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) 
+ .section .debug_addr,"",@progbits +.Laddr_table_base0: + .quad _ZL3foo + .quad .Lfunc_begin0 + .quad .Lfunc_begin1 + .quad .Lfunc_begin2 + .section .debug_gnu_pubnames,"",@progbits + .long .LpubNames_end0-.LpubNames_start0 # Length of Public Names Info +.LpubNames_start0: + .short 2 # DWARF Version + .long .Lcu_begin0 # Offset of Compilation Unit Info + .long 48 # Compilation Unit Length + .long 25 # DIE offset + .byte 160 # Attributes: VARIABLE, STATIC + .asciz "foo" # External Name + .long 41 # DIE offset + .byte 48 # Attributes: FUNCTION, EXTERNAL + .asciz "doStuff" # External Name + .long 69 # DIE offset + .byte 48 # Attributes: FUNCTION, EXTERNAL + .asciz "doStuff2" # External Name + .long 97 # DIE offset + .byte 48 # Attributes: FUNCTION, EXTERNAL + .asciz "_start" # External Name + .long 0 # End Mark +.LpubNames_end0: + .section .debug_gnu_pubtypes,"",@progbits + .long .LpubTypes_end0-.LpubTypes_start0 # Length of Public Types Info +.LpubTypes_start0: + .short 2 # DWARF Version + .long .Lcu_begin0 # Offset of Compilation Unit Info + .long 48 # Compilation Unit Length + .long 37 # DIE offset + .byte 144 # Attributes: TYPE, STATIC + .asciz "int" # External Name + .long 0 # End Mark +.LpubTypes_end0: + .ident "clang version 13" + .section ".note.GNU-stack","",@progbits + .addrsig + .addrsig_sym _Z7doStuffi + .addrsig_sym _ZL3foo + .section .debug_line,"",@progbits +.Lline_table_start0: diff --git a/bolt/test/X86/Inputs/dwarf4-df-do-no-convert-low-pc-high-pc-to-ranges.s b/bolt/test/X86/Inputs/dwarf4-df-do-no-convert-low-pc-high-pc-to-ranges.s new file mode 100644 index 000000000000..c0967672b05a --- /dev/null +++ b/bolt/test/X86/Inputs/dwarf4-df-do-no-convert-low-pc-high-pc-to-ranges.s @@ -0,0 +1,501 @@ + .text + .file "main.cpp" + .globl main # -- Begin function main + .p2align 4, 0x90 + .type main,@function +main: # @main +.Lfunc_begin0: + .file 1 "." 
"main.cpp" + .loc 1 5 0 # main.cpp:5:0 + .cfi_startproc +# %bb.0: # %entry + #DEBUG_VALUE: main:argc <- $edi + #DEBUG_VALUE: main:argv <- $rsi + pushq %rbx + .cfi_def_cfa_offset 16 + .cfi_offset %rbx, -16 + movl %edi, %ebx +.Ltmp0: + .loc 1 6 7 prologue_end # main.cpp:6:7 + cmpq $0, _Z13may_not_existv@GOTPCREL(%rip) + je .LBB0_2 +.Ltmp1: +# %bb.1: # %if.then + #DEBUG_VALUE: main:argc <- $ebx + #DEBUG_VALUE: main:argv <- $rsi + .loc 1 7 5 # main.cpp:7:5 + callq _Z13may_not_existv@PLT +.Ltmp2: + #DEBUG_VALUE: main:argv <- [DW_OP_LLVM_entry_value 1] $rsi +.LBB0_2: # %if.end + #DEBUG_VALUE: main:argc <- $ebx + #DEBUG_VALUE: main:argv <- [DW_OP_LLVM_entry_value 1] $rsi + #DEBUG_VALUE: main:j <- 0 + #DEBUG_VALUE: helper:i <- $ebx + .loc 1 2 10 # main.cpp:2:10 + incl %ebx +.Ltmp3: + #DEBUG_VALUE: main:argc <- [DW_OP_LLVM_entry_value 1] $edi + #DEBUG_VALUE: helper:i <- $ebx + #DEBUG_VALUE: main:j <- $ebx + .loc 1 10 3 # main.cpp:10:3 + movl %ebx, %eax + .loc 1 10 3 epilogue_begin is_stmt 0 # main.cpp:10:3 + popq %rbx +.Ltmp4: + #DEBUG_VALUE: helper:i <- $eax + #DEBUG_VALUE: main:j <- $eax + .cfi_def_cfa_offset 8 + retq +.Ltmp5: +.Lfunc_end0: + .size main, .Lfunc_end0-main + .cfi_endproc + # -- End function + .section .debug_loc.dwo,"e",@progbits +.Ldebug_loc0: + .byte 3 + .byte 0 + .long .Ltmp1-.Lfunc_begin0 + .short 1 # Loc expr size + .byte 85 # super-register DW_OP_reg5 + .byte 3 + .byte 2 + .long .Ltmp3-.Ltmp1 + .short 1 # Loc expr size + .byte 83 # super-register DW_OP_reg3 + .byte 3 + .byte 3 + .long .Lfunc_end0-.Ltmp3 + .short 4 # Loc expr size + .byte 243 # DW_OP_GNU_entry_value + .byte 1 # 1 + .byte 85 # super-register DW_OP_reg5 + .byte 159 # DW_OP_stack_value + .byte 0 +.Ldebug_loc1: + .byte 3 + .byte 0 + .long .Ltmp2-.Lfunc_begin0 + .short 1 # Loc expr size + .byte 84 # DW_OP_reg4 + .byte 3 + .byte 1 + .long .Lfunc_end0-.Ltmp2 + .short 4 # Loc expr size + .byte 243 # DW_OP_GNU_entry_value + .byte 1 # 1 + .byte 84 # DW_OP_reg4 + .byte 159 # DW_OP_stack_value + 
.byte 0 +.Ldebug_loc2: + .byte 3 + .byte 1 + .long .Ltmp3-.Ltmp2 + .short 3 # Loc expr size + .byte 17 # DW_OP_consts + .byte 0 # 0 + .byte 159 # DW_OP_stack_value + .byte 3 + .byte 3 + .long .Ltmp4-.Ltmp3 + .short 1 # Loc expr size + .byte 83 # super-register DW_OP_reg3 + .byte 3 + .byte 4 + .long .Lfunc_end0-.Ltmp4 + .short 1 # Loc expr size + .byte 80 # super-register DW_OP_reg0 + .byte 0 + .section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 0 # DW_CHILDREN_no + .byte 16 # DW_AT_stmt_list + .byte 23 # DW_FORM_sec_offset + .byte 27 # DW_AT_comp_dir + .byte 14 # DW_FORM_strp + .ascii "\264B" # DW_AT_GNU_pubnames + .byte 25 # DW_FORM_flag_present + .ascii "\260B" # DW_AT_GNU_dwo_name + .byte 14 # DW_FORM_strp + .ascii "\261B" # DW_AT_GNU_dwo_id + .byte 7 # DW_FORM_data8 + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .ascii "\263B" # DW_AT_GNU_addr_base + .byte 23 # DW_FORM_sec_offset + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit +.Ldebug_info_start0: + .short 4 # DWARF version number + .long .debug_abbrev # Offset Into Abbrev. Section + .byte 8 # Address Size (in bytes) + .byte 1 # Abbrev [1] 0xb:0x25 DW_TAG_compile_unit + .long .Lline_table_start0 # DW_AT_stmt_list + .long .Lskel_string0 # DW_AT_comp_dir + # DW_AT_GNU_pubnames + .long .Lskel_string1 # DW_AT_GNU_dwo_name + .quad -5076722043903325778 # DW_AT_GNU_dwo_id + .quad .Lfunc_begin0 # DW_AT_low_pc + .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc + .long .Laddr_table_base0 # DW_AT_GNU_addr_base +.Ldebug_info_end0: + .section .debug_str,"MS",@progbits,1 +.Lskel_string0: + .asciz "." 
# string offset=0 +.Lskel_string1: + .asciz "main.dwo" # string offset=38 + .section .debug_str.dwo,"eMS",@progbits,1 +.Linfo_string0: + .asciz "_ZL6helperi" # string offset=0 +.Linfo_string1: + .asciz "helper" # string offset=12 +.Linfo_string2: + .asciz "int" # string offset=19 +.Linfo_string3: + .asciz "i" # string offset=23 +.Linfo_string4: + .asciz "_Z13may_not_existv" # string offset=25 +.Linfo_string5: + .asciz "may_not_exist" # string offset=44 +.Linfo_string6: + .asciz "main" # string offset=58 +.Linfo_string7: + .asciz "argc" # string offset=63 +.Linfo_string8: + .asciz "argv" # string offset=68 +.Linfo_string9: + .asciz "char" # string offset=73 +.Linfo_string10: + .asciz "j" # string offset=78 +.Linfo_string11: + .asciz "clang version 17.0.0 (https://github.com/llvm/llvm-project.git 640e07c49037cca41a1bfbeb916b569d8c950aea)" # string offset=80 +.Linfo_string12: + .asciz "main.cpp" # string offset=185 +.Linfo_string13: + .asciz "main.dwo" # string offset=194 + .section .debug_str_offsets.dwo,"e",@progbits + .long 0 + .long 12 + .long 19 + .long 23 + .long 25 + .long 44 + .long 58 + .long 63 + .long 68 + .long 73 + .long 78 + .long 80 + .long 185 + .long 194 + .section .debug_info.dwo,"e",@progbits + .long .Ldebug_info_dwo_end0-.Ldebug_info_dwo_start0 # Length of Unit +.Ldebug_info_dwo_start0: + .short 4 # DWARF version number + .long 0 # Offset Into Abbrev. 
Section + .byte 8 # Address Size (in bytes) + .byte 1 # Abbrev [1] 0xb:0x88 DW_TAG_compile_unit + .byte 11 # DW_AT_producer + .short 33 # DW_AT_language + .byte 12 # DW_AT_name + .byte 13 # DW_AT_GNU_dwo_name + .quad -5076722043903325778 # DW_AT_GNU_dwo_id + .byte 2 # Abbrev [2] 0x19:0x13 DW_TAG_subprogram + .byte 0 # DW_AT_linkage_name + .byte 1 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + .long 44 # DW_AT_type + .byte 1 # DW_AT_inline + .byte 3 # Abbrev [3] 0x23:0x8 DW_TAG_formal_parameter + .byte 3 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + .long 44 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 4 # Abbrev [4] 0x2c:0x4 DW_TAG_base_type + .byte 2 # DW_AT_name + .byte 5 # DW_AT_encoding + .byte 4 # DW_AT_byte_size + .byte 5 # Abbrev [5] 0x30:0x4f DW_TAG_subprogram + .byte 0 # DW_AT_low_pc + .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 87 + # DW_AT_GNU_all_call_sites + .byte 6 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .long 44 # DW_AT_type + # DW_AT_external + .byte 6 # Abbrev [6] 0x3f:0xc DW_TAG_formal_parameter + .long .Ldebug_loc0-.debug_loc.dwo # DW_AT_location + .byte 7 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .long 44 # DW_AT_type + .byte 6 # Abbrev [6] 0x4b:0xc DW_TAG_formal_parameter + .long .Ldebug_loc1-.debug_loc.dwo # DW_AT_location + .byte 8 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .long 132 # DW_AT_type + .byte 7 # Abbrev [7] 0x57:0xc DW_TAG_variable + .long .Ldebug_loc2-.debug_loc.dwo # DW_AT_location + .byte 10 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 8 # DW_AT_decl_line + .long 44 # DW_AT_type + .byte 8 # Abbrev [8] 0x63:0x15 DW_TAG_inlined_subroutine + .long 25 # DW_AT_abstract_origin + .byte 1 # DW_AT_low_pc + .long .Ltmp3-.Ltmp2 # DW_AT_high_pc + .byte 1 # DW_AT_call_file + .byte 9 # DW_AT_call_line + .byte 32 # DW_AT_call_column + .byte 9 # Abbrev [9] 
0x70:0x7 DW_TAG_formal_parameter + .byte 1 # DW_AT_location + .byte 83 + .long 35 # DW_AT_abstract_origin + .byte 0 # End Of Children Mark + .byte 10 # Abbrev [10] 0x78:0x6 DW_TAG_GNU_call_site + .long 127 # DW_AT_abstract_origin + .byte 1 # DW_AT_low_pc + .byte 0 # End Of Children Mark + .byte 11 # Abbrev [11] 0x7f:0x5 DW_TAG_subprogram + .byte 4 # DW_AT_linkage_name + .byte 5 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 4 # DW_AT_decl_line + # DW_AT_declaration + # DW_AT_external + .byte 12 # Abbrev [12] 0x84:0x5 DW_TAG_pointer_type + .long 137 # DW_AT_type + .byte 12 # Abbrev [12] 0x89:0x5 DW_TAG_pointer_type + .long 142 # DW_AT_type + .byte 4 # Abbrev [4] 0x8e:0x4 DW_TAG_base_type + .byte 9 # DW_AT_name + .byte 6 # DW_AT_encoding + .byte 1 # DW_AT_byte_size + .byte 0 # End Of Children Mark +.Ldebug_info_dwo_end0: + .section .debug_abbrev.dwo,"e",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .ascii "\202>" # DW_FORM_GNU_str_index + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 3 # DW_AT_name + .ascii "\202>" # DW_FORM_GNU_str_index + .ascii "\260B" # DW_AT_GNU_dwo_name + .ascii "\202>" # DW_FORM_GNU_str_index + .ascii "\261B" # DW_AT_GNU_dwo_id + .byte 7 # DW_FORM_data8 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 110 # DW_AT_linkage_name + .ascii "\202>" # DW_FORM_GNU_str_index + .byte 3 # DW_AT_name + .ascii "\202>" # DW_FORM_GNU_str_index + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 32 # DW_AT_inline + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 3 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .ascii "\202>" # DW_FORM_GNU_str_index + .byte 58 # DW_AT_decl_file 
+ .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 4 # Abbreviation Code + .byte 36 # DW_TAG_base_type + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .ascii "\202>" # DW_FORM_GNU_str_index + .byte 62 # DW_AT_encoding + .byte 11 # DW_FORM_data1 + .byte 11 # DW_AT_byte_size + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 5 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 17 # DW_AT_low_pc + .ascii "\201>" # DW_FORM_GNU_addr_index + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .ascii "\227B" # DW_AT_GNU_all_call_sites + .byte 25 # DW_FORM_flag_present + .byte 3 # DW_AT_name + .ascii "\202>" # DW_FORM_GNU_str_index + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 6 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 23 # DW_FORM_sec_offset + .byte 3 # DW_AT_name + .ascii "\202>" # DW_FORM_GNU_str_index + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 7 # Abbreviation Code + .byte 52 # DW_TAG_variable + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 23 # DW_FORM_sec_offset + .byte 3 # DW_AT_name + .ascii "\202>" # DW_FORM_GNU_str_index + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 8 # Abbreviation Code 
+ .byte 29 # DW_TAG_inlined_subroutine + .byte 1 # DW_CHILDREN_yes + .byte 49 # DW_AT_abstract_origin + .byte 19 # DW_FORM_ref4 + .byte 17 # DW_AT_low_pc + .ascii "\201>" # DW_FORM_GNU_addr_index + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 88 # DW_AT_call_file + .byte 11 # DW_FORM_data1 + .byte 89 # DW_AT_call_line + .byte 11 # DW_FORM_data1 + .byte 87 # DW_AT_call_column + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 9 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 49 # DW_AT_abstract_origin + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 10 # Abbreviation Code + .ascii "\211\202\001" # DW_TAG_GNU_call_site + .byte 0 # DW_CHILDREN_no + .byte 49 # DW_AT_abstract_origin + .byte 19 # DW_FORM_ref4 + .byte 17 # DW_AT_low_pc + .ascii "\201>" # DW_FORM_GNU_addr_index + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 11 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 0 # DW_CHILDREN_no + .byte 110 # DW_AT_linkage_name + .ascii "\202>" # DW_FORM_GNU_str_index + .byte 3 # DW_AT_name + .ascii "\202>" # DW_FORM_GNU_str_index + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 60 # DW_AT_declaration + .byte 25 # DW_FORM_flag_present + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 12 # Abbreviation Code + .byte 15 # DW_TAG_pointer_type + .byte 0 # DW_CHILDREN_no + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_addr,"",@progbits +.Laddr_table_base0: + .quad .Lfunc_begin0 + .quad .Ltmp2 + .quad .Ltmp1 + .quad .Ltmp3 + .quad .Ltmp4 + .section .debug_gnu_pubnames,"",@progbits + .long .LpubNames_end0-.LpubNames_start0 # Length of Public Names Info +.LpubNames_start0: + .short 2 # DWARF Version + .long 
.Lcu_begin0 # Offset of Compilation Unit Info + .long 48 # Compilation Unit Length + .long 48 # DIE offset + .byte 48 # Attributes: FUNCTION, EXTERNAL + .asciz "main" # External Name + .long 25 # DIE offset + .byte 176 # Attributes: FUNCTION, STATIC + .asciz "helper" # External Name + .long 0 # End Mark +.LpubNames_end0: + .section .debug_gnu_pubtypes,"",@progbits + .long .LpubTypes_end0-.LpubTypes_start0 # Length of Public Types Info +.LpubTypes_start0: + .short 2 # DWARF Version + .long .Lcu_begin0 # Offset of Compilation Unit Info + .long 48 # Compilation Unit Length + .long 44 # DIE offset + .byte 144 # Attributes: TYPE, STATIC + .asciz "int" # External Name + .long 142 # DIE offset + .byte 144 # Attributes: TYPE, STATIC + .asciz "char" # External Name + .long 0 # End Mark +.LpubTypes_end0: + .weak _Z13may_not_existv + .ident "clang version 17.0.0 (https://github.com/llvm/llvm-project.git 640e07c49037cca41a1bfbeb916b569d8c950aea)" + .section ".note.GNU-stack","",@progbits + .addrsig + .addrsig_sym _Z13may_not_existv + .section .debug_line,"",@progbits +.Lline_table_start0: diff --git a/bolt/test/X86/Inputs/dwarf4-do-no-convert-low-pc-high-pc-to-ranges.s b/bolt/test/X86/Inputs/dwarf4-do-no-convert-low-pc-high-pc-to-ranges.s new file mode 100644 index 000000000000..e2d43addc519 --- /dev/null +++ b/bolt/test/X86/Inputs/dwarf4-do-no-convert-low-pc-high-pc-to-ranges.s @@ -0,0 +1,421 @@ +# -g2 -gdwarf-4 main.cpp -O1 +# static int helper(int i) { +# return ++i; +# } +# void may_not_exist(void) __attribute__ ((weak)); +# int main(int argc, char *argv[]) { +# if (may_not_exist) +# may_not_exist(); +# int j = 0; +# [[clang::always_inline]] j = helper(argc); +# return j; +# } + + .text + .file "main.cpp" + .globl main # -- Begin function main + .p2align 4, 0x90 + .type main,@function +main: # @main +.Lfunc_begin0: + .file 1 "." 
"main.cpp" + .loc 1 5 0 # main.cpp:5:0 + .cfi_startproc +# %bb.0: # %entry + #DEBUG_VALUE: main:argc <- $edi + #DEBUG_VALUE: main:argv <- $rsi + pushq %rbx + .cfi_def_cfa_offset 16 + .cfi_offset %rbx, -16 + movl %edi, %ebx +.Ltmp0: + .loc 1 6 7 prologue_end # main.cpp:6:7 + cmpq $0, _Z13may_not_existv@GOTPCREL(%rip) + je .LBB0_2 +.Ltmp1: +# %bb.1: # %if.then + #DEBUG_VALUE: main:argc <- $ebx + #DEBUG_VALUE: main:argv <- $rsi + .loc 1 7 5 # main.cpp:7:5 + callq _Z13may_not_existv@PLT +.Ltmp2: + #DEBUG_VALUE: main:argv <- [DW_OP_LLVM_entry_value 1] $rsi +.LBB0_2: # %if.end + #DEBUG_VALUE: main:argc <- $ebx + #DEBUG_VALUE: main:argv <- [DW_OP_LLVM_entry_value 1] $rsi + #DEBUG_VALUE: main:j <- 0 + #DEBUG_VALUE: helper:i <- $ebx + .loc 1 2 10 # main.cpp:2:10 + incl %ebx +.Ltmp3: + #DEBUG_VALUE: main:argc <- [DW_OP_LLVM_entry_value 1] $edi + #DEBUG_VALUE: helper:i <- $ebx + #DEBUG_VALUE: main:j <- $ebx + .loc 1 10 3 # main.cpp:10:3 + movl %ebx, %eax + .loc 1 10 3 epilogue_begin is_stmt 0 # main.cpp:10:3 + popq %rbx +.Ltmp4: + #DEBUG_VALUE: helper:i <- $eax + #DEBUG_VALUE: main:j <- $eax + .cfi_def_cfa_offset 8 + retq +.Ltmp5: +.Lfunc_end0: + .size main, .Lfunc_end0-main + .cfi_endproc + # -- End function + .section .debug_loc,"",@progbits +.Ldebug_loc0: + .quad .Lfunc_begin0-.Lfunc_begin0 + .quad .Ltmp1-.Lfunc_begin0 + .short 1 # Loc expr size + .byte 85 # super-register DW_OP_reg5 + .quad .Ltmp1-.Lfunc_begin0 + .quad .Ltmp3-.Lfunc_begin0 + .short 1 # Loc expr size + .byte 83 # super-register DW_OP_reg3 + .quad .Ltmp3-.Lfunc_begin0 + .quad .Lfunc_end0-.Lfunc_begin0 + .short 4 # Loc expr size + .byte 243 # DW_OP_GNU_entry_value + .byte 1 # 1 + .byte 85 # super-register DW_OP_reg5 + .byte 159 # DW_OP_stack_value + .quad 0 + .quad 0 +.Ldebug_loc1: + .quad .Lfunc_begin0-.Lfunc_begin0 + .quad .Ltmp2-.Lfunc_begin0 + .short 1 # Loc expr size + .byte 84 # DW_OP_reg4 + .quad .Ltmp2-.Lfunc_begin0 + .quad .Lfunc_end0-.Lfunc_begin0 + .short 4 # Loc expr size + .byte 243 # 
DW_OP_GNU_entry_value + .byte 1 # 1 + .byte 84 # DW_OP_reg4 + .byte 159 # DW_OP_stack_value + .quad 0 + .quad 0 +.Ldebug_loc2: + .quad .Ltmp2-.Lfunc_begin0 + .quad .Ltmp3-.Lfunc_begin0 + .short 3 # Loc expr size + .byte 17 # DW_OP_consts + .byte 0 # 0 + .byte 159 # DW_OP_stack_value + .quad .Ltmp3-.Lfunc_begin0 + .quad .Ltmp4-.Lfunc_begin0 + .short 1 # Loc expr size + .byte 83 # super-register DW_OP_reg3 + .quad .Ltmp4-.Lfunc_begin0 + .quad .Lfunc_end0-.Lfunc_begin0 + .short 1 # Loc expr size + .byte 80 # super-register DW_OP_reg0 + .quad 0 + .quad 0 + .section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .byte 14 # DW_FORM_strp + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 16 # DW_AT_stmt_list + .byte 23 # DW_FORM_sec_offset + .byte 27 # DW_AT_comp_dir + .byte 14 # DW_FORM_strp + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 110 # DW_AT_linkage_name + .byte 14 # DW_FORM_strp + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 32 # DW_AT_inline + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 3 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 4 # Abbreviation Code + .byte 36 # DW_TAG_base_type + .byte 0 # DW_CHILDREN_no + .byte 3 # 
DW_AT_name + .byte 14 # DW_FORM_strp + .byte 62 # DW_AT_encoding + .byte 11 # DW_FORM_data1 + .byte 11 # DW_AT_byte_size + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 5 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .ascii "\227B" # DW_AT_GNU_all_call_sites + .byte 25 # DW_FORM_flag_present + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 6 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 23 # DW_FORM_sec_offset + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 7 # Abbreviation Code + .byte 52 # DW_TAG_variable + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 23 # DW_FORM_sec_offset + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 8 # Abbreviation Code + .byte 29 # DW_TAG_inlined_subroutine + .byte 1 # DW_CHILDREN_yes + .byte 49 # DW_AT_abstract_origin + .byte 19 # DW_FORM_ref4 + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 88 # DW_AT_call_file + .byte 11 # DW_FORM_data1 + .byte 89 # DW_AT_call_line + .byte 11 # DW_FORM_data1 + 
.byte 87 # DW_AT_call_column + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 9 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 49 # DW_AT_abstract_origin + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 10 # Abbreviation Code + .ascii "\211\202\001" # DW_TAG_GNU_call_site + .byte 0 # DW_CHILDREN_no + .byte 49 # DW_AT_abstract_origin + .byte 19 # DW_FORM_ref4 + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 11 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 0 # DW_CHILDREN_no + .byte 110 # DW_AT_linkage_name + .byte 14 # DW_FORM_strp + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 60 # DW_AT_declaration + .byte 25 # DW_FORM_flag_present + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 12 # Abbreviation Code + .byte 15 # DW_TAG_pointer_type + .byte 0 # DW_CHILDREN_no + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit +.Ldebug_info_start0: + .short 4 # DWARF version number + .long .debug_abbrev # Offset Into Abbrev. 
Section + .byte 8 # Address Size (in bytes) + .byte 1 # Abbrev [1] 0xb:0xcf DW_TAG_compile_unit + .long .Linfo_string0 # DW_AT_producer + .short 33 # DW_AT_language + .long .Linfo_string1 # DW_AT_name + .long .Lline_table_start0 # DW_AT_stmt_list + .long .Linfo_string2 # DW_AT_comp_dir + .quad .Lfunc_begin0 # DW_AT_low_pc + .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc + .byte 2 # Abbrev [2] 0x2a:0x1c DW_TAG_subprogram + .long .Linfo_string3 # DW_AT_linkage_name + .long .Linfo_string4 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + .long 70 # DW_AT_type + .byte 1 # DW_AT_inline + .byte 3 # Abbrev [3] 0x3a:0xb DW_TAG_formal_parameter + .long .Linfo_string6 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + .long 70 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 4 # Abbrev [4] 0x46:0x7 DW_TAG_base_type + .long .Linfo_string5 # DW_AT_name + .byte 5 # DW_AT_encoding + .byte 4 # DW_AT_byte_size + .byte 5 # Abbrev [5] 0x4d:0x70 DW_TAG_subprogram + .quad .Lfunc_begin0 # DW_AT_low_pc + .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 87 + # DW_AT_GNU_all_call_sites + .long .Linfo_string9 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .long 70 # DW_AT_type + # DW_AT_external + .byte 6 # Abbrev [6] 0x66:0xf DW_TAG_formal_parameter + .long .Ldebug_loc0 # DW_AT_location + .long .Linfo_string10 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .long 70 # DW_AT_type + .byte 6 # Abbrev [6] 0x75:0xf DW_TAG_formal_parameter + .long .Ldebug_loc1 # DW_AT_location + .long .Linfo_string11 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .long 200 # DW_AT_type + .byte 7 # Abbrev [7] 0x84:0xf DW_TAG_variable + .long .Ldebug_loc2 # DW_AT_location + .long .Linfo_string13 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 8 # DW_AT_decl_line + .long 70 # DW_AT_type + .byte 8 # Abbrev [8] 0x93:0x1c DW_TAG_inlined_subroutine + .long 42 # 
DW_AT_abstract_origin + .quad .Ltmp2 # DW_AT_low_pc + .long .Ltmp3-.Ltmp2 # DW_AT_high_pc + .byte 1 # DW_AT_call_file + .byte 9 # DW_AT_call_line + .byte 32 # DW_AT_call_column + .byte 9 # Abbrev [9] 0xa7:0x7 DW_TAG_formal_parameter + .byte 1 # DW_AT_location + .byte 83 + .long 58 # DW_AT_abstract_origin + .byte 0 # End Of Children Mark + .byte 10 # Abbrev [10] 0xaf:0xd DW_TAG_GNU_call_site + .long 189 # DW_AT_abstract_origin + .quad .Ltmp2 # DW_AT_low_pc + .byte 0 # End Of Children Mark + .byte 11 # Abbrev [11] 0xbd:0xb DW_TAG_subprogram + .long .Linfo_string7 # DW_AT_linkage_name + .long .Linfo_string8 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 4 # DW_AT_decl_line + # DW_AT_declaration + # DW_AT_external + .byte 12 # Abbrev [12] 0xc8:0x5 DW_TAG_pointer_type + .long 205 # DW_AT_type + .byte 12 # Abbrev [12] 0xcd:0x5 DW_TAG_pointer_type + .long 210 # DW_AT_type + .byte 4 # Abbrev [4] 0xd2:0x7 DW_TAG_base_type + .long .Linfo_string12 # DW_AT_name + .byte 6 # DW_AT_encoding + .byte 1 # DW_AT_byte_size + .byte 0 # End Of Children Mark +.Ldebug_info_end0: + .section .debug_str,"MS",@progbits,1 +.Linfo_string0: + .asciz "clang version 17.0.0 (https://github.com/llvm/llvm-project.git 640e07c49037cca41a1bfbeb916b569d8c950aea)" # string offset=0 +.Linfo_string1: + .asciz "main.cpp" # string offset=105 +.Linfo_string2: + .asciz "." 
# string offset=114 +.Linfo_string3: + .asciz "_ZL6helperi" # string offset=152 +.Linfo_string4: + .asciz "helper" # string offset=164 +.Linfo_string5: + .asciz "int" # string offset=171 +.Linfo_string6: + .asciz "i" # string offset=175 +.Linfo_string7: + .asciz "_Z13may_not_existv" # string offset=177 +.Linfo_string8: + .asciz "may_not_exist" # string offset=196 +.Linfo_string9: + .asciz "main" # string offset=210 +.Linfo_string10: + .asciz "argc" # string offset=215 +.Linfo_string11: + .asciz "argv" # string offset=220 +.Linfo_string12: + .asciz "char" # string offset=225 +.Linfo_string13: + .asciz "j" # string offset=230 + .weak _Z13may_not_existv + .ident "clang version 17.0.0 (https://github.com/llvm/llvm-project.git 640e07c49037cca41a1bfbeb916b569d8c950aea)" + .section ".note.GNU-stack","",@progbits + .addrsig + .addrsig_sym _Z13may_not_existv + .section .debug_line,"",@progbits +.Lline_table_start0: diff --git a/bolt/test/X86/Inputs/dwarf4-subprogram-multiple-ranges-main.s b/bolt/test/X86/Inputs/dwarf4-subprogram-multiple-ranges-main.s new file mode 100644 index 000000000000..355c44fbf0e8 --- /dev/null +++ b/bolt/test/X86/Inputs/dwarf4-subprogram-multiple-ranges-main.s @@ -0,0 +1,331 @@ +# clang++ -fbasic-block-sections=all -ffunction-sections -g2 -gdwarf-4 +# int doStuff(int val) { +# if (val) +# ++val; +# return val; +# } +# +# int main(int argc, const char** argv) { +# return doStuff(argc); +# } + .text + .file "main.cpp" + .section .text._Z7doStuffi,"ax",@progbits + .globl _Z7doStuffi # -- Begin function _Z7doStuffi + .p2align 4, 0x90 + .type _Z7doStuffi,@function +_Z7doStuffi: # @_Z7doStuffi +.Lfunc_begin0: + .file 1 "/subprogramRanges" "main.cpp" + .loc 1 1 0 # main.cpp:1:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + movl %edi, -4(%rbp) +.Ltmp0: + .loc 1 2 7 prologue_end # main.cpp:2:7 + cmpl $0, -4(%rbp) +.Ltmp1: + .loc 1 2 7 is_stmt 0 # main.cpp:2:7 
+ je _Z7doStuffi.__part.2 + jmp _Z7doStuffi.__part.1 +.LBB_END0_0: + .cfi_endproc + .section .text._Z7doStuffi,"ax",@progbits,unique,1 +_Z7doStuffi.__part.1: # %if.then + .cfi_startproc + .cfi_def_cfa %rbp, 16 + .cfi_offset %rbp, -16 + .loc 1 3 5 is_stmt 1 # main.cpp:3:5 + movl -4(%rbp), %eax + addl $1, %eax + movl %eax, -4(%rbp) + jmp _Z7doStuffi.__part.2 +.LBB_END0_1: + .size _Z7doStuffi.__part.1, .LBB_END0_1-_Z7doStuffi.__part.1 + .cfi_endproc + .section .text._Z7doStuffi,"ax",@progbits,unique,2 +_Z7doStuffi.__part.2: # %if.end + .cfi_startproc + .cfi_def_cfa %rbp, 16 + .cfi_offset %rbp, -16 + .loc 1 4 10 # main.cpp:4:10 + movl -4(%rbp), %eax + .loc 1 4 3 epilogue_begin is_stmt 0 # main.cpp:4:3 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.LBB_END0_2: + .size _Z7doStuffi.__part.2, .LBB_END0_2-_Z7doStuffi.__part.2 + .cfi_endproc + .section .text._Z7doStuffi,"ax",@progbits +.Lfunc_end0: + .size _Z7doStuffi, .Lfunc_end0-_Z7doStuffi + # -- End function + .section .text.main,"ax",@progbits + .globl main # -- Begin function main + .p2align 4, 0x90 + .type main,@function +main: # @main +.Lfunc_begin1: + .loc 1 7 0 is_stmt 1 # main.cpp:7:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + subq $16, %rsp + movl $0, -4(%rbp) + movl %edi, -8(%rbp) + movq %rsi, -16(%rbp) +.Ltmp2: + .loc 1 8 21 prologue_end # main.cpp:8:21 + movl -8(%rbp), %edi + .loc 1 8 13 is_stmt 0 # main.cpp:8:13 + callq _Z7doStuffi + .loc 1 8 5 epilogue_begin # main.cpp:8:5 + addq $16, %rsp + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.LBB_END1_0: + .cfi_endproc +.Lfunc_end1: + .size main, .Lfunc_end1-main + # -- End function + .section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .byte 14 # DW_FORM_strp + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + 
.byte 16 # DW_AT_stmt_list + .byte 23 # DW_FORM_sec_offset + .byte 27 # DW_AT_comp_dir + .byte 14 # DW_FORM_strp + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 85 # DW_AT_ranges + .byte 23 # DW_FORM_sec_offset + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 85 # DW_AT_ranges + .byte 23 # DW_FORM_sec_offset + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 110 # DW_AT_linkage_name + .byte 14 # DW_FORM_strp + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 3 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 4 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 5 # Abbreviation Code + .byte 36 # DW_TAG_base_type + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 62 # DW_AT_encoding + .byte 11 # DW_FORM_data1 + .byte 11 
# DW_AT_byte_size + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 6 # Abbreviation Code + .byte 15 # DW_TAG_pointer_type + .byte 0 # DW_CHILDREN_no + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 7 # Abbreviation Code + .byte 38 # DW_TAG_const_type + .byte 0 # DW_CHILDREN_no + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit +.Ldebug_info_start0: + .short 4 # DWARF version number + .long .debug_abbrev # Offset Into Abbrev. Section + .byte 8 # Address Size (in bytes) + .byte 1 # Abbrev [1] 0xb:0x97 DW_TAG_compile_unit + .long .Linfo_string0 # DW_AT_producer + .short 33 # DW_AT_language + .long .Linfo_string1 # DW_AT_name + .long .Lline_table_start0 # DW_AT_stmt_list + .long .Linfo_string2 # DW_AT_comp_dir + .quad 0 # DW_AT_low_pc + .long .Ldebug_ranges1 # DW_AT_ranges + .byte 2 # Abbrev [2] 0x2a:0x24 DW_TAG_subprogram + .long .Ldebug_ranges0 # DW_AT_ranges + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string3 # DW_AT_linkage_name + .long .Linfo_string4 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + .long 132 # DW_AT_type + # DW_AT_external + .byte 3 # Abbrev [3] 0x3f:0xe DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 124 + .long .Linfo_string7 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + .long 132 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 4 # Abbrev [4] 0x4e:0x36 DW_TAG_subprogram + .quad .Lfunc_begin1 # DW_AT_low_pc + .long .Lfunc_end1-.Lfunc_begin1 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string6 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 7 # DW_AT_decl_line + .long 132 # DW_AT_type + # DW_AT_external + .byte 3 # Abbrev [3] 0x67:0xe DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + 
.byte 120 + .long .Linfo_string8 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 7 # DW_AT_decl_line + .long 132 # DW_AT_type + .byte 3 # Abbrev [3] 0x75:0xe DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .long .Linfo_string9 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 7 # DW_AT_decl_line + .long 139 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 5 # Abbrev [5] 0x84:0x7 DW_TAG_base_type + .long .Linfo_string5 # DW_AT_name + .byte 5 # DW_AT_encoding + .byte 4 # DW_AT_byte_size + .byte 6 # Abbrev [6] 0x8b:0x5 DW_TAG_pointer_type + .long 144 # DW_AT_type + .byte 6 # Abbrev [6] 0x90:0x5 DW_TAG_pointer_type + .long 149 # DW_AT_type + .byte 7 # Abbrev [7] 0x95:0x5 DW_TAG_const_type + .long 154 # DW_AT_type + .byte 5 # Abbrev [5] 0x9a:0x7 DW_TAG_base_type + .long .Linfo_string10 # DW_AT_name + .byte 6 # DW_AT_encoding + .byte 1 # DW_AT_byte_size + .byte 0 # End Of Children Mark +.Ldebug_info_end0: + .section .debug_ranges,"",@progbits +.Ldebug_ranges0: + .quad _Z7doStuffi.__part.1 + .quad .LBB_END0_1 + .quad _Z7doStuffi.__part.2 + .quad .LBB_END0_2 + .quad .Lfunc_begin0 + .quad .Lfunc_end0 + .quad 0 + .quad 0 +.Ldebug_ranges1: + .quad _Z7doStuffi.__part.1 + .quad .LBB_END0_1 + .quad _Z7doStuffi.__part.2 + .quad .LBB_END0_2 + .quad .Lfunc_begin0 + .quad .Lfunc_end0 + .quad .Lfunc_begin1 + .quad .Lfunc_end1 + .quad 0 + .quad 0 + .section .debug_str,"MS",@progbits,1 +.Linfo_string0: + .asciz "clang version 17.0.0 (https://github.com/llvm/llvm-project.git 640e07c49037cca41a1bfbeb916b569d8c950aea)" # string offset=0 +.Linfo_string1: + .asciz "main.cpp" # string offset=105 +.Linfo_string2: + .asciz "/subprogramRanges" # string offset=114 +.Linfo_string3: + .asciz "_Z7doStuffi" # string offset=169 +.Linfo_string4: + .asciz "doStuff" # string offset=181 +.Linfo_string5: + .asciz "int" # string offset=189 +.Linfo_string6: + .asciz "main" # string offset=193 +.Linfo_string7: + .asciz "val" # string offset=198 +.Linfo_string8: + .asciz 
"argc" # string offset=202 +.Linfo_string9: + .asciz "argv" # string offset=207 +.Linfo_string10: + .asciz "char" # string offset=212 + .ident "clang version 17.0.0 (https://github.com/llvm/llvm-project.git 640e07c49037cca41a1bfbeb916b569d8c950aea)" + .section ".note.GNU-stack","",@progbits + .addrsig + .addrsig_sym _Z7doStuffi + .section .debug_line,"",@progbits +.Lline_table_start0: diff --git a/bolt/test/X86/Inputs/dwarf4-subprogram-single-gc-ranges-main.s b/bolt/test/X86/Inputs/dwarf4-subprogram-single-gc-ranges-main.s new file mode 100644 index 000000000000..dce14ed66ed5 --- /dev/null +++ b/bolt/test/X86/Inputs/dwarf4-subprogram-single-gc-ranges-main.s @@ -0,0 +1,310 @@ +# clang++ -ffunction-sections -g2 -gdwarf-4 +# Manually modified to use ranges like what generates sometimes -fbasic-block-sections=all, +# and changed start of range to 0. +# int doStuff(int val) { +# if (val) +# ++val; +# return val; +# } +# +# int main(int argc, const char** argv) { +# return doStuff(argc); +# } + .text + .file "main.cpp" + .section .text._Z7doStuffi,"ax",@progbits + .globl _Z7doStuffi # -- Begin function _Z7doStuffi + .p2align 4, 0x90 + .type _Z7doStuffi,@function +_Z7doStuffi: # @_Z7doStuffi +.Lfunc_begin0: + .file 1 "/subprogramRanges" "main.cpp" + .loc 1 1 0 # main.cpp:1:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + movl %edi, -4(%rbp) +.Ltmp0: + .loc 1 2 7 prologue_end # main.cpp:2:7 + cmpl $0, -4(%rbp) +.Ltmp1: + .loc 1 2 7 is_stmt 0 # main.cpp:2:7 + je .LBB0_2 +# %bb.1: # %if.then +.Ltmp2: + .loc 1 3 5 is_stmt 1 # main.cpp:3:5 + movl -4(%rbp), %eax + addl $1, %eax + movl %eax, -4(%rbp) +.Ltmp3: +.LBB0_2: # %if.end + .loc 1 4 10 # main.cpp:4:10 + movl -4(%rbp), %eax + .loc 1 4 3 epilogue_begin is_stmt 0 # main.cpp:4:3 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp4: +.Lfunc_end0: + .size _Z7doStuffi, .Lfunc_end0-_Z7doStuffi + .cfi_endproc + # -- End function + 
.section .text.main,"ax",@progbits + .globl main # -- Begin function main + .p2align 4, 0x90 + .type main,@function +main: # @main +.Lfunc_begin1: + .loc 1 7 0 is_stmt 1 # main.cpp:7:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + subq $16, %rsp + movl $0, -4(%rbp) + movl %edi, -8(%rbp) + movq %rsi, -16(%rbp) +.Ltmp5: + .loc 1 8 21 prologue_end # main.cpp:8:21 + movl -8(%rbp), %edi + .loc 1 8 13 is_stmt 0 # main.cpp:8:13 + callq _Z7doStuffi + .loc 1 8 5 epilogue_begin # main.cpp:8:5 + addq $16, %rsp + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp6: +.Lfunc_end1: + .size main, .Lfunc_end1-main + .cfi_endproc + # -- End function + .section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .byte 14 # DW_FORM_strp + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 16 # DW_AT_stmt_list + .byte 23 # DW_FORM_sec_offset + .byte 27 # DW_AT_comp_dir + .byte 14 # DW_FORM_strp + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 85 # DW_AT_ranges + .byte 23 # DW_FORM_sec_offset + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 85 # DW_AT_ranges + .byte 23 # DW_FORM_sec_offset + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 110 # DW_AT_linkage_name + .byte 14 # DW_FORM_strp + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 3 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 24 # 
DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 4 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 5 # Abbreviation Code + .byte 36 # DW_TAG_base_type + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 62 # DW_AT_encoding + .byte 11 # DW_FORM_data1 + .byte 11 # DW_AT_byte_size + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 6 # Abbreviation Code + .byte 15 # DW_TAG_pointer_type + .byte 0 # DW_CHILDREN_no + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 7 # Abbreviation Code + .byte 38 # DW_TAG_const_type + .byte 0 # DW_CHILDREN_no + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit +.Ldebug_info_start0: + .short 4 # DWARF version number + .long .debug_abbrev # Offset Into Abbrev. 
Section + .byte 8 # Address Size (in bytes) + .byte 1 # Abbrev [1] 0xb:0x9f DW_TAG_compile_unit + .long .Linfo_string0 # DW_AT_producer + .short 33 # DW_AT_language + .long .Linfo_string1 # DW_AT_name + .long .Lline_table_start0 # DW_AT_stmt_list + .long .Linfo_string2 # DW_AT_comp_dir + .quad 0 # DW_AT_low_pc + .long .Ldebug_ranges0 # DW_AT_ranges + .byte 2 # Abbrev [2] 0x2a:0x2c DW_TAG_subprogram + .long .Ldebug_ranges1 # DW_AT_ranges + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string3 # DW_AT_linkage_name + .long .Linfo_string4 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + .long 132 # DW_AT_type + # DW_AT_external + .byte 3 # Abbrev [3] 0x47:0xe DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 124 + .long .Linfo_string7 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + .long 132 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 4 # Abbrev [4] 0x56:0x36 DW_TAG_subprogram + .quad .Lfunc_begin1 # DW_AT_low_pc + .long .Lfunc_end1-.Lfunc_begin1 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string6 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 7 # DW_AT_decl_line + .long 132 # DW_AT_type + # DW_AT_external + .byte 3 # Abbrev [3] 0x6f:0xe DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string8 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 7 # DW_AT_decl_line + .long 132 # DW_AT_type + .byte 3 # Abbrev [3] 0x7d:0xe DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .long .Linfo_string9 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 7 # DW_AT_decl_line + .long 144 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 5 # Abbrev [5] 0x8c:0x7 DW_TAG_base_type + .long .Linfo_string5 # DW_AT_name + .byte 5 # DW_AT_encoding + .byte 4 # DW_AT_byte_size + .byte 6 # Abbrev [6] 0x93:0x5 DW_TAG_pointer_type + .long 144 # DW_AT_type + .byte 6 # Abbrev [6] 0x98:0x5 DW_TAG_pointer_type + .long 149 # 
DW_AT_type + .byte 7 # Abbrev [7] 0x9d:0x5 DW_TAG_const_type + .long 154 # DW_AT_type + .byte 5 # Abbrev [5] 0xa2:0x7 DW_TAG_base_type + .long .Linfo_string10 # DW_AT_name + .byte 6 # DW_AT_encoding + .byte 1 # DW_AT_byte_size + .byte 0 # End Of Children Mark +.Ldebug_info_end0: + .section .debug_ranges,"",@progbits +.Ldebug_ranges0: + .quad .Lfunc_begin0 + .quad .Lfunc_end0 + .quad .Lfunc_begin1 + .quad .Lfunc_end1 + .quad 0 + .quad 0 +.Ldebug_ranges1: + .quad 0 + .quad .Lfunc_end0 + .quad 0 + .quad 0 + .section .debug_str,"MS",@progbits,1 +.Linfo_string0: + .asciz "clang version 17.0.0 (https://github.com/llvm/llvm-project.git 640e07c49037cca41a1bfbeb916b569d8c950aea)" # string offset=0 +.Linfo_string1: + .asciz "main.cpp" # string offset=105 +.Linfo_string2: + .asciz "/subprogramRanges" # string offset=114 +.Linfo_string3: + .asciz "_Z7doStuffi" # string offset=169 +.Linfo_string4: + .asciz "doStuff" # string offset=181 +.Linfo_string5: + .asciz "int" # string offset=189 +.Linfo_string6: + .asciz "main" # string offset=193 +.Linfo_string7: + .asciz "val" # string offset=198 +.Linfo_string8: + .asciz "argc" # string offset=202 +.Linfo_string9: + .asciz "argv" # string offset=207 +.Linfo_string10: + .asciz "char" # string offset=212 + .ident "clang version 17.0.0 (https://github.com/llvm/llvm-project.git 640e07c49037cca41a1bfbeb916b569d8c950aea)" + .section ".note.GNU-stack","",@progbits + .addrsig + .addrsig_sym _Z7doStuffi + .section .debug_line,"",@progbits +.Lline_table_start0: diff --git a/bolt/test/X86/Inputs/dwarf4-subprogram-single-ranges-main.s b/bolt/test/X86/Inputs/dwarf4-subprogram-single-ranges-main.s new file mode 100644 index 000000000000..d416d96e4c72 --- /dev/null +++ b/bolt/test/X86/Inputs/dwarf4-subprogram-single-ranges-main.s @@ -0,0 +1,309 @@ +# clang++ -ffunction-sections -g2 -gdwarf-4 +# Manually modified to use ranges like what generates sometimes -fbasic-block-sections=all +# int doStuff(int val) { +# if (val) +# ++val; +# return val; +# } 
+# +# int main(int argc, const char** argv) { +# return doStuff(argc); +# } + .text + .file "main.cpp" + .section .text._Z7doStuffi,"ax",@progbits + .globl _Z7doStuffi # -- Begin function _Z7doStuffi + .p2align 4, 0x90 + .type _Z7doStuffi,@function +_Z7doStuffi: # @_Z7doStuffi +.Lfunc_begin0: + .file 1 "/subprogramRanges" "main.cpp" + .loc 1 1 0 # main.cpp:1:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + movl %edi, -4(%rbp) +.Ltmp0: + .loc 1 2 7 prologue_end # main.cpp:2:7 + cmpl $0, -4(%rbp) +.Ltmp1: + .loc 1 2 7 is_stmt 0 # main.cpp:2:7 + je .LBB0_2 +# %bb.1: # %if.then +.Ltmp2: + .loc 1 3 5 is_stmt 1 # main.cpp:3:5 + movl -4(%rbp), %eax + addl $1, %eax + movl %eax, -4(%rbp) +.Ltmp3: +.LBB0_2: # %if.end + .loc 1 4 10 # main.cpp:4:10 + movl -4(%rbp), %eax + .loc 1 4 3 epilogue_begin is_stmt 0 # main.cpp:4:3 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp4: +.Lfunc_end0: + .size _Z7doStuffi, .Lfunc_end0-_Z7doStuffi + .cfi_endproc + # -- End function + .section .text.main,"ax",@progbits + .globl main # -- Begin function main + .p2align 4, 0x90 + .type main,@function +main: # @main +.Lfunc_begin1: + .loc 1 7 0 is_stmt 1 # main.cpp:7:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + subq $16, %rsp + movl $0, -4(%rbp) + movl %edi, -8(%rbp) + movq %rsi, -16(%rbp) +.Ltmp5: + .loc 1 8 21 prologue_end # main.cpp:8:21 + movl -8(%rbp), %edi + .loc 1 8 13 is_stmt 0 # main.cpp:8:13 + callq _Z7doStuffi + .loc 1 8 5 epilogue_begin # main.cpp:8:5 + addq $16, %rsp + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp6: +.Lfunc_end1: + .size main, .Lfunc_end1-main + .cfi_endproc + # -- End function + .section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .byte 14 # DW_FORM_strp + .byte 19 
# DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 16 # DW_AT_stmt_list + .byte 23 # DW_FORM_sec_offset + .byte 27 # DW_AT_comp_dir + .byte 14 # DW_FORM_strp + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 85 # DW_AT_ranges + .byte 23 # DW_FORM_sec_offset + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 85 # DW_AT_ranges + .byte 23 # DW_FORM_sec_offset + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 110 # DW_AT_linkage_name + .byte 14 # DW_FORM_strp + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 3 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 4 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 5 # Abbreviation Code + .byte 36 # DW_TAG_base_type + .byte 0 # DW_CHILDREN_no + .byte 3 # 
DW_AT_name + .byte 14 # DW_FORM_strp + .byte 62 # DW_AT_encoding + .byte 11 # DW_FORM_data1 + .byte 11 # DW_AT_byte_size + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 6 # Abbreviation Code + .byte 15 # DW_TAG_pointer_type + .byte 0 # DW_CHILDREN_no + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 7 # Abbreviation Code + .byte 38 # DW_TAG_const_type + .byte 0 # DW_CHILDREN_no + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit +.Ldebug_info_start0: + .short 4 # DWARF version number + .long .debug_abbrev # Offset Into Abbrev. Section + .byte 8 # Address Size (in bytes) + .byte 1 # Abbrev [1] 0xb:0x9f DW_TAG_compile_unit + .long .Linfo_string0 # DW_AT_producer + .short 33 # DW_AT_language + .long .Linfo_string1 # DW_AT_name + .long .Lline_table_start0 # DW_AT_stmt_list + .long .Linfo_string2 # DW_AT_comp_dir + .quad 0 # DW_AT_low_pc + .long .Ldebug_ranges0 # DW_AT_ranges + .byte 2 # Abbrev [2] 0x2a:0x2c DW_TAG_subprogram + .long .Ldebug_ranges1 # DW_AT_ranges + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string3 # DW_AT_linkage_name + .long .Linfo_string4 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + .long 132 # DW_AT_type + # DW_AT_external + .byte 3 # Abbrev [3] 0x47:0xe DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 124 + .long .Linfo_string7 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + .long 132 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 4 # Abbrev [4] 0x56:0x36 DW_TAG_subprogram + .quad .Lfunc_begin1 # DW_AT_low_pc + .long .Lfunc_end1-.Lfunc_begin1 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string6 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 7 # DW_AT_decl_line + .long 132 # DW_AT_type + # 
DW_AT_external + .byte 3 # Abbrev [3] 0x6f:0xe DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string8 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 7 # DW_AT_decl_line + .long 132 # DW_AT_type + .byte 3 # Abbrev [3] 0x7d:0xe DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .long .Linfo_string9 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 7 # DW_AT_decl_line + .long 144 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 5 # Abbrev [5] 0x8c:0x7 DW_TAG_base_type + .long .Linfo_string5 # DW_AT_name + .byte 5 # DW_AT_encoding + .byte 4 # DW_AT_byte_size + .byte 6 # Abbrev [6] 0x93:0x5 DW_TAG_pointer_type + .long 144 # DW_AT_type + .byte 6 # Abbrev [6] 0x98:0x5 DW_TAG_pointer_type + .long 149 # DW_AT_type + .byte 7 # Abbrev [7] 0x9d:0x5 DW_TAG_const_type + .long 154 # DW_AT_type + .byte 5 # Abbrev [5] 0xa2:0x7 DW_TAG_base_type + .long .Linfo_string10 # DW_AT_name + .byte 6 # DW_AT_encoding + .byte 1 # DW_AT_byte_size + .byte 0 # End Of Children Mark +.Ldebug_info_end0: + .section .debug_ranges,"",@progbits +.Ldebug_ranges0: + .quad .Lfunc_begin0 + .quad .Lfunc_end0 + .quad .Lfunc_begin1 + .quad .Lfunc_end1 + .quad 0 + .quad 0 +.Ldebug_ranges1: + .quad .Lfunc_begin0 + .quad .Lfunc_end0 + .quad 0 + .quad 0 + .section .debug_str,"MS",@progbits,1 +.Linfo_string0: + .asciz "clang version 17.0.0 (https://github.com/llvm/llvm-project.git 640e07c49037cca41a1bfbeb916b569d8c950aea)" # string offset=0 +.Linfo_string1: + .asciz "main.cpp" # string offset=105 +.Linfo_string2: + .asciz "/subprogramRanges" # string offset=114 +.Linfo_string3: + .asciz "_Z7doStuffi" # string offset=169 +.Linfo_string4: + .asciz "doStuff" # string offset=181 +.Linfo_string5: + .asciz "int" # string offset=189 +.Linfo_string6: + .asciz "main" # string offset=193 +.Linfo_string7: + .asciz "val" # string offset=198 +.Linfo_string8: + .asciz "argc" # string offset=202 +.Linfo_string9: + .asciz "argv" # string offset=207 
+.Linfo_string10: + .asciz "char" # string offset=212 + .ident "clang version 17.0.0 (https://github.com/llvm/llvm-project.git 640e07c49037cca41a1bfbeb916b569d8c950aea)" + .section ".note.GNU-stack","",@progbits + .addrsig + .addrsig_sym _Z7doStuffi + .section .debug_line,"",@progbits +.Lline_table_start0: diff --git a/bolt/test/X86/Inputs/dwarf5-do-no-convert-low-pc-high-pc-to-ranges.s b/bolt/test/X86/Inputs/dwarf5-do-no-convert-low-pc-high-pc-to-ranges.s new file mode 100644 index 000000000000..20c782bd2787 --- /dev/null +++ b/bolt/test/X86/Inputs/dwarf5-do-no-convert-low-pc-high-pc-to-ranges.s @@ -0,0 +1,478 @@ +# -g2 -gdwarf-5 main.cpp -O1 +# static int helper(int i) { +# return ++i; +# } +# void may_not_exist(void) __attribute__ ((weak)); +# int main(int argc, char *argv[]) { +# if (may_not_exist) +# may_not_exist(); +# int j = 0; +# [[clang::always_inline]] j = helper(argc); +# return j; +# } + + .text + .file "main.cpp" + .globl main # -- Begin function main + .p2align 4, 0x90 + .type main,@function +main: # @main +.Lfunc_begin0: + .file 0 "."
"main.cpp" md5 0x375df0e93c6d762413bfe2708333ecaf + .loc 0 5 0 # main.cpp:5:0 + .cfi_startproc +# %bb.0: # %entry + #DEBUG_VALUE: main:argc <- $edi + #DEBUG_VALUE: main:argv <- $rsi + pushq %rbx + .cfi_def_cfa_offset 16 + .cfi_offset %rbx, -16 + movl %edi, %ebx +.Ltmp0: + .loc 0 6 7 prologue_end # main.cpp:6:7 + cmpq $0, _Z13may_not_existv@GOTPCREL(%rip) + je .LBB0_2 +.Ltmp1: +# %bb.1: # %if.then + #DEBUG_VALUE: main:argc <- $ebx + #DEBUG_VALUE: main:argv <- $rsi + .loc 0 7 5 # main.cpp:7:5 + callq _Z13may_not_existv@PLT +.Ltmp2: + #DEBUG_VALUE: main:argv <- [DW_OP_LLVM_entry_value 1] $rsi +.LBB0_2: # %if.end + #DEBUG_VALUE: main:argc <- $ebx + #DEBUG_VALUE: main:argv <- [DW_OP_LLVM_entry_value 1] $rsi + #DEBUG_VALUE: main:j <- 0 + #DEBUG_VALUE: helper:i <- $ebx + .loc 0 2 10 # main.cpp:2:10 + incl %ebx +.Ltmp3: + #DEBUG_VALUE: main:argc <- [DW_OP_LLVM_entry_value 1] $edi + #DEBUG_VALUE: helper:i <- $ebx + #DEBUG_VALUE: main:j <- $ebx + .loc 0 10 3 # main.cpp:10:3 + movl %ebx, %eax + .loc 0 10 3 epilogue_begin is_stmt 0 # main.cpp:10:3 + popq %rbx +.Ltmp4: + #DEBUG_VALUE: helper:i <- $eax + #DEBUG_VALUE: main:j <- $eax + .cfi_def_cfa_offset 8 + retq +.Ltmp5: +.Lfunc_end0: + .size main, .Lfunc_end0-main + .cfi_endproc + # -- End function + .section .debug_loclists,"",@progbits + .long .Ldebug_list_header_end0-.Ldebug_list_header_start0 # Length +.Ldebug_list_header_start0: + .short 5 # Version + .byte 8 # Address size + .byte 0 # Segment selector size + .long 3 # Offset entry count +.Lloclists_table_base0: + .long .Ldebug_loc0-.Lloclists_table_base0 + .long .Ldebug_loc1-.Lloclists_table_base0 + .long .Ldebug_loc2-.Lloclists_table_base0 +.Ldebug_loc0: + .byte 4 # DW_LLE_offset_pair + .uleb128 .Lfunc_begin0-.Lfunc_begin0 # starting offset + .uleb128 .Ltmp1-.Lfunc_begin0 # ending offset + .byte 1 # Loc expr size + .byte 85 # super-register DW_OP_reg5 + .byte 4 # DW_LLE_offset_pair + .uleb128 .Ltmp1-.Lfunc_begin0 # starting offset + .uleb128 .Ltmp3-.Lfunc_begin0 # 
ending offset + .byte 1 # Loc expr size + .byte 83 # super-register DW_OP_reg3 + .byte 4 # DW_LLE_offset_pair + .uleb128 .Ltmp3-.Lfunc_begin0 # starting offset + .uleb128 .Lfunc_end0-.Lfunc_begin0 # ending offset + .byte 4 # Loc expr size + .byte 163 # DW_OP_entry_value + .byte 1 # 1 + .byte 85 # super-register DW_OP_reg5 + .byte 159 # DW_OP_stack_value + .byte 0 # DW_LLE_end_of_list +.Ldebug_loc1: + .byte 4 # DW_LLE_offset_pair + .uleb128 .Lfunc_begin0-.Lfunc_begin0 # starting offset + .uleb128 .Ltmp2-.Lfunc_begin0 # ending offset + .byte 1 # Loc expr size + .byte 84 # DW_OP_reg4 + .byte 4 # DW_LLE_offset_pair + .uleb128 .Ltmp2-.Lfunc_begin0 # starting offset + .uleb128 .Lfunc_end0-.Lfunc_begin0 # ending offset + .byte 4 # Loc expr size + .byte 163 # DW_OP_entry_value + .byte 1 # 1 + .byte 84 # DW_OP_reg4 + .byte 159 # DW_OP_stack_value + .byte 0 # DW_LLE_end_of_list +.Ldebug_loc2: + .byte 4 # DW_LLE_offset_pair + .uleb128 .Ltmp2-.Lfunc_begin0 # starting offset + .uleb128 .Ltmp3-.Lfunc_begin0 # ending offset + .byte 3 # Loc expr size + .byte 17 # DW_OP_consts + .byte 0 # 0 + .byte 159 # DW_OP_stack_value + .byte 4 # DW_LLE_offset_pair + .uleb128 .Ltmp3-.Lfunc_begin0 # starting offset + .uleb128 .Ltmp4-.Lfunc_begin0 # ending offset + .byte 1 # Loc expr size + .byte 83 # super-register DW_OP_reg3 + .byte 4 # DW_LLE_offset_pair + .uleb128 .Ltmp4-.Lfunc_begin0 # starting offset + .uleb128 .Lfunc_end0-.Lfunc_begin0 # ending offset + .byte 1 # Loc expr size + .byte 80 # super-register DW_OP_reg0 + .byte 0 # DW_LLE_end_of_list +.Ldebug_list_header_end0: + .section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .byte 37 # DW_FORM_strx1 + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 114 # DW_AT_str_offsets_base + .byte 23 # DW_FORM_sec_offset + .byte 16 # DW_AT_stmt_list + .byte 23 # DW_FORM_sec_offset + .byte 
27 # DW_AT_comp_dir + .byte 37 # DW_FORM_strx1 + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 115 # DW_AT_addr_base + .byte 23 # DW_FORM_sec_offset + .ascii "\214\001" # DW_AT_loclists_base + .byte 23 # DW_FORM_sec_offset + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 110 # DW_AT_linkage_name + .byte 37 # DW_FORM_strx1 + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 32 # DW_AT_inline + .byte 33 # DW_FORM_implicit_const + .byte 1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 3 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 4 # Abbreviation Code + .byte 36 # DW_TAG_base_type + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 62 # DW_AT_encoding + .byte 11 # DW_FORM_data1 + .byte 11 # DW_AT_byte_size + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 5 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 122 # DW_AT_call_all_calls + .byte 25 # DW_FORM_flag_present + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 
25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 6 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 34 # DW_FORM_loclistx + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 7 # Abbreviation Code + .byte 52 # DW_TAG_variable + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 34 # DW_FORM_loclistx + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 8 # Abbreviation Code + .byte 29 # DW_TAG_inlined_subroutine + .byte 1 # DW_CHILDREN_yes + .byte 49 # DW_AT_abstract_origin + .byte 19 # DW_FORM_ref4 + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 88 # DW_AT_call_file + .byte 11 # DW_FORM_data1 + .byte 89 # DW_AT_call_line + .byte 11 # DW_FORM_data1 + .byte 87 # DW_AT_call_column + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 9 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 49 # DW_AT_abstract_origin + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 10 # Abbreviation Code + .byte 72 # DW_TAG_call_site + .byte 0 # DW_CHILDREN_no + .byte 127 # DW_AT_call_origin + .byte 19 # DW_FORM_ref4 + .byte 125 # DW_AT_call_return_pc + .byte 27 # DW_FORM_addrx + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 11 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 0 # DW_CHILDREN_no + .byte 110 # DW_AT_linkage_name + .byte 37 # DW_FORM_strx1 + .byte 3 # DW_AT_name + 
.byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 60 # DW_AT_declaration + .byte 25 # DW_FORM_flag_present + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 12 # Abbreviation Code + .byte 15 # DW_TAG_pointer_type + .byte 0 # DW_CHILDREN_no + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit +.Ldebug_info_start0: + .short 5 # DWARF version number + .byte 1 # DWARF Unit Type + .byte 8 # Address Size (in bytes) + .long .debug_abbrev # Offset Into Abbrev. Section + .byte 1 # Abbrev [1] 0xc:0x8b DW_TAG_compile_unit + .byte 0 # DW_AT_producer + .short 33 # DW_AT_language + .byte 1 # DW_AT_name + .long .Lstr_offsets_base0 # DW_AT_str_offsets_base + .long .Lline_table_start0 # DW_AT_stmt_list + .byte 2 # DW_AT_comp_dir + .byte 0 # DW_AT_low_pc + .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc + .long .Laddr_table_base0 # DW_AT_addr_base + .long .Lloclists_table_base0 # DW_AT_loclists_base + .byte 2 # Abbrev [2] 0x27:0x12 DW_TAG_subprogram + .byte 3 # DW_AT_linkage_name + .byte 4 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + .long 57 # DW_AT_type + # DW_AT_inline + .byte 3 # Abbrev [3] 0x30:0x8 DW_TAG_formal_parameter + .byte 6 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + .long 57 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 4 # Abbrev [4] 0x39:0x4 DW_TAG_base_type + .byte 5 # DW_AT_name + .byte 5 # DW_AT_encoding + .byte 4 # DW_AT_byte_size + .byte 5 # Abbrev [5] 0x3d:0x46 DW_TAG_subprogram + .byte 0 # DW_AT_low_pc + .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 87 + # DW_AT_call_all_calls + .byte 9 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 5 # 
DW_AT_decl_line + .long 57 # DW_AT_type + # DW_AT_external + .byte 6 # Abbrev [6] 0x4c:0x9 DW_TAG_formal_parameter + .byte 0 # DW_AT_location + .byte 10 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .long 57 # DW_AT_type + .byte 6 # Abbrev [6] 0x55:0x9 DW_TAG_formal_parameter + .byte 1 # DW_AT_location + .byte 11 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .long 136 # DW_AT_type + .byte 7 # Abbrev [7] 0x5e:0x9 DW_TAG_variable + .byte 2 # DW_AT_location + .byte 13 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 8 # DW_AT_decl_line + .long 57 # DW_AT_type + .byte 8 # Abbrev [8] 0x67:0x15 DW_TAG_inlined_subroutine + .long 39 # DW_AT_abstract_origin + .byte 1 # DW_AT_low_pc + .long .Ltmp3-.Ltmp2 # DW_AT_high_pc + .byte 0 # DW_AT_call_file + .byte 9 # DW_AT_call_line + .byte 32 # DW_AT_call_column + .byte 9 # Abbrev [9] 0x74:0x7 DW_TAG_formal_parameter + .byte 1 # DW_AT_location + .byte 83 + .long 48 # DW_AT_abstract_origin + .byte 0 # End Of Children Mark + .byte 10 # Abbrev [10] 0x7c:0x6 DW_TAG_call_site + .long 131 # DW_AT_call_origin + .byte 1 # DW_AT_call_return_pc + .byte 0 # End Of Children Mark + .byte 11 # Abbrev [11] 0x83:0x5 DW_TAG_subprogram + .byte 7 # DW_AT_linkage_name + .byte 8 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 4 # DW_AT_decl_line + # DW_AT_declaration + # DW_AT_external + .byte 12 # Abbrev [12] 0x88:0x5 DW_TAG_pointer_type + .long 141 # DW_AT_type + .byte 12 # Abbrev [12] 0x8d:0x5 DW_TAG_pointer_type + .long 146 # DW_AT_type + .byte 4 # Abbrev [4] 0x92:0x4 DW_TAG_base_type + .byte 12 # DW_AT_name + .byte 6 # DW_AT_encoding + .byte 1 # DW_AT_byte_size + .byte 0 # End Of Children Mark +.Ldebug_info_end0: + .section .debug_str_offsets,"",@progbits + .long 60 # Length of String Offsets Set + .short 5 + .short 0 +.Lstr_offsets_base0: + .section .debug_str,"MS",@progbits,1 +.Linfo_string0: + .asciz "clang version 17.0.0 (https://github.com/llvm/llvm-project.git 
640e07c49037cca41a1bfbeb916b569d8c950aea)" # string offset=0 +.Linfo_string1: + .asciz "main.cpp" # string offset=105 +.Linfo_string2: + .asciz "." # string offset=114 +.Linfo_string3: + .asciz "_ZL6helperi" # string offset=152 +.Linfo_string4: + .asciz "helper" # string offset=164 +.Linfo_string5: + .asciz "int" # string offset=171 +.Linfo_string6: + .asciz "i" # string offset=175 +.Linfo_string7: + .asciz "_Z13may_not_existv" # string offset=177 +.Linfo_string8: + .asciz "may_not_exist" # string offset=196 +.Linfo_string9: + .asciz "main" # string offset=210 +.Linfo_string10: + .asciz "argc" # string offset=215 +.Linfo_string11: + .asciz "argv" # string offset=220 +.Linfo_string12: + .asciz "char" # string offset=225 +.Linfo_string13: + .asciz "j" # string offset=230 + .section .debug_str_offsets,"",@progbits + .long .Linfo_string0 + .long .Linfo_string1 + .long .Linfo_string2 + .long .Linfo_string3 + .long .Linfo_string4 + .long .Linfo_string5 + .long .Linfo_string6 + .long .Linfo_string7 + .long .Linfo_string8 + .long .Linfo_string9 + .long .Linfo_string10 + .long .Linfo_string11 + .long .Linfo_string12 + .long .Linfo_string13 + .section .debug_addr,"",@progbits + .long .Ldebug_addr_end0-.Ldebug_addr_start0 # Length of contribution +.Ldebug_addr_start0: + .short 5 # DWARF version number + .byte 8 # Address size + .byte 0 # Segment selector size +.Laddr_table_base0: + .quad .Lfunc_begin0 + .quad .Ltmp2 +.Ldebug_addr_end0: + .weak _Z13may_not_existv + .ident "clang version 17.0.0 (https://github.com/llvm/llvm-project.git 640e07c49037cca41a1bfbeb916b569d8c950aea)" + .section ".note.GNU-stack","",@progbits + .addrsig + .addrsig_sym _Z13may_not_existv + .section .debug_line,"",@progbits +.Lline_table_start0: diff --git a/bolt/test/X86/Inputs/dwarf5-subprogram-multiple-ranges-main.s b/bolt/test/X86/Inputs/dwarf5-subprogram-multiple-ranges-main.s new file mode 100644 index 000000000000..d296f61f9b9f --- /dev/null +++ 
b/bolt/test/X86/Inputs/dwarf5-subprogram-multiple-ranges-main.s @@ -0,0 +1,385 @@ +# clang++ -fbasic-block-sections=all -ffunction-sections -g2 -gdwarf-5 +# int doStuff(int val) { +# if (val) +# ++val; +# return val; +# } +# +# int main(int argc, const char** argv) { +# return doStuff(argc); +# } + .text + .file "main.cpp" + .section .text._Z7doStuffi,"ax",@progbits + .globl _Z7doStuffi # -- Begin function _Z7doStuffi + .p2align 4, 0x90 + .type _Z7doStuffi,@function +_Z7doStuffi: # @_Z7doStuffi +.Lfunc_begin0: + .file 0 "/subprogramRanges" "main.cpp" md5 0x45fd34ef778739dca24be206894f1d15 + .loc 0 1 0 # main.cpp:1:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + movl %edi, -4(%rbp) +.Ltmp0: + .loc 0 2 7 prologue_end # main.cpp:2:7 + cmpl $0, -4(%rbp) +.Ltmp1: + .loc 0 2 7 is_stmt 0 # main.cpp:2:7 + je _Z7doStuffi.__part.2 + jmp _Z7doStuffi.__part.1 +.LBB_END0_0: + .cfi_endproc + .section .text._Z7doStuffi,"ax",@progbits,unique,1 +_Z7doStuffi.__part.1: # %if.then + .cfi_startproc + .cfi_def_cfa %rbp, 16 + .cfi_offset %rbp, -16 + .loc 0 3 5 is_stmt 1 # main.cpp:3:5 + movl -4(%rbp), %eax + addl $1, %eax + movl %eax, -4(%rbp) + jmp _Z7doStuffi.__part.2 +.LBB_END0_1: + .size _Z7doStuffi.__part.1, .LBB_END0_1-_Z7doStuffi.__part.1 + .cfi_endproc + .section .text._Z7doStuffi,"ax",@progbits,unique,2 +_Z7doStuffi.__part.2: # %if.end + .cfi_startproc + .cfi_def_cfa %rbp, 16 + .cfi_offset %rbp, -16 + .loc 0 4 10 # main.cpp:4:10 + movl -4(%rbp), %eax + .loc 0 4 3 epilogue_begin is_stmt 0 # main.cpp:4:3 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.LBB_END0_2: + .size _Z7doStuffi.__part.2, .LBB_END0_2-_Z7doStuffi.__part.2 + .cfi_endproc + .section .text._Z7doStuffi,"ax",@progbits +.Lfunc_end0: + .size _Z7doStuffi, .Lfunc_end0-_Z7doStuffi + # -- End function + .section .text.main,"ax",@progbits + .globl main # -- Begin function main + .p2align 4, 0x90 + .type main,@function +main: # 
@main +.Lfunc_begin1: + .loc 0 7 0 is_stmt 1 # main.cpp:7:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + subq $16, %rsp + movl $0, -4(%rbp) + movl %edi, -8(%rbp) + movq %rsi, -16(%rbp) +.Ltmp2: + .loc 0 8 21 prologue_end # main.cpp:8:21 + movl -8(%rbp), %edi + .loc 0 8 13 is_stmt 0 # main.cpp:8:13 + callq _Z7doStuffi + .loc 0 8 5 epilogue_begin # main.cpp:8:5 + addq $16, %rsp + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.LBB_END1_0: + .cfi_endproc +.Lfunc_end1: + .size main, .Lfunc_end1-main + # -- End function + .section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .byte 37 # DW_FORM_strx1 + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 114 # DW_AT_str_offsets_base + .byte 23 # DW_FORM_sec_offset + .byte 16 # DW_AT_stmt_list + .byte 23 # DW_FORM_sec_offset + .byte 27 # DW_AT_comp_dir + .byte 37 # DW_FORM_strx1 + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 85 # DW_AT_ranges + .byte 35 # DW_FORM_rnglistx + .byte 115 # DW_AT_addr_base + .byte 23 # DW_FORM_sec_offset + .byte 116 # DW_AT_rnglists_base + .byte 23 # DW_FORM_sec_offset + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 85 # DW_AT_ranges + .byte 35 # DW_FORM_rnglistx + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 110 # DW_AT_linkage_name + .byte 37 # DW_FORM_strx1 + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 3 # Abbreviation Code + .byte 5 # 
DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 4 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 5 # Abbreviation Code + .byte 36 # DW_TAG_base_type + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 62 # DW_AT_encoding + .byte 11 # DW_FORM_data1 + .byte 11 # DW_AT_byte_size + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 6 # Abbreviation Code + .byte 15 # DW_TAG_pointer_type + .byte 0 # DW_CHILDREN_no + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 7 # Abbreviation Code + .byte 38 # DW_TAG_const_type + .byte 0 # DW_CHILDREN_no + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit +.Ldebug_info_start0: + .short 5 # DWARF version number + .byte 1 # DWARF Unit Type + .byte 8 # Address Size (in bytes) + .long .debug_abbrev # Offset Into Abbrev. 
Section + .byte 1 # Abbrev [1] 0xc:0x75 DW_TAG_compile_unit + .byte 0 # DW_AT_producer + .short 33 # DW_AT_language + .byte 1 # DW_AT_name + .long .Lstr_offsets_base0 # DW_AT_str_offsets_base + .long .Lline_table_start0 # DW_AT_stmt_list + .byte 2 # DW_AT_comp_dir + .quad 0 # DW_AT_low_pc + .byte 1 # DW_AT_ranges + .long .Laddr_table_base0 # DW_AT_addr_base + .long .Lrnglists_table_base0 # DW_AT_rnglists_base + .byte 2 # Abbrev [2] 0x2b:0x18 DW_TAG_subprogram + .byte 0 # DW_AT_ranges + .byte 1 # DW_AT_frame_base + .byte 86 + .byte 3 # DW_AT_linkage_name + .byte 4 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + .long 105 # DW_AT_type + # DW_AT_external + .byte 3 # Abbrev [3] 0x37:0xb DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 124 + .byte 7 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + .long 105 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 4 # Abbrev [4] 0x43:0x26 DW_TAG_subprogram + .byte 3 # DW_AT_low_pc + .long .Lfunc_end1-.Lfunc_begin1 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .byte 6 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 7 # DW_AT_decl_line + .long 105 # DW_AT_type + # DW_AT_external + .byte 3 # Abbrev [3] 0x52:0xb DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .byte 8 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 7 # DW_AT_decl_line + .long 105 # DW_AT_type + .byte 3 # Abbrev [3] 0x5d:0xb DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .byte 9 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 7 # DW_AT_decl_line + .long 109 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 5 # Abbrev [5] 0x69:0x4 DW_TAG_base_type + .byte 5 # DW_AT_name + .byte 5 # DW_AT_encoding + .byte 4 # DW_AT_byte_size + .byte 6 # Abbrev [6] 0x6d:0x5 DW_TAG_pointer_type + .long 114 # DW_AT_type + .byte 6 # Abbrev [6] 0x72:0x5 DW_TAG_pointer_type + .long 119 # DW_AT_type + .byte 7 # Abbrev [7] 0x77:0x5 
DW_TAG_const_type + .long 124 # DW_AT_type + .byte 5 # Abbrev [5] 0x7c:0x4 DW_TAG_base_type + .byte 10 # DW_AT_name + .byte 6 # DW_AT_encoding + .byte 1 # DW_AT_byte_size + .byte 0 # End Of Children Mark +.Ldebug_info_end0: + .section .debug_rnglists,"",@progbits + .long .Ldebug_list_header_end0-.Ldebug_list_header_start0 # Length +.Ldebug_list_header_start0: + .short 5 # Version + .byte 8 # Address size + .byte 0 # Segment selector size + .long 2 # Offset entry count +.Lrnglists_table_base0: + .long .Ldebug_ranges0-.Lrnglists_table_base0 + .long .Ldebug_ranges1-.Lrnglists_table_base0 +.Ldebug_ranges0: + .byte 3 # DW_RLE_startx_length + .byte 0 # start index + .uleb128 .LBB_END0_1-_Z7doStuffi.__part.1 # length + .byte 3 # DW_RLE_startx_length + .byte 1 # start index + .uleb128 .LBB_END0_2-_Z7doStuffi.__part.2 # length + .byte 3 # DW_RLE_startx_length + .byte 2 # start index + .uleb128 .Lfunc_end0-.Lfunc_begin0 # length + .byte 0 # DW_RLE_end_of_list +.Ldebug_ranges1: + .byte 3 # DW_RLE_startx_length + .byte 0 # start index + .uleb128 .LBB_END0_1-_Z7doStuffi.__part.1 # length + .byte 3 # DW_RLE_startx_length + .byte 1 # start index + .uleb128 .LBB_END0_2-_Z7doStuffi.__part.2 # length + .byte 3 # DW_RLE_startx_length + .byte 2 # start index + .uleb128 .Lfunc_end0-.Lfunc_begin0 # length + .byte 3 # DW_RLE_startx_length + .byte 3 # start index + .uleb128 .Lfunc_end1-.Lfunc_begin1 # length + .byte 0 # DW_RLE_end_of_list +.Ldebug_list_header_end0: + .section .debug_str_offsets,"",@progbits + .long 48 # Length of String Offsets Set + .short 5 + .short 0 +.Lstr_offsets_base0: + .section .debug_str,"MS",@progbits,1 +.Linfo_string0: + .asciz "clang version 17.0.0 (https://github.com/llvm/llvm-project.git 640e07c49037cca41a1bfbeb916b569d8c950aea)" # string offset=0 +.Linfo_string1: + .asciz "main.cpp" # string offset=105 +.Linfo_string2: + .asciz "/subprogramRanges" # string offset=114 +.Linfo_string3: + .asciz "_Z7doStuffi" # string offset=169 +.Linfo_string4: + .asciz 
"doStuff" # string offset=181 +.Linfo_string5: + .asciz "int" # string offset=189 +.Linfo_string6: + .asciz "main" # string offset=193 +.Linfo_string7: + .asciz "val" # string offset=198 +.Linfo_string8: + .asciz "argc" # string offset=202 +.Linfo_string9: + .asciz "argv" # string offset=207 +.Linfo_string10: + .asciz "char" # string offset=212 + .section .debug_str_offsets,"",@progbits + .long .Linfo_string0 + .long .Linfo_string1 + .long .Linfo_string2 + .long .Linfo_string3 + .long .Linfo_string4 + .long .Linfo_string5 + .long .Linfo_string6 + .long .Linfo_string7 + .long .Linfo_string8 + .long .Linfo_string9 + .long .Linfo_string10 + .section .debug_addr,"",@progbits + .long .Ldebug_addr_end0-.Ldebug_addr_start0 # Length of contribution +.Ldebug_addr_start0: + .short 5 # DWARF version number + .byte 8 # Address size + .byte 0 # Segment selector size +.Laddr_table_base0: + .quad _Z7doStuffi.__part.1 + .quad _Z7doStuffi.__part.2 + .quad .Lfunc_begin0 + .quad .Lfunc_begin1 +.Ldebug_addr_end0: + .ident "clang version 17.0.0 (https://github.com/llvm/llvm-project.git 640e07c49037cca41a1bfbeb916b569d8c950aea)" + .section ".note.GNU-stack","",@progbits + .addrsig + .addrsig_sym _Z7doStuffi + .section .debug_line,"",@progbits +.Lline_table_start0: diff --git a/bolt/test/X86/Inputs/dwarf5-subprogram-single-gc-ranges-main.s b/bolt/test/X86/Inputs/dwarf5-subprogram-single-gc-ranges-main.s new file mode 100644 index 000000000000..c7e1663606b6 --- /dev/null +++ b/bolt/test/X86/Inputs/dwarf5-subprogram-single-gc-ranges-main.s @@ -0,0 +1,359 @@ +# clang++ -ffunction-sections -g2 -gdwarf-5 +# Manually modified to use ranges like what generates sometimes -fbasic-block-sections=all, +# and changed start of range to 0. 
+# int doStuff(int val) { +# if (val) +# ++val; +# return val; +# } +# +# int main(int argc, const char** argv) { +# return doStuff(argc); +# } + .text + .file "main.cpp" + .section .text._Z7doStuffi,"ax",@progbits + .globl _Z7doStuffi # -- Begin function _Z7doStuffi + .p2align 4, 0x90 + .type _Z7doStuffi,@function +_Z7doStuffi: # @_Z7doStuffi +.Lfunc_begin0: + .file 0 "/subprogramRanges" "main.cpp" md5 0x45fd34ef778739dca24be206894f1d15 + .loc 0 1 0 # main.cpp:1:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + movl %edi, -4(%rbp) +.Ltmp0: + .loc 0 2 7 prologue_end # main.cpp:2:7 + cmpl $0, -4(%rbp) +.Ltmp1: + .loc 0 2 7 is_stmt 0 # main.cpp:2:7 + je .LBB0_2 +# %bb.1: # %if.then +.Ltmp2: + .loc 0 3 5 is_stmt 1 # main.cpp:3:5 + movl -4(%rbp), %eax + addl $1, %eax + movl %eax, -4(%rbp) +.Ltmp3: +.LBB0_2: # %if.end + .loc 0 4 10 # main.cpp:4:10 + movl -4(%rbp), %eax + .loc 0 4 3 epilogue_begin is_stmt 0 # main.cpp:4:3 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp4: +.Lfunc_end0: + .size _Z7doStuffi, .Lfunc_end0-_Z7doStuffi + .cfi_endproc + # -- End function + .section .text.main,"ax",@progbits + .globl main # -- Begin function main + .p2align 4, 0x90 + .type main,@function +main: # @main +.Lfunc_begin1: + .loc 0 7 0 is_stmt 1 # main.cpp:7:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + subq $16, %rsp + movl $0, -4(%rbp) + movl %edi, -8(%rbp) + movq %rsi, -16(%rbp) +.Ltmp5: + .loc 0 8 21 prologue_end # main.cpp:8:21 + movl -8(%rbp), %edi + .loc 0 8 13 is_stmt 0 # main.cpp:8:13 + callq _Z7doStuffi + .loc 0 8 5 epilogue_begin # main.cpp:8:5 + addq $16, %rsp + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp6: +.Lfunc_end1: + .size main, .Lfunc_end1-main + .cfi_endproc + # -- End function + .section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 17 # 
DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .byte 37 # DW_FORM_strx1 + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 114 # DW_AT_str_offsets_base + .byte 23 # DW_FORM_sec_offset + .byte 16 # DW_AT_stmt_list + .byte 23 # DW_FORM_sec_offset + .byte 27 # DW_AT_comp_dir + .byte 37 # DW_FORM_strx1 + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 85 # DW_AT_ranges + .byte 35 # DW_FORM_rnglistx + .byte 115 # DW_AT_addr_base + .byte 23 # DW_FORM_sec_offset + .byte 116 # DW_AT_rnglists_base + .byte 23 # DW_FORM_sec_offset + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 85 # DW_AT_ranges + .byte 35 # DW_FORM_rnglistx + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 110 # DW_AT_linkage_name + .byte 37 # DW_FORM_strx1 + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 3 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 4 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 
+ .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 5 # Abbreviation Code + .byte 36 # DW_TAG_base_type + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 62 # DW_AT_encoding + .byte 11 # DW_FORM_data1 + .byte 11 # DW_AT_byte_size + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 6 # Abbreviation Code + .byte 15 # DW_TAG_pointer_type + .byte 0 # DW_CHILDREN_no + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 7 # Abbreviation Code + .byte 38 # DW_TAG_const_type + .byte 0 # DW_CHILDREN_no + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit +.Ldebug_info_start0: + .short 5 # DWARF version number + .byte 1 # DWARF Unit Type + .byte 8 # Address Size (in bytes) + .long .debug_abbrev # Offset Into Abbrev. 
Section + .byte 1 # Abbrev [1] 0xc:0x79 DW_TAG_compile_unit + .byte 0 # DW_AT_producer + .short 33 # DW_AT_language + .byte 1 # DW_AT_name + .long .Lstr_offsets_base0 # DW_AT_str_offsets_base + .long .Lline_table_start0 # DW_AT_stmt_list + .byte 2 # DW_AT_comp_dir + .quad 0 # DW_AT_low_pc + .byte 0 # DW_AT_ranges + .long .Laddr_table_base0 # DW_AT_addr_base + .long .Lrnglists_table_base0 # DW_AT_rnglists_base + .byte 2 # Abbrev [2] 0x2b:0x1c DW_TAG_subprogram + .byte 1 # DW_AT_ranges + .byte 1 # DW_AT_frame_base + .byte 86 + .byte 3 # DW_AT_linkage_name + .byte 4 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + .long 109 # DW_AT_type + # DW_AT_external + .byte 3 # Abbrev [3] 0x3b:0xb DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 124 + .byte 7 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + .long 109 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 4 # Abbrev [4] 0x47:0x26 DW_TAG_subprogram + .byte 1 # DW_AT_low_pc + .long .Lfunc_end1-.Lfunc_begin1 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .byte 6 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 7 # DW_AT_decl_line + .long 109 # DW_AT_type + # DW_AT_external + .byte 3 # Abbrev [3] 0x56:0xb DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .byte 8 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 7 # DW_AT_decl_line + .long 109 # DW_AT_type + .byte 3 # Abbrev [3] 0x61:0xb DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .byte 9 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 7 # DW_AT_decl_line + .long 105 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 5 # Abbrev [5] 0x6d:0x4 DW_TAG_base_type + .byte 5 # DW_AT_name + .byte 5 # DW_AT_encoding + .byte 4 # DW_AT_byte_size + .byte 6 # Abbrev [6] 0x71:0x5 DW_TAG_pointer_type + .long 114 # DW_AT_type + .byte 6 # Abbrev [6] 0x76:0x5 DW_TAG_pointer_type + .long 119 # DW_AT_type + .byte 7 # Abbrev [7] 0x7b:0x5 
DW_TAG_const_type + .long 124 # DW_AT_type + .byte 5 # Abbrev [5] 0x80:0x4 DW_TAG_base_type + .byte 10 # DW_AT_name + .byte 6 # DW_AT_encoding + .byte 1 # DW_AT_byte_size + .byte 0 # End Of Children Mark +.Ldebug_info_end0: + .section .debug_rnglists,"",@progbits + .long .Ldebug_list_header_end0-.Ldebug_list_header_start0 # Length +.Ldebug_list_header_start0: + .short 5 # Version + .byte 8 # Address size + .byte 0 # Segment selector size + .long 1 # Offset entry count +.Lrnglists_table_base0: + .long .Ldebug_ranges0-.Lrnglists_table_base0 + .long .Ldebug_ranges1-.Lrnglists_table_base0 +.Ldebug_ranges0: + .byte 3 # DW_RLE_startx_length + .byte 0 # start index + .uleb128 .Lfunc_end0-.Lfunc_begin0 # length + .byte 3 # DW_RLE_startx_length + .byte 1 # start index + .uleb128 .Lfunc_end1-.Lfunc_begin1 # length + .byte 0 # DW_RLE_end_of_list +.Ldebug_ranges1: + .byte 3 # DW_RLE_startx_length + .byte 2 # start index + .uleb128 .Lfunc_end0-.Lfunc_begin0 # length + .byte 0 # DW_RLE_end_of_list +.Ldebug_list_header_end0: + .section .debug_str_offsets,"",@progbits + .long 48 # Length of String Offsets Set + .short 5 + .short 0 +.Lstr_offsets_base0: + .section .debug_str,"MS",@progbits,1 +.Linfo_string0: + .asciz "clang version 17.0.0 (https://github.com/llvm/llvm-project.git 640e07c49037cca41a1bfbeb916b569d8c950aea)" # string offset=0 +.Linfo_string1: + .asciz "main.cpp" # string offset=105 +.Linfo_string2: + .asciz "/subprogramRanges" # string offset=114 +.Linfo_string3: + .asciz "_Z7doStuffi" # string offset=169 +.Linfo_string4: + .asciz "doStuff" # string offset=181 +.Linfo_string5: + .asciz "int" # string offset=189 +.Linfo_string6: + .asciz "main" # string offset=193 +.Linfo_string7: + .asciz "val" # string offset=198 +.Linfo_string8: + .asciz "argc" # string offset=202 +.Linfo_string9: + .asciz "argv" # string offset=207 +.Linfo_string10: + .asciz "char" # string offset=212 + .section .debug_str_offsets,"",@progbits + .long .Linfo_string0 + .long .Linfo_string1 + .long 
.Linfo_string2 + .long .Linfo_string3 + .long .Linfo_string4 + .long .Linfo_string5 + .long .Linfo_string6 + .long .Linfo_string7 + .long .Linfo_string8 + .long .Linfo_string9 + .long .Linfo_string10 + .section .debug_addr,"",@progbits + .long .Ldebug_addr_end0-.Ldebug_addr_start0 # Length of contribution +.Ldebug_addr_start0: + .short 5 # DWARF version number + .byte 8 # Address size + .byte 0 # Segment selector size +.Laddr_table_base0: + .quad .Lfunc_begin0 + .quad .Lfunc_begin1 + .quad 0x0 +.Ldebug_addr_end0: + .ident "clang version 17.0.0 (https://github.com/llvm/llvm-project.git 640e07c49037cca41a1bfbeb916b569d8c950aea)" + .section ".note.GNU-stack","",@progbits + .addrsig + .addrsig_sym _Z7doStuffi + .section .debug_line,"",@progbits +.Lline_table_start0: diff --git a/bolt/test/X86/Inputs/dwarf5-subprogram-single-ranges-main.s b/bolt/test/X86/Inputs/dwarf5-subprogram-single-ranges-main.s new file mode 100644 index 000000000000..24865146934e --- /dev/null +++ b/bolt/test/X86/Inputs/dwarf5-subprogram-single-ranges-main.s @@ -0,0 +1,357 @@ +# clang++ -ffunction-sections -g2 -gdwarf-5 +# Manually modified to use ranges like what generates sometimes -fbasic-block-sections=all +# int doStuff(int val) { +# if (val) +# ++val; +# return val; +# } +# +# int main(int argc, const char** argv) { +# return doStuff(argc); +# } + .text + .file "main.cpp" + .section .text._Z7doStuffi,"ax",@progbits + .globl _Z7doStuffi # -- Begin function _Z7doStuffi + .p2align 4, 0x90 + .type _Z7doStuffi,@function +_Z7doStuffi: # @_Z7doStuffi +.Lfunc_begin0: + .file 0 "/subprogramRanges" "main.cpp" md5 0x45fd34ef778739dca24be206894f1d15 + .loc 0 1 0 # main.cpp:1:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + movl %edi, -4(%rbp) +.Ltmp0: + .loc 0 2 7 prologue_end # main.cpp:2:7 + cmpl $0, -4(%rbp) +.Ltmp1: + .loc 0 2 7 is_stmt 0 # main.cpp:2:7 + je .LBB0_2 +# %bb.1: # %if.then +.Ltmp2: + 
.loc 0 3 5 is_stmt 1 # main.cpp:3:5 + movl -4(%rbp), %eax + addl $1, %eax + movl %eax, -4(%rbp) +.Ltmp3: +.LBB0_2: # %if.end + .loc 0 4 10 # main.cpp:4:10 + movl -4(%rbp), %eax + .loc 0 4 3 epilogue_begin is_stmt 0 # main.cpp:4:3 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp4: +.Lfunc_end0: + .size _Z7doStuffi, .Lfunc_end0-_Z7doStuffi + .cfi_endproc + # -- End function + .section .text.main,"ax",@progbits + .globl main # -- Begin function main + .p2align 4, 0x90 + .type main,@function +main: # @main +.Lfunc_begin1: + .loc 0 7 0 is_stmt 1 # main.cpp:7:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + subq $16, %rsp + movl $0, -4(%rbp) + movl %edi, -8(%rbp) + movq %rsi, -16(%rbp) +.Ltmp5: + .loc 0 8 21 prologue_end # main.cpp:8:21 + movl -8(%rbp), %edi + .loc 0 8 13 is_stmt 0 # main.cpp:8:13 + callq _Z7doStuffi + .loc 0 8 5 epilogue_begin # main.cpp:8:5 + addq $16, %rsp + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp6: +.Lfunc_end1: + .size main, .Lfunc_end1-main + .cfi_endproc + # -- End function + .section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .byte 37 # DW_FORM_strx1 + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 114 # DW_AT_str_offsets_base + .byte 23 # DW_FORM_sec_offset + .byte 16 # DW_AT_stmt_list + .byte 23 # DW_FORM_sec_offset + .byte 27 # DW_AT_comp_dir + .byte 37 # DW_FORM_strx1 + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 85 # DW_AT_ranges + .byte 35 # DW_FORM_rnglistx + .byte 115 # DW_AT_addr_base + .byte 23 # DW_FORM_sec_offset + .byte 116 # DW_AT_rnglists_base + .byte 23 # DW_FORM_sec_offset + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 85 # DW_AT_ranges + .byte 35 # 
DW_FORM_rnglistx + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 110 # DW_AT_linkage_name + .byte 37 # DW_FORM_strx1 + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 3 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 4 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 5 # Abbreviation Code + .byte 36 # DW_TAG_base_type + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 62 # DW_AT_encoding + .byte 11 # DW_FORM_data1 + .byte 11 # DW_AT_byte_size + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 6 # Abbreviation Code + .byte 15 # DW_TAG_pointer_type + .byte 0 # DW_CHILDREN_no + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 7 # Abbreviation Code + .byte 38 # DW_TAG_const_type + .byte 0 # DW_CHILDREN_no + .byte 73 # DW_AT_type + .byte 19 
# DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit +.Ldebug_info_start0: + .short 5 # DWARF version number + .byte 1 # DWARF Unit Type + .byte 8 # Address Size (in bytes) + .long .debug_abbrev # Offset Into Abbrev. Section + .byte 1 # Abbrev [1] 0xc:0x79 DW_TAG_compile_unit + .byte 0 # DW_AT_producer + .short 33 # DW_AT_language + .byte 1 # DW_AT_name + .long .Lstr_offsets_base0 # DW_AT_str_offsets_base + .long .Lline_table_start0 # DW_AT_stmt_list + .byte 2 # DW_AT_comp_dir + .quad 0 # DW_AT_low_pc + .byte 0 # DW_AT_ranges + .long .Laddr_table_base0 # DW_AT_addr_base + .long .Lrnglists_table_base0 # DW_AT_rnglists_base + .byte 2 # Abbrev [2] 0x2b:0x1c DW_TAG_subprogram + .byte 1 # DW_AT_ranges + .byte 1 # DW_AT_frame_base + .byte 86 + .byte 3 # DW_AT_linkage_name + .byte 4 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + .long 109 # DW_AT_type + # DW_AT_external + .byte 3 # Abbrev [3] 0x3b:0xb DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 124 + .byte 7 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + .long 109 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 4 # Abbrev [4] 0x47:0x26 DW_TAG_subprogram + .byte 1 # DW_AT_low_pc + .long .Lfunc_end1-.Lfunc_begin1 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .byte 6 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 7 # DW_AT_decl_line + .long 109 # DW_AT_type + # DW_AT_external + .byte 3 # Abbrev [3] 0x56:0xb DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .byte 8 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 7 # DW_AT_decl_line + .long 109 # DW_AT_type + .byte 3 # Abbrev [3] 0x61:0xb DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .byte 9 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 7 # DW_AT_decl_line + .long 105 # DW_AT_type + .byte 0 # 
End Of Children Mark + .byte 5 # Abbrev [5] 0x6d:0x4 DW_TAG_base_type + .byte 5 # DW_AT_name + .byte 5 # DW_AT_encoding + .byte 4 # DW_AT_byte_size + .byte 6 # Abbrev [6] 0x71:0x5 DW_TAG_pointer_type + .long 114 # DW_AT_type + .byte 6 # Abbrev [6] 0x76:0x5 DW_TAG_pointer_type + .long 119 # DW_AT_type + .byte 7 # Abbrev [7] 0x7b:0x5 DW_TAG_const_type + .long 124 # DW_AT_type + .byte 5 # Abbrev [5] 0x80:0x4 DW_TAG_base_type + .byte 10 # DW_AT_name + .byte 6 # DW_AT_encoding + .byte 1 # DW_AT_byte_size + .byte 0 # End Of Children Mark +.Ldebug_info_end0: + .section .debug_rnglists,"",@progbits + .long .Ldebug_list_header_end0-.Ldebug_list_header_start0 # Length +.Ldebug_list_header_start0: + .short 5 # Version + .byte 8 # Address size + .byte 0 # Segment selector size + .long 1 # Offset entry count +.Lrnglists_table_base0: + .long .Ldebug_ranges0-.Lrnglists_table_base0 + .long .Ldebug_ranges1-.Lrnglists_table_base0 +.Ldebug_ranges0: + .byte 3 # DW_RLE_startx_length + .byte 0 # start index + .uleb128 .Lfunc_end0-.Lfunc_begin0 # length + .byte 3 # DW_RLE_startx_length + .byte 1 # start index + .uleb128 .Lfunc_end1-.Lfunc_begin1 # length + .byte 0 # DW_RLE_end_of_list +.Ldebug_ranges1: + .byte 3 # DW_RLE_startx_length + .byte 0 # start index + .uleb128 .Lfunc_end0-.Lfunc_begin0 # length + .byte 0 # DW_RLE_end_of_list +.Ldebug_list_header_end0: + .section .debug_str_offsets,"",@progbits + .long 48 # Length of String Offsets Set + .short 5 + .short 0 +.Lstr_offsets_base0: + .section .debug_str,"MS",@progbits,1 +.Linfo_string0: + .asciz "clang version 17.0.0 (https://github.com/llvm/llvm-project.git 640e07c49037cca41a1bfbeb916b569d8c950aea)" # string offset=0 +.Linfo_string1: + .asciz "main.cpp" # string offset=105 +.Linfo_string2: + .asciz "/subprogramRanges" # string offset=114 +.Linfo_string3: + .asciz "_Z7doStuffi" # string offset=169 +.Linfo_string4: + .asciz "doStuff" # string offset=181 +.Linfo_string5: + .asciz "int" # string offset=189 +.Linfo_string6: + .asciz 
"main" # string offset=193 +.Linfo_string7: + .asciz "val" # string offset=198 +.Linfo_string8: + .asciz "argc" # string offset=202 +.Linfo_string9: + .asciz "argv" # string offset=207 +.Linfo_string10: + .asciz "char" # string offset=212 + .section .debug_str_offsets,"",@progbits + .long .Linfo_string0 + .long .Linfo_string1 + .long .Linfo_string2 + .long .Linfo_string3 + .long .Linfo_string4 + .long .Linfo_string5 + .long .Linfo_string6 + .long .Linfo_string7 + .long .Linfo_string8 + .long .Linfo_string9 + .long .Linfo_string10 + .section .debug_addr,"",@progbits + .long .Ldebug_addr_end0-.Ldebug_addr_start0 # Length of contribution +.Ldebug_addr_start0: + .short 5 # DWARF version number + .byte 8 # Address size + .byte 0 # Segment selector size +.Laddr_table_base0: + .quad .Lfunc_begin0 + .quad .Lfunc_begin1 +.Ldebug_addr_end0: + .ident "clang version 17.0.0 (https://github.com/llvm/llvm-project.git 640e07c49037cca41a1bfbeb916b569d8c950aea)" + .section ".note.GNU-stack","",@progbits + .addrsig + .addrsig_sym _Z7doStuffi + .section .debug_line,"",@progbits +.Lline_table_start0: diff --git a/bolt/test/X86/debug-fission-single-convert.s b/bolt/test/X86/debug-fission-single-convert.s new file mode 100644 index 000000000000..5f882a70690f --- /dev/null +++ b/bolt/test/X86/debug-fission-single-convert.s @@ -0,0 +1,76 @@ +# Checks debug fission support in BOLT + +# REQUIRES: system-linux + +# RUN: llvm-mc -g \ +# RUN: --filetype=obj \ +# RUN: --triple x86_64-unknown-unknown \ +# RUN: --split-dwarf-file=debug-fission-simple-convert.dwo \ +# RUN: %p/Inputs/debug-fission-simple-convert.s \ +# RUN: -o %t.o +# RUN: %clangxx %cxxflags -no-pie -g \ +# RUN: -Wl,--gc-sections,-q,-nostdlib \ +# RUN: -Wl,--undefined=_Z6_startv \ +# RUN: -nostartfiles \ +# RUN: -Wl,--script=%p/Inputs/debug-fission-script.txt \ +# RUN: %t.o -o %t.exe +# RUN: llvm-bolt %t.exe \ +# RUN: --reorder-blocks=reverse \ +# RUN: --update-debug-sections \ +# RUN: --dwarf-output-path=%T \ +# RUN: 
--always-convert-to-ranges=true \ +# RUN: -o %t.bolt.1.exe 2>&1 | FileCheck %s +# RUN: llvm-dwarfdump --show-form --verbose --debug-ranges %t.bolt.1.exe &> %tAddrIndexTest +# RUN: not llvm-dwarfdump --show-form --verbose --debug-info %T/debug-fission-simple-convert.dwo0.dwo >> %tAddrIndexTest +# RUN: cat %tAddrIndexTest | FileCheck %s --check-prefix=CHECK-DWO-DWO +# RUN: llvm-dwarfdump --show-form --verbose --debug-addr %t.bolt.1.exe | FileCheck %s --check-prefix=CHECK-ADDR-SEC + +# CHECK-NOT: warning: DWARF unit from offset {{.*}} incl. to offset {{.*}} excl. tries to read DIEs at offset {{.*}} + +# CHECK-DWO-DWO: 00000010 +# CHECK-DWO-DWO: 00000010 +# CHECK-DWO-DWO: 00000050 +# CHECK-DWO-DWO: DW_TAG_subprogram +# CHECK-DWO-DWO-NEXT: DW_AT_low_pc [DW_FORM_GNU_addr_index] (indexed (00000000) +# CHECK-DWO-DWO-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000000 +# CHECK-DWO-DWO: DW_TAG_subprogram +# CHECK-DWO-DWO-NEXT: DW_AT_low_pc [DW_FORM_GNU_addr_index] (indexed (00000000) +# CHECK-DWO-DWO-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000020 +# CHECK-DWO-DWO: DW_TAG_subprogram +# CHECK-DWO-DWO-NEXT: DW_AT_low_pc [DW_FORM_GNU_addr_index] (indexed (00000000) +# CHECK-DWO-DWO-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000040 + +# CHECK-ADDR-SEC: .debug_addr contents: +# CHECK-ADDR-SEC: 0x00000000: Addrs: [ +# CHECK-ADDR-SEC: 0x0000000000601000 + +# RUN: llvm-bolt %t.exe --reorder-blocks=reverse --update-debug-sections --dwarf-output-path=%T -o %t.bolt.2.exe --write-dwp=true \ +# RUN: --always-convert-to-ranges=true +# RUN: not llvm-dwarfdump --show-form --verbose --debug-info %t.bolt.2.exe.dwp &> %tAddrIndexTestDwp +# RUN: cat %tAddrIndexTestDwp | FileCheck %s --check-prefix=CHECK-DWP-DEBUG + +# CHECK-DWP-DEBUG: DW_TAG_compile_unit [1] * +# CHECK-DWP-DEBUG: DW_AT_producer [DW_FORM_GNU_str_index] (indexed (0000000a) string = "clang version 13.0.0") +# CHECK-DWP-DEBUG: DW_AT_language [DW_FORM_data2] (DW_LANG_C_plus_plus) +# CHECK-DWP-DEBUG: DW_AT_name 
[DW_FORM_GNU_str_index] (indexed (0000000b) string = "foo") +# CHECK-DWP-DEBUG: DW_AT_GNU_dwo_name [DW_FORM_GNU_str_index] (indexed (0000000c) string = "foo") +# CHECK-DWP-DEBUG: DW_AT_GNU_dwo_id [DW_FORM_data8] (0x06105e732fad3796) + + +//clang++ -ffunction-sections -fno-exceptions -g -gsplit-dwarf=split -S debug-fission-simple.cpp -o debug-fission-simple.s +static int foo = 2; +int doStuff(int val) { + if (val == 5) + val += 1 + foo; + else + val -= 1; + return val; +} + +int doStuff2(int val) { + return val += 3; +} + +int main(int argc, const char** argv) { + return doStuff(argc); +} diff --git a/bolt/test/X86/debug-fission-single.s b/bolt/test/X86/debug-fission-single.s index fe0308902ec6..0d25aaef274a 100644 --- a/bolt/test/X86/debug-fission-single.s +++ b/bolt/test/X86/debug-fission-single.s @@ -20,7 +20,7 @@ # RUN: --dwarf-output-path=%T \ # RUN: -o %t.bolt.1.exe 2>&1 | FileCheck %s # RUN: llvm-dwarfdump --show-form --verbose --debug-ranges %t.bolt.1.exe &> %tAddrIndexTest -# RUN: not llvm-dwarfdump --show-form --verbose --debug-info %T/debug-fission-simple.dwo0.dwo >> %tAddrIndexTest +# RUN: llvm-dwarfdump --show-form --verbose --debug-info %T/debug-fission-simple.dwo0.dwo >> %tAddrIndexTest # RUN: cat %tAddrIndexTest | FileCheck %s --check-prefix=CHECK-DWO-DWO # RUN: llvm-dwarfdump --show-form --verbose --debug-addr %t.bolt.1.exe | FileCheck %s --check-prefix=CHECK-ADDR-SEC @@ -28,23 +28,22 @@ # CHECK-DWO-DWO: 00000010 # CHECK-DWO-DWO: 00000010 -# CHECK-DWO-DWO: 00000050 # CHECK-DWO-DWO: DW_TAG_subprogram -# CHECK-DWO-DWO-NEXT: DW_AT_low_pc [DW_FORM_GNU_addr_index] (indexed (00000000) -# CHECK-DWO-DWO-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000000 +# CHECK-DWO-DWO-NEXT: DW_AT_low_pc [DW_FORM_GNU_addr_index] (indexed (00000001) +# CHECK-DWO-DWO-NEXT: DW_AT_high_pc [DW_FORM_data4] (0x00000031) # CHECK-DWO-DWO: DW_TAG_subprogram -# CHECK-DWO-DWO-NEXT: DW_AT_low_pc [DW_FORM_GNU_addr_index] (indexed (00000000) -# CHECK-DWO-DWO-NEXT: DW_AT_ranges 
[DW_FORM_sec_offset] (0x00000020 +# CHECK-DWO-DWO-NEXT: DW_AT_low_pc [DW_FORM_GNU_addr_index] (indexed (00000002) +# CHECK-DWO-DWO-NEXT: DW_AT_high_pc [DW_FORM_data4] (0x00000012) # CHECK-DWO-DWO: DW_TAG_subprogram -# CHECK-DWO-DWO-NEXT: DW_AT_low_pc [DW_FORM_GNU_addr_index] (indexed (00000000) -# CHECK-DWO-DWO-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000040 +# CHECK-DWO-DWO-NEXT: DW_AT_low_pc [DW_FORM_GNU_addr_index] (indexed (00000003) +# CHECK-DWO-DWO-NEXT: DW_AT_high_pc [DW_FORM_data4] (0x0000001d) # CHECK-ADDR-SEC: .debug_addr contents: # CHECK-ADDR-SEC: 0x00000000: Addrs: [ # CHECK-ADDR-SEC: 0x0000000000601000 # RUN: llvm-bolt %t.exe --reorder-blocks=reverse --update-debug-sections --dwarf-output-path=%T -o %t.bolt.2.exe --write-dwp=true -# RUN: not llvm-dwarfdump --show-form --verbose --debug-info %t.bolt.2.exe.dwp &> %tAddrIndexTestDwp +# RUN: llvm-dwarfdump --show-form --verbose --debug-info %t.bolt.2.exe.dwp &> %tAddrIndexTestDwp # RUN: cat %tAddrIndexTestDwp | FileCheck %s --check-prefix=CHECK-DWP-DEBUG # CHECK-DWP-DEBUG: DW_TAG_compile_unit [1] * diff --git a/bolt/test/X86/dwarf4-df-do-no-convert-low-pc-high-pc-to-ranges.test b/bolt/test/X86/dwarf4-df-do-no-convert-low-pc-high-pc-to-ranges.test new file mode 100644 index 000000000000..95c1c747a3d0 --- /dev/null +++ b/bolt/test/X86/dwarf4-df-do-no-convert-low-pc-high-pc-to-ranges.test @@ -0,0 +1,24 @@ +; REQUIRES: system-linux + +; RUN: rm -rf %t +; RUN: mkdir %t +; RUN: cd %t + +; RUN: llvm-mc -split-dwarf-file=main.dwo -dwarf-version=4 -filetype=obj -triple x86_64-unknown-linux %p/Inputs/dwarf4-df-do-no-convert-low-pc-high-pc-to-ranges.s -o main.o +; RUN: %clang %cflags -gsplit-dwarf=split main.o -o main.exe -Wl,-q +; RUN: llvm-bolt main.exe -o main.exe.bolt --update-debug-sections +; RUN: llvm-dwarfdump --show-form --verbose --debug-info main.dwo | FileCheck --check-prefix=PRECHECK %s +; RUN: llvm-dwarfdump --show-form --verbose --debug-info main.dwo.dwo | FileCheck --check-prefix=POSTCHECK %s + 
+; This test checks that we do not convert low_pc/high_pc to ranges for DW_TAG_inlined_subroutine, +; when there is only one output range entry. + +; PRECHECK: DW_TAG_inlined_subroutine +; PRECHECK: DW_AT_abstract_origin +; PRECHECK: DW_AT_low_pc [DW_FORM_GNU_addr_index] (indexed (00000001) +; PRECHECK: DW_AT_high_pc [DW_FORM_data4] (0x00000002) + +; POSTCHECK: DW_TAG_inlined_subroutine +; POSTCHECK: DW_AT_abstract_origin +; POSTCHECK: DW_AT_low_pc [DW_FORM_GNU_addr_index] (indexed (00000003) +; POSTCHECK: DW_AT_high_pc [DW_FORM_data4] (0x00000002) diff --git a/bolt/test/X86/dwarf4-df-dualcu-loclist.test b/bolt/test/X86/dwarf4-df-dualcu-loclist.test index 2918690c0bf3..6ef4fb97e8ca 100644 --- a/bolt/test/X86/dwarf4-df-dualcu-loclist.test +++ b/bolt/test/X86/dwarf4-df-dualcu-loclist.test @@ -8,9 +8,9 @@ ; RUN: %clang %cflags -gdwarf-5 -O2 -gsplit-dwarf=split main.o helper.o -o main.exe ; RUN: llvm-bolt main.exe -o main.exe.bolt --update-debug-sections ; RUN: llvm-dwarfdump --show-form --verbose --debug-info main.dwo | FileCheck -check-prefix=PRE-BOLT-DWO-MAIN %s -; RUN: not llvm-dwarfdump --show-form --verbose --debug-info main.dwo.dwo | FileCheck -check-prefix=BOLT-DWO-MAIN %s +; RUN: llvm-dwarfdump --show-form --verbose --debug-info main.dwo.dwo | FileCheck -check-prefix=BOLT-DWO-MAIN %s ; RUN: llvm-dwarfdump --show-form --verbose --debug-info helper.dwo | FileCheck -check-prefix=PRE-BOLT-DWO-HELPER %s -; RUN: not llvm-dwarfdump --show-form --verbose --debug-info helper.dwo.dwo | FileCheck -check-prefix=BOLT-DWO-HELPER %s +; RUN: llvm-dwarfdump --show-form --verbose --debug-info helper.dwo.dwo | FileCheck -check-prefix=BOLT-DWO-HELPER %s ; Testing dwarf4 split dwarf for two CUs. Making sure DW_AT_location [DW_FORM_sec_offset] is updated correctly. 
@@ -27,12 +27,12 @@ ; BOLT-DWO-MAIN: version = 0x0004 ; BOLT-DWO-MAIN: DW_TAG_formal_parameter [10] ; BOLT-DWO-MAIN-NEXT: DW_AT_location [DW_FORM_sec_offset] (0x00000010: -; BOLT-DWO-MAIN-NEXT: DW_LLE_startx_length (0x0000000000000002, 0x0000000000000014) -; BOLT-DWO-MAIN-NEXT: DW_LLE_startx_length (0x0000000000000003, 0x0000000000000005) +; BOLT-DWO-MAIN-NEXT: DW_LLE_startx_length (0x0000000000000003, 0x0000000000000014) +; BOLT-DWO-MAIN-NEXT: DW_LLE_startx_length (0x0000000000000004, 0x0000000000000005) ; BOLT-DWO-MAIN: DW_TAG_formal_parameter [10] ; BOLT-DWO-MAIN-NEXT: DW_AT_location [DW_FORM_sec_offset] (0x00000026: -; BOLT-DWO-MAIN-NEXT: DW_LLE_startx_length (0x0000000000000002, 0x000000000000000c) -; BOLT-DWO-MAIN-NEXT: DW_LLE_startx_length (0x0000000000000004, 0x000000000000000d) +; BOLT-DWO-MAIN-NEXT: DW_LLE_startx_length (0x0000000000000003, 0x000000000000000c) +; BOLT-DWO-MAIN-NEXT: DW_LLE_startx_length (0x0000000000000005, 0x000000000000000d) ; PRE-BOLT-DWO-HELPER: version = 0x0004 diff --git a/bolt/test/X86/dwarf4-df-dualcu.test b/bolt/test/X86/dwarf4-df-dualcu.test index ec9d574d3fd6..71726136d7ca 100644 --- a/bolt/test/X86/dwarf4-df-dualcu.test +++ b/bolt/test/X86/dwarf4-df-dualcu.test @@ -6,7 +6,7 @@ ; RUN: llvm-mc -dwarf-version=4 -filetype=obj -triple x86_64-unknown-linux %p/Inputs/dwarf4-df-dualcu-helper.s \ ; RUN: -split-dwarf-file=helper.dwo -o helper.o ; RUN: %clang %cflags -gdwarf-5 -gsplit-dwarf=split main.o helper.o -o main.exe -; RUN: llvm-bolt main.exe -o main.exe.bolt --update-debug-sections +; RUN: llvm-bolt main.exe -o main.exe.bolt --update-debug-sections --always-convert-to-ranges ; RUN: llvm-dwarfdump --show-form --verbose --debug-info main.exe | FileCheck -check-prefix=PRE-BOLT %s ; RUN: llvm-dwarfdump --show-form --verbose --debug-ranges main.exe.bolt &> %t/foo.txt ; RUN: llvm-dwarfdump --show-form --verbose --debug-info main.exe.bolt >> %t/foo.txt diff --git a/bolt/test/X86/dwarf4-do-no-convert-low-pc-high-pc-to-ranges.test 
b/bolt/test/X86/dwarf4-do-no-convert-low-pc-high-pc-to-ranges.test new file mode 100644 index 000000000000..2e861c7ea504 --- /dev/null +++ b/bolt/test/X86/dwarf4-do-no-convert-low-pc-high-pc-to-ranges.test @@ -0,0 +1,20 @@ +# REQUIRES: system-linux + +# RUN: llvm-mc -dwarf-version=4 -filetype=obj -triple x86_64-unknown-linux %p/Inputs/dwarf4-do-no-convert-low-pc-high-pc-to-ranges.s -o %t1.o +# RUN: %clang %cflags %t1.o -o %t.exe -Wl,-q +# RUN: llvm-bolt %t.exe -o %t.bolt --update-debug-sections +# RUN: llvm-dwarfdump --show-form --verbose --debug-info %t.exe | FileCheck --check-prefix=PRECHECK %s +# RUN: llvm-dwarfdump --show-form --verbose --debug-info %t.bolt | FileCheck --check-prefix=POSTCHECK %s + +# This test checks that we do not convert low_pc/high_pc to ranges for DW_TAG_inlined_subroutine, +# when there is only one output range entry. + +# PRECHECK: DW_TAG_inlined_subroutine +# PRECHECK: DW_AT_abstract_origin +# PRECHECK: DW_AT_low_pc +# PRECHECK: DW_AT_high_pc [DW_FORM_data4] (0x00000002) + +# POSTCHECK: DW_TAG_inlined_subroutine +# POSTCHECK: DW_AT_abstract_origin +# POSTCHECK: DW_AT_low_pc +# POSTCHECK: DW_AT_high_pc [DW_FORM_data4] (0x00000002) diff --git a/bolt/test/X86/dwarf4-size-0-inlined_subroutine.s b/bolt/test/X86/dwarf4-size-0-inlined_subroutine.s index 83594520a640..584e67b1c79f 100644 --- a/bolt/test/X86/dwarf4-size-0-inlined_subroutine.s +++ b/bolt/test/X86/dwarf4-size-0-inlined_subroutine.s @@ -9,11 +9,11 @@ # CHECK: DW_TAG_inlined_subroutine # CHECK: DW_AT_low_pc [DW_FORM_addr] (0x[[#%.16x,ADDR:]]) -# CHECK: DW_AT_ranges [DW_FORM_sec_offset] -# CHECK-NEXT: [0x[[#ADDR]], 0x[[#ADDR]]) +# CHECK: DW_AT_high_pc [DW_FORM_data4] (0x00000000) # CHECK: DW_TAG_inlined_subroutine # CHECK-NOT: DW_AT_low_pc [DW_FORM_addr] (0x[[#ADDR]]) +# CHECK: DW_AT_high_pc [DW_FORM_data4] (0x00000000) # Testing BOLT handles correctly when size of DW_AT_inlined_subroutine is 0. 
diff --git a/bolt/test/X86/dwarf4-subprogram-multiple-ranges.test b/bolt/test/X86/dwarf4-subprogram-multiple-ranges.test new file mode 100644 index 000000000000..63db886c9137 --- /dev/null +++ b/bolt/test/X86/dwarf4-subprogram-multiple-ranges.test @@ -0,0 +1,23 @@ +# REQUIRES: system-linux + +# RUN: llvm-mc -dwarf-version=4 -filetype=obj -triple x86_64-unknown-linux %p/Inputs/dwarf4-subprogram-multiple-ranges-main.s -o %t1.o +# RUN: %clang %cflags %t1.o -o %t.exe -Wl,-q +# RUN: llvm-bolt %t.exe -o %t.bolt --update-debug-sections +# RUN: llvm-objdump %t.bolt --disassemble > %t1.txt +# RUN: llvm-dwarfdump --show-form --verbose --debug-info %t.bolt >> %t1.txt +# RUN: cat %t1.txt | FileCheck --check-prefix=POSTCHECK %s + +# This test checks BOLT correctly handles DW_TAG_subprogram with Ranges with multiple entries. + +# POSTCHECK: _Z7doStuffi>: +# POSTCHECK: [[#%.6x,ADDR:]] +# POSTCHECK: _Z7doStuffi.__part.1>: +# POSTCHECK-NEXT: [[#%.6x,ADDR1:]] +# POSTCHECK: _Z7doStuffi.__part.2>: +# POSTCHECK-NEXT: [[#%.6x,ADDR2:]] + +# POSTCHECK: DW_TAG_subprogram +# POSTCHECK-NEXT: DW_AT_ranges +# POSTCHECK-NEXT: [0x0000000000[[#ADDR1]], 0x0000000000[[#ADDR1 + 0xb]]) +# POSTCHECK-NEXT: [0x0000000000[[#ADDR2]], 0x0000000000[[#ADDR2 + 0x5]]) +# POSTCHECK-NEXT: [0x0000000000[[#ADDR]], 0x0000000000[[#ADDR + 0xf]])) diff --git a/bolt/test/X86/dwarf4-subprogram-single-gc-ranges.test b/bolt/test/X86/dwarf4-subprogram-single-gc-ranges.test new file mode 100644 index 000000000000..9080052a2991 --- /dev/null +++ b/bolt/test/X86/dwarf4-subprogram-single-gc-ranges.test @@ -0,0 +1,23 @@ +# REQUIRES: system-linux + +# RUN: llvm-mc -dwarf-version=4 -filetype=obj -triple x86_64-unknown-linux %p/Inputs/dwarf4-subprogram-single-gc-ranges-main.s -o %t1.o +# RUN: %clang %cflags %t1.o -o %t.exe -Wl,-q +# RUN: llvm-bolt %t.exe -o %t.bolt --update-debug-sections &> %t1.txt +# RUN: llvm-dwarfdump --show-form --verbose --debug-info %t.bolt >> %t1.txt +# RUN: cat %t1.txt | FileCheck --check-prefix=POSTCHECK 
%s + +# This test checks BOLT correctly handles DW_TAG_subprogram with Ranges with single entry, when function was GCed. + +# POSTCHECK: BOLT-WARNING: [internal-dwarf-error]: subprogram got GCed by the linker, DW_AT_ranges is used + +# POSTCHECK: DW_TAG_subprogram +# POSTCHECK-NEXT: DW_AT_frame_base +# POSTCHECK-NEXT: DW_AT_linkage_name +# POSTCHECK-SAME: _Z7doStuffi +# POSTCHECK-NEXT: DW_AT_name +# POSTCHECK-NEXT: DW_AT_decl_file +# POSTCHECK-NEXT: DW_AT_decl_line +# POSTCHECK-NEXT: DW_AT_type +# POSTCHECK-NEXT: DW_AT_external +# POSTCHECK-NEXT: DW_AT_low_pc [DW_FORM_addr] (0x0000000000000000) +# POSTCHECK-NEXT: DW_AT_high_pc [DW_FORM_data4] (0x00000001) diff --git a/bolt/test/X86/dwarf4-subprogram-single-ranges.test b/bolt/test/X86/dwarf4-subprogram-single-ranges.test new file mode 100644 index 000000000000..0dcbbcdfcce3 --- /dev/null +++ b/bolt/test/X86/dwarf4-subprogram-single-ranges.test @@ -0,0 +1,25 @@ +# REQUIRES: system-linux + +# RUN: llvm-mc -dwarf-version=4 -filetype=obj -triple x86_64-unknown-linux %p/Inputs/dwarf4-subprogram-single-ranges-main.s -o %t1.o +# RUN: %clang %cflags %t1.o -o %t.exe -Wl,-q +# RUN: llvm-bolt %t.exe -o %t.bolt --update-debug-sections +# RUN: llvm-objdump %t.bolt --disassemble > %t1.txt +# RUN: llvm-dwarfdump --show-form --verbose --debug-info %t.bolt >> %t1.txt +# RUN: cat %t1.txt | FileCheck --check-prefix=POSTCHECK %s + +# This test checks BOLT correctly handles DW_TAG_subprogram with Ranges with single entry. 
+ +# POSTCHECK: _Z7doStuffi>: +# POSTCHECK: [[#%.6x,ADDR:]] + +# POSTCHECK: DW_TAG_subprogram +# POSTCHECK-NEXT: DW_AT_frame_base +# POSTCHECK-NEXT: DW_AT_linkage_name +# POSTCHECK-SAME: _Z7doStuffi +# POSTCHECK-NEXT: DW_AT_name +# POSTCHECK-NEXT: DW_AT_decl_file +# POSTCHECK-NEXT: DW_AT_decl_line +# POSTCHECK-NEXT: DW_AT_type +# POSTCHECK-NEXT: DW_AT_external +# POSTCHECK-NEXT: DW_AT_low_pc [DW_FORM_addr] (0x0000000000[[#ADDR]]) +# POSTCHECK-NEXT: DW_AT_high_pc [DW_FORM_data4] (0x0000001b) diff --git a/bolt/test/X86/dwarf4-types-dwarf5-types.test b/bolt/test/X86/dwarf4-types-dwarf5-types.test index dc45d0c3daa7..a5d2ec8df20a 100644 --- a/bolt/test/X86/dwarf4-types-dwarf5-types.test +++ b/bolt/test/X86/dwarf4-types-dwarf5-types.test @@ -42,9 +42,9 @@ # POSTCHECK: version = 0x0005 # POSTCHECK: DW_TAG_compile_unit # POSTCHECK: DW_TAG_subprogram -# POSTCHECK: DW_AT_type [DW_FORM_ref4] (cu + 0x0048 => {0x{{[0-9a-f]+}}} "int") +# POSTCHECK: DW_AT_type [DW_FORM_ref4] (cu + 0x004b => {0x{{[0-9a-f]+}}} "int") # POSTCHECK: DW_TAG_variable -# POSTCHECK: DW_AT_type [DW_FORM_ref4] (cu + 0x004c => {0x{{[0-9a-f]+}}} "Foo2a") +# POSTCHECK: DW_AT_type [DW_FORM_ref4] (cu + 0x004f => {0x{{[0-9a-f]+}}} "Foo2a") # POSTCHECK: DW_TAG_base_type # POSTCHECK: DW_TAG_structure_type # POSTCHECK: DW_AT_signature [DW_FORM_ref_sig8] (0x104ec427d2ebea6f) diff --git a/bolt/test/X86/dwarf4-types-dwarf5.test b/bolt/test/X86/dwarf4-types-dwarf5.test index 368e0f8284e8..9ece6db3f00a 100644 --- a/bolt/test/X86/dwarf4-types-dwarf5.test +++ b/bolt/test/X86/dwarf4-types-dwarf5.test @@ -19,7 +19,7 @@ # POSTCHECK: version = 0x0005 # POSTCHECK: DW_TAG_compile_unit # POSTCHECK: DW_TAG_subprogram -# POSTCHECK: DW_AT_type [DW_FORM_ref4] (cu + 0x0031 => {0x000000d8} "int") +# POSTCHECK: DW_AT_type [DW_FORM_ref4] (cu + 0x0034 => {0x000000db} "int") # POSTCHECK: DW_TAG_base_type # POSTCHECKTU: version = 0x0004 diff --git a/bolt/test/X86/dwarf5-df-dualcu-loclist.test b/bolt/test/X86/dwarf5-df-dualcu-loclist.test 
index 3c212d126c04..ea5b28a2e88f 100644 --- a/bolt/test/X86/dwarf5-df-dualcu-loclist.test +++ b/bolt/test/X86/dwarf5-df-dualcu-loclist.test @@ -29,12 +29,12 @@ ; BOLT-DWO-MAIN: version = 0x0005 ; BOLT-DWO-MAIN: DW_TAG_formal_parameter [10] ; BOLT-DWO-MAIN-NEXT: DW_AT_location [DW_FORM_loclistx] (indexed (0x0) loclist = 0x00000014: -; BOLT-DWO-MAIN-NEXT: DW_LLE_base_addressx (0x0000000000000004) +; BOLT-DWO-MAIN-NEXT: DW_LLE_base_addressx (0x0000000000000003) ; BOLT-DWO-MAIN-NEXT: DW_LLE_offset_pair (0x0000000000000000, 0x0000000000000014) ; BOLT-DWO-MAIN-NEXT: DW_LLE_offset_pair (0x0000000000000014, 0x0000000000000019) ; BOLT-DWO-MAIN: DW_TAG_formal_parameter [10] ; BOLT-DWO-MAIN-NEXT: DW_AT_location [DW_FORM_loclistx] (indexed (0x1) loclist = 0x00000024: -; BOLT-DWO-MAIN-NEXT: DW_LLE_base_addressx (0x0000000000000004) +; BOLT-DWO-MAIN-NEXT: DW_LLE_base_addressx (0x0000000000000003) ; BOLT-DWO-MAIN-NEXT: DW_LLE_offset_pair (0x0000000000000000, 0x000000000000000c) ; BOLT-DWO-MAIN-NEXT: DW_LLE_offset_pair (0x000000000000000c, 0x0000000000000019) diff --git a/bolt/test/X86/dwarf5-df-dualcu.test b/bolt/test/X86/dwarf5-df-dualcu.test index ee6fe73de294..7527de5121ca 100644 --- a/bolt/test/X86/dwarf5-df-dualcu.test +++ b/bolt/test/X86/dwarf5-df-dualcu.test @@ -6,7 +6,7 @@ ; RUN: llvm-mc -dwarf-version=5 -filetype=obj -triple x86_64-unknown-linux %p/Inputs/dwarf5-df-dualcu-helper.s \ ; RUN: -split-dwarf-file=helper.dwo -o helper.o ; RUN: %clang %cflags -gdwarf-5 -gsplit-dwarf=split main.o helper.o -o main.exe -; RUN: llvm-bolt main.exe -o main.exe.bolt --update-debug-sections +; RUN: llvm-bolt main.exe -o main.exe.bolt --update-debug-sections --always-convert-to-ranges ; RUN: llvm-dwarfdump --show-form --verbose --debug-info main.exe | FileCheck -check-prefix=PRE-BOLT %s ; RUN: llvm-dwarfdump --show-form --verbose --debug-addr main.exe.bolt &> %t/foo.txt ; RUN: llvm-dwarfdump --show-form --verbose --debug-info main.exe.bolt >> %t/foo.txt diff --git 
a/bolt/test/X86/dwarf5-df-mono-dualcu.test b/bolt/test/X86/dwarf5-df-mono-dualcu.test index 98d7854c703e..351be6051af8 100644 --- a/bolt/test/X86/dwarf5-df-mono-dualcu.test +++ b/bolt/test/X86/dwarf5-df-mono-dualcu.test @@ -5,7 +5,7 @@ ; RUN: -split-dwarf-file=main.dwo -o main.o ; RUN: llvm-mc -filetype=obj -triple x86_64-unknown-linux-gnu %p/Inputs/dwarf5-df-mono-helper.s -o=helper.o ; RUN: %clang %cflags -gdwarf-5 main.o helper.o -o main.exe -; RUN: llvm-bolt main.exe -o main.exe.bolt --update-debug-sections +; RUN: llvm-bolt main.exe -o main.exe.bolt --update-debug-sections --always-convert-to-ranges ; RUN: llvm-dwarfdump --show-form --verbose --debug-info main.exe | FileCheck -check-prefix=PRE-BOLT %s ; RUN: llvm-dwarfdump --show-form --verbose --debug-addr main.exe.bolt &> %t/foo.txt ; RUN: llvm-dwarfdump --show-form --verbose --debug-info main.exe.bolt >> %t/foo.txt diff --git a/bolt/test/X86/dwarf5-do-no-convert-low-pc-high-pc-to-ranges.test b/bolt/test/X86/dwarf5-do-no-convert-low-pc-high-pc-to-ranges.test new file mode 100644 index 000000000000..1a59844814cd --- /dev/null +++ b/bolt/test/X86/dwarf5-do-no-convert-low-pc-high-pc-to-ranges.test @@ -0,0 +1,20 @@ +# REQUIRES: system-linux + +# RUN: llvm-mc -dwarf-version=5 -filetype=obj -triple x86_64-unknown-linux %p/Inputs/dwarf5-do-no-convert-low-pc-high-pc-to-ranges.s -o %t1.o +# RUN: %clang %cflags %t1.o -o %t.exe -Wl,-q +# RUN: llvm-bolt %t.exe -o %t.bolt --update-debug-sections +# RUN: llvm-dwarfdump --show-form --verbose --debug-info %t.exe | FileCheck --check-prefix=PRECHECK %s +# RUN: llvm-dwarfdump --show-form --verbose --debug-info %t.bolt | FileCheck --check-prefix=POSTCHECK %s + +# This test checks that we do not convert low_pc/high_pc to ranges for DW_TAG_inlined_subroutine, +# when there is only one output range entry. 
+ +# PRECHECK: DW_TAG_inlined_subroutine +# PRECHECK: DW_AT_abstract_origin +# PRECHECK: DW_AT_low_pc [DW_FORM_addrx] (indexed (00000001) +# PRECHECK: DW_AT_high_pc [DW_FORM_data4] (0x00000002) + +# POSTCHECK: DW_TAG_inlined_subroutine +# POSTCHECK: DW_AT_abstract_origin +# POSTCHECK: DW_AT_low_pc [DW_FORM_addrx] (indexed (00000002) +# POSTCHECK: DW_AT_high_pc [DW_FORM_data4] (0x00000002) diff --git a/bolt/test/X86/dwarf5-dwarf4-gdb-index-types-gdb-generated-gdb11.test b/bolt/test/X86/dwarf5-dwarf4-gdb-index-types-gdb-generated-gdb11.test index aa9000d70f97..17663a7f72df 100644 --- a/bolt/test/X86/dwarf5-dwarf4-gdb-index-types-gdb-generated-gdb11.test +++ b/bolt/test/X86/dwarf5-dwarf4-gdb-index-types-gdb-generated-gdb11.test @@ -11,8 +11,8 @@ # POSTCHECK: Version = 8 # POSTCHECK: CU list offset = 0x18, has 2 entries -# POSTCHECK-NEXT: 0: Offset = 0x40, Length = 0x52 -# POSTCHECK-NEXT: 1: Offset = 0x92, Length = 0x72 +# POSTCHECK-NEXT: 0: Offset = 0x40, Length = 0x55 +# POSTCHECK-NEXT: 1: Offset = 0x95, Length = 0x72 # POSTCHECK: Types CU list offset = 0x38, has 2 entries # POSTCHECK-NEXT: 0: offset = 0x00000000, type_offset = 0x00000023, type_signature = 0x418503b8111e9a7b # POSTCHECK-NEXT: 1: offset = 0x00000000, type_offset = 0x0000001e, type_signature = 0x00f6cca4e3a15118 diff --git a/bolt/test/X86/dwarf5-dwarf4-gdb-index-types-gdb-generated-gdb9.test b/bolt/test/X86/dwarf5-dwarf4-gdb-index-types-gdb-generated-gdb9.test index 4b54dd841b1a..c283ec02387f 100644 --- a/bolt/test/X86/dwarf5-dwarf4-gdb-index-types-gdb-generated-gdb9.test +++ b/bolt/test/X86/dwarf5-dwarf4-gdb-index-types-gdb-generated-gdb9.test @@ -12,8 +12,8 @@ # POSTCHECK: Version = 8 # POSTCHECK: CU list offset = 0x18, has 3 entries # POSTCHECK-NEXT: 0: Offset = 0x0, Length = 0x40 -# POSTCHECK-NEXT: 1: Offset = 0x40, Length = 0x52 -# POSTCHECK-NEXT: 2: Offset = 0x92, Length = 0x72 +# POSTCHECK-NEXT: 1: Offset = 0x40, Length = 0x55 +# POSTCHECK-NEXT: 2: Offset = 0x95, Length = 0x72 # POSTCHECK: Types 
CU list offset = 0x48, has 2 entries # POSTCHECK-NEXT: 0: offset = 0x00000000, type_offset = 0x00000023, type_signature = 0x418503b8111e9a7b # POSTCHECK-NEXT: 1: offset = 0x00000000, type_offset = 0x0000001e, type_signature = 0x00f6cca4e3a15118 diff --git a/bolt/test/X86/dwarf5-dwarf4-gdb-index-types-lld-generated.test b/bolt/test/X86/dwarf5-dwarf4-gdb-index-types-lld-generated.test index 71261f917ba4..6eaad4cd06d3 100644 --- a/bolt/test/X86/dwarf5-dwarf4-gdb-index-types-lld-generated.test +++ b/bolt/test/X86/dwarf5-dwarf4-gdb-index-types-lld-generated.test @@ -10,8 +10,8 @@ # POSTCHECK: Version = 7 # POSTCHECK: CU list offset = 0x18, has 2 entries -# POSTCHECK-NEXT: 0: Offset = 0x40, Length = 0x52 -# POSTCHECK-NEXT: 1: Offset = 0x92, Length = 0x72 +# POSTCHECK-NEXT: 0: Offset = 0x40, Length = 0x55 +# POSTCHECK-NEXT: 1: Offset = 0x95, Length = 0x72 # POSTCHECK: Types CU list offset = 0x38, has 0 entries # POSTCHECK: Address area offset = 0x38, has 2 entries # POSTCHECK-NEXT: Low/High address = [0x[[#%.4x,ADDR:]], diff --git a/bolt/test/X86/dwarf5-dwarf4-monolithic.test b/bolt/test/X86/dwarf5-dwarf4-monolithic.test index d9e18ca33cee..b40b2fc3d344 100644 --- a/bolt/test/X86/dwarf5-dwarf4-monolithic.test +++ b/bolt/test/X86/dwarf5-dwarf4-monolithic.test @@ -5,7 +5,7 @@ # RUN: llvm-mc -dwarf-version=5 -filetype=obj -triple x86_64-unknown-linux %p/Inputs/dwarf5-dwarf4-monolithic-helper1.s -o %t1.o # RUN: llvm-mc -dwarf-version=4 -filetype=obj -triple x86_64-unknown-linux %p/Inputs/dwarf5-dwarf4-monolithic-helper2.s -o %t2.o # RUN: %clang %cflags -dwarf-5 %tmain.o %t0.o %t1.o %t2.o -o %t.exe -Wl,-q -# RUN: llvm-bolt %t.exe -o %t.bolt --update-debug-sections +# RUN: llvm-bolt --always-convert-to-ranges %t.exe -o %t.bolt --update-debug-sections # RUN: llvm-dwarfdump --show-form --verbose --debug-info %t.exe | FileCheck --check-prefix=PRECHECK %s # RUN: llvm-dwarfdump --show-form --verbose --debug-line %t.exe > %t_line.txt # RUN: llvm-dwarfdump --show-form --verbose 
--debug-addr %t.bolt > %t.txt diff --git a/bolt/test/X86/dwarf5-dwarf4-types-backward-forward-cross-reference.test b/bolt/test/X86/dwarf5-dwarf4-types-backward-forward-cross-reference.test index acccb458984c..8afbe9e747d2 100644 --- a/bolt/test/X86/dwarf5-dwarf4-types-backward-forward-cross-reference.test +++ b/bolt/test/X86/dwarf5-dwarf4-types-backward-forward-cross-reference.test @@ -15,19 +15,19 @@ # POSTCHECK: version = 0x0005 # POSTCHECK: DW_TAG_compile_unit # POSTCHECK: DW_TAG_variable [14] -# POSTCHECK: DW_AT_type [DW_FORM_ref_addr] (0x00000000000001f8 "Foo2a") +# POSTCHECK: DW_AT_type [DW_FORM_ref_addr] (0x{{[0-9a-f]+}} "Foo2a") # POSTCHECK: version = 0x0005 # POSTCHECK: DW_TAG_compile_unit # POSTCHECK: DW_TAG_variable [14] -# POSTCHECK: DW_AT_type [DW_FORM_ref_addr] (0x0000000000000199 "Foo") +# POSTCHECK: DW_AT_type [DW_FORM_ref_addr] (0x{{[0-9a-f]+}} "Foo") # POSTCHECK: version = 0x0004 # POSTCHECK: DW_TAG_compile_unit # POSTCHECK: DW_TAG_variable [20] -# POSTCHECK: DW_AT_type [DW_FORM_ref_addr] (0x00000000000002f4 "Foo4a") +# POSTCHECK: DW_AT_type [DW_FORM_ref_addr] (0x{{[0-9a-f]+}} "Foo4a") # POSTCHECK: version = 0x0004 # POSTCHECK: DW_TAG_compile_unit # POSTCHECK: DW_TAG_variable [20] -# POSTCHECK: DW_AT_type [DW_FORM_ref_addr] (0x0000000000000276 "Foo3a") +# POSTCHECK: DW_AT_type [DW_FORM_ref_addr] (0x{{[0-9a-f]+}} "Foo3a") # POSTCHECKTU: version = 0x0004 # POSTCHECKTU: DW_TAG_type_unit diff --git a/bolt/test/X86/dwarf5-ftypes-dwo-mono-input-dwp-output.test b/bolt/test/X86/dwarf5-ftypes-dwo-mono-input-dwp-output.test index 497de33519fb..69758505c2a6 100644 --- a/bolt/test/X86/dwarf5-ftypes-dwo-mono-input-dwp-output.test +++ b/bolt/test/X86/dwarf5-ftypes-dwo-mono-input-dwp-output.test @@ -31,13 +31,13 @@ ; BOLT-SAME: 0x675d23e4f33235f2 ; BOLT: 0x0000004b: Type Unit: length = 0x0000003e ; BOLT-SAME: 0x49dc260088be7e56 -; BOLT: 0x0000008d: Compile Unit: length = 0x00000074 +; BOLT: 0x0000008d: Compile Unit: length = 0x00000077 ; BOLT-SAME: 
0x4257354d8bb35644 -; BOLT: 0x00000105: Type Unit: length = 0x00000047 +; BOLT: 0x00000108: Type Unit: length = 0x00000047 ; BOLT-SAME: 0x104ec427d2ebea6f -; BOLT: 0x00000150: Type Unit: length = 0x0000003e +; BOLT: 0x00000153: Type Unit: length = 0x0000003e ; BOLT-SAME: 0xb4580bc1535df1e4 -; BOLT: 0x00000192: Compile Unit: length = 0x00000051 +; BOLT: 0x00000195: Compile Unit: length = 0x00000054 ; BOLT-SAME: 0x7738bfb5f3edfb73 ; BOLT-NOT: 0x8f55ac73549bc003 ; BOLT-NOT: 0xe7734af8fed0632e @@ -45,11 +45,11 @@ ; BOLT-DWP-TU-INDEX: version = 5, units = 4, slots = 8 ; BOLT-DWP-TU-INDEX: Index Signature ; BOLT-DWP-TU-INDEX: 3 0x675d23e4f33235f2 [0x0000000000000000, 0x000000000000004b) [0x00000000, 0x00000083) [0x00000000, 0x00000056) [0x00000000, 0x00000044) -; BOLT-DWP-TU-INDEX: 5 0xb4580bc1535df1e4 [0x0000000000000150, 0x0000000000000192) [0x00000083, 0x000000f9) [0x00000056, 0x000000ae) [0x00000044, 0x00000084) +; BOLT-DWP-TU-INDEX: 5 0xb4580bc1535df1e4 [0x0000000000000153, 0x0000000000000195) [0x00000083, 0x000000f9) [0x00000056, 0x000000ae) [0x00000044, 0x00000084) ; BOLT-DWP-TU-INDEX: 7 0x49dc260088be7e56 [0x000000000000004b, 0x000000000000008d) [0x00000000, 0x00000083) [0x00000000, 0x00000056) [0x00000000, 0x00000044) -; BOLT-DWP-TU-INDEX: 8 0x104ec427d2ebea6f [0x0000000000000105, 0x0000000000000150) [0x00000083, 0x000000f9) [0x00000056, 0x000000ae) [0x00000044, 0x00000084) +; BOLT-DWP-TU-INDEX: 8 0x104ec427d2ebea6f [0x0000000000000108, 0x0000000000000153) [0x00000083, 0x000000f9) [0x00000056, 0x000000ae) [0x00000044, 0x00000084) ; BOLT-DWP-CU-INDEX: version = 5, units = 2, slots = 4 ; BOLT-DWP-CU-INDEX: Index Signature -; BOLT-DWP-CU-INDEX: 1 0x4257354d8bb35644 [0x000000000000008d, 0x0000000000000105) [0x00000000, 0x00000083) [0x00000000, 0x00000056) [0x00000000, 0x00000044) -; BOLT-DWP-CU-INDEX: 4 0x7738bfb5f3edfb73 [0x0000000000000192, 0x00000000000001e7) [0x00000083, 0x000000f9) [0x00000056, 0x000000ae) [0x00000044, 0x00000084) +; BOLT-DWP-CU-INDEX: 1 
0x4257354d8bb35644 [0x000000000000008d, 0x0000000000000108) [0x00000000, 0x00000083) [0x00000000, 0x00000056) [0x00000000, 0x00000044) +; BOLT-DWP-CU-INDEX: 4 0x7738bfb5f3edfb73 [0x0000000000000195, 0x00000000000001ed) [0x00000083, 0x000000f9) [0x00000056, 0x000000ae) [0x00000044, 0x00000084) diff --git a/bolt/test/X86/dwarf5-gdb-index-types-gdb-generated-gdb11.test b/bolt/test/X86/dwarf5-gdb-index-types-gdb-generated-gdb11.test index 702f222723b4..f8f33b321a7d 100644 --- a/bolt/test/X86/dwarf5-gdb-index-types-gdb-generated-gdb11.test +++ b/bolt/test/X86/dwarf5-gdb-index-types-gdb-generated-gdb11.test @@ -11,8 +11,8 @@ # POSTCHECK: Version = 8 # POSTCHECK: CU list offset = 0x18, has 2 entries -# POSTCHECK-NEXT: 0: Offset = 0x80, Length = 0x52 -# POSTCHECK-NEXT: 1: Offset = 0xd2, Length = 0x53 +# POSTCHECK-NEXT: 0: Offset = 0x80, Length = 0x55 +# POSTCHECK-NEXT: 1: Offset = 0xd5, Length = 0x56 # POSTCHECK: Types CU list offset = 0x38, has 2 entries # POSTCHECK-NEXT: 0: offset = 0x00000000, type_offset = 0x00000023, type_signature = 0x418503b8111e9a7b # POSTCHECK-NEXT: 1: offset = 0x00000040, type_offset = 0x00000023, type_signature = 0x00f6cca4e3a15118 diff --git a/bolt/test/X86/dwarf5-gdb-index-types-gdb-generated-gdb9.test b/bolt/test/X86/dwarf5-gdb-index-types-gdb-generated-gdb9.test index e694a467bcd2..bccc92d3de84 100644 --- a/bolt/test/X86/dwarf5-gdb-index-types-gdb-generated-gdb9.test +++ b/bolt/test/X86/dwarf5-gdb-index-types-gdb-generated-gdb9.test @@ -13,8 +13,8 @@ # POSTCHECK: CU list offset = 0x18, has 4 entries # POSTCHECK-NEXT: 0: Offset = 0x0, Length = 0x40 # POSTCHECK-NEXT: 1: Offset = 0x40, Length = 0x40 -# POSTCHECK-NEXT: 2: Offset = 0x80, Length = 0x52 -# POSTCHECK-NEXT: 3: Offset = 0xd2, Length = 0x53 +# POSTCHECK-NEXT: 2: Offset = 0x80, Length = 0x55 +# POSTCHECK-NEXT: 3: Offset = 0xd5, Length = 0x56 # POSTCHECK: Types CU list offset = 0x58, has 2 entries # POSTCHECK-NEXT: 0: offset = 0x00000000, type_offset = 0x00000023, type_signature = 
0x418503b8111e9a7b # POSTCHECK-NEXT: 1: offset = 0x00000040, type_offset = 0x00000023, type_signature = 0x00f6cca4e3a15118 diff --git a/bolt/test/X86/dwarf5-gdb-index-types-lld-generated.test b/bolt/test/X86/dwarf5-gdb-index-types-lld-generated.test index 8f0043dd7609..18fe7daa4ad4 100644 --- a/bolt/test/X86/dwarf5-gdb-index-types-lld-generated.test +++ b/bolt/test/X86/dwarf5-gdb-index-types-lld-generated.test @@ -10,8 +10,8 @@ # POSTCHECK: Version = 7 # POSTCHECK: CU list offset = 0x18, has 2 entries -# POSTCHECK-NEXT: 0: Offset = 0x80, Length = 0x52 -# POSTCHECK-NEXT: 1: Offset = 0xd2, Length = 0x53 +# POSTCHECK-NEXT: 0: Offset = 0x80, Length = 0x55 +# POSTCHECK-NEXT: 1: Offset = 0xd5, Length = 0x56 # POSTCHECK: Types CU list offset = 0x38, has 0 entries # POSTCHECK: Address area offset = 0x38, has 2 entries # POSTCHECK-NEXT: Low/High address = [0x[[#%.4x,ADDR:]], diff --git a/bolt/test/X86/dwarf5-locaddrx.test b/bolt/test/X86/dwarf5-locaddrx.test index ee96dd968693..5d4c0bd22f99 100644 --- a/bolt/test/X86/dwarf5-locaddrx.test +++ b/bolt/test/X86/dwarf5-locaddrx.test @@ -4,7 +4,7 @@ ; RUN: llvm-mc -dwarf-version=5 -filetype=obj -triple x86_64-unknown-linux %p/Inputs/dwarf5-locaddrx.s \ ; RUN: -split-dwarf-file=mainlocadddrx.dwo -o mainlocadddrx.o ; RUN: %clang %cflags -gdwarf-5 -gsplit-dwarf=split mainlocadddrx.o -o mainlocadddrx.exe -; RUN: llvm-bolt mainlocadddrx.exe -o mainlocadddrx.exe.bolt --update-debug-sections +; RUN: llvm-bolt mainlocadddrx.exe -o mainlocadddrx.exe.bolt --update-debug-sections --always-convert-to-ranges ; RUN: llvm-dwarfdump --show-form --verbose --debug-info mainlocadddrx.exe | FileCheck -check-prefix=PRE-BOLT %s ; RUN: llvm-dwarfdump --show-form --verbose --debug-addr mainlocadddrx.exe.bolt &> %t/foo.txt ; RUN: llvm-dwarfdump --show-form --verbose --debug-info mainlocadddrx.exe.bolt >> %t/foo.txt diff --git a/bolt/test/X86/dwarf5-locexpr-referrence.test b/bolt/test/X86/dwarf5-locexpr-referrence.test index fe0f6a61514c..27b7a2b38d97 
100644 --- a/bolt/test/X86/dwarf5-locexpr-referrence.test +++ b/bolt/test/X86/dwarf5-locexpr-referrence.test @@ -16,6 +16,6 @@ # CHECK: version = 0x0005 # CHECK: DW_TAG_variable # CHECK-NEXT: DW_AT_location -# CHECK-SAME: DW_OP_convert (0x00000028 -> 0x0000008f) -# CHECK-SAME: DW_OP_convert (0x0000002c -> 0x00000093) +# CHECK-SAME: DW_OP_convert (0x00000028 -> 0x00000092) +# CHECK-SAME: DW_OP_convert (0x0000002c -> 0x00000096) # CHECK: version = 0x0005 diff --git a/bolt/test/X86/dwarf5-lowpc-highpc-convert.s b/bolt/test/X86/dwarf5-lowpc-highpc-convert.s index 4bb2b4837448..aba62ea98454 100644 --- a/bolt/test/X86/dwarf5-lowpc-highpc-convert.s +++ b/bolt/test/X86/dwarf5-lowpc-highpc-convert.s @@ -2,7 +2,7 @@ # RUN: llvm-mc -dwarf-version=5 -filetype=obj -triple x86_64-unknown-linux %s -o %t1.o # RUN: %clang %cflags -dwarf-5 %t1.o -o %t.exe -Wl,-q -# RUN: llvm-bolt %t.exe -o %t.bolt --update-debug-sections +# RUN: llvm-bolt %t.exe -o %t.bolt --update-debug-sections --always-convert-to-ranges # RUN: llvm-dwarfdump --show-form --verbose --debug-info %t.exe | FileCheck --check-prefix=PRECHECK %s # RUN: llvm-dwarfdump --show-form --verbose --debug-addr %t.bolt > %t.txt # RUN: llvm-dwarfdump --show-form --verbose --debug-info %t.bolt >> %t.txt diff --git a/bolt/test/X86/dwarf5-one-loclists-two-bases.test b/bolt/test/X86/dwarf5-one-loclists-two-bases.test index d85eca18b08a..7ef53f681381 100644 --- a/bolt/test/X86/dwarf5-one-loclists-two-bases.test +++ b/bolt/test/X86/dwarf5-one-loclists-two-bases.test @@ -53,5 +53,5 @@ # Checking second CU # POSTCHECK: version = 0x0005 # POSTCHECK: DW_AT_loclists_base [DW_FORM_sec_offset] (0x0000000c) -# POSTCHECK: DW_AT_rnglists_base [DW_FORM_sec_offset] (0x00000035) +# POSTCHECK: DW_AT_rnglists_base [DW_FORM_sec_offset] (0x00000025) # POSTCHECK-NOT: DW_AT_location diff --git a/bolt/test/X86/dwarf5-rangeoffset-to-rangeindex.s b/bolt/test/X86/dwarf5-rangeoffset-to-rangeindex.s index 06b4819e2a84..481ff41c301f 100644 --- 
a/bolt/test/X86/dwarf5-rangeoffset-to-rangeindex.s +++ b/bolt/test/X86/dwarf5-rangeoffset-to-rangeindex.s @@ -2,7 +2,7 @@ # RUN: llvm-mc -dwarf-version=5 -filetype=obj -triple x86_64-unknown-linux %s -o %t1.o # RUN: %clang %cflags -dwarf-5 %t1.o -o %t.exe -Wl,-q -# RUN: llvm-bolt %t.exe -o %t.bolt --update-debug-sections +# RUN: llvm-bolt %t.exe -o %t.bolt --update-debug-sections --always-convert-to-ranges # RUN: llvm-dwarfdump --show-form --verbose --debug-info %t.exe | FileCheck --check-prefix=PRECHECK %s # RUN: llvm-dwarfdump --show-form --verbose --debug-addr %t.bolt > %t.txt # RUN: llvm-dwarfdump --show-form --verbose --debug-info %t.bolt >> %t.txt diff --git a/bolt/test/X86/dwarf5-split-dwarf4-monolithic.test b/bolt/test/X86/dwarf5-split-dwarf4-monolithic.test index eb3c844718fe..ec0d85fa30af 100644 --- a/bolt/test/X86/dwarf5-split-dwarf4-monolithic.test +++ b/bolt/test/X86/dwarf5-split-dwarf4-monolithic.test @@ -9,7 +9,7 @@ # RUN: llvm-mc -dwarf-version=5 -filetype=obj -triple x86_64-unknown-linux -split-dwarf-file=helper1.dwo %p/Inputs/dwarf5-split-dwarf4-monolithic-helper1.s -o helper1.o # RUN: llvm-mc -dwarf-version=4 -filetype=obj -triple x86_64-unknown-linux %p/Inputs/dwarf5-dwarf4-monolithic-helper2.s -o helper2.o # RUN: %clang %cflags -dwarf-5 main.o helper0.o helper1.o helper2.o -o main.exe -Wl,-q -# RUN: llvm-bolt main.exe -o main.bolt --update-debug-sections +# RUN: llvm-bolt --always-convert-to-ranges main.exe -o main.bolt --update-debug-sections # RUN: llvm-dwarfdump --show-form --verbose --debug-info main.exe | FileCheck --check-prefix=PRECHECK %s # RUN: llvm-dwarfdump --show-form --verbose --debug-line main.exe | FileCheck --check-prefix=PRECHECK-LINE %s # RUN: llvm-dwarfdump --show-form --verbose --debug-addr main.bolt > boltout.txt diff --git a/bolt/test/X86/dwarf5-subprogram-multiple-ranges.test b/bolt/test/X86/dwarf5-subprogram-multiple-ranges.test new file mode 100644 index 000000000000..9fedd57b0c6f --- /dev/null +++ 
b/bolt/test/X86/dwarf5-subprogram-multiple-ranges.test @@ -0,0 +1,23 @@ +# REQUIRES: system-linux + +# RUN: llvm-mc -dwarf-version=5 -filetype=obj -triple x86_64-unknown-linux %p/Inputs/dwarf5-subprogram-multiple-ranges-main.s -o %t1.o +# RUN: %clang %cflags %t1.o -o %t.exe -Wl,-q +# RUN: llvm-bolt %t.exe -o %t.bolt --update-debug-sections +# RUN: llvm-objdump %t.bolt --disassemble > %t1.txt +# RUN: llvm-dwarfdump --show-form --verbose --debug-info %t.bolt >> %t1.txt +# RUN: cat %t1.txt | FileCheck --check-prefix=POSTCHECK %s + +# This test checks BOLT correctly handles DW_TAG_subprogram with Ranges with multiple entries. + +# POSTCHECK: _Z7doStuffi>: +# POSTCHECK: [[#%.6x,ADDR:]] +# POSTCHECK: _Z7doStuffi.__part.1>: +# POSTCHECK-NEXT: [[#%.6x,ADDR1:]] +# POSTCHECK: _Z7doStuffi.__part.2>: +# POSTCHECK-NEXT: [[#%.6x,ADDR2:]] + +# POSTCHECK: DW_TAG_subprogram +# POSTCHECK-NEXT: DW_AT_ranges +# POSTCHECK-NEXT: [0x0000000000[[#ADDR]], 0x0000000000[[#ADDR + 0xf]]) +# POSTCHECK-NEXT: [0x0000000000[[#ADDR1]], 0x0000000000[[#ADDR1 + 0xb]]) +# POSTCHECK-NEXT: [0x0000000000[[#ADDR2]], 0x0000000000[[#ADDR2 + 0x5]]) diff --git a/bolt/test/X86/dwarf5-subprogram-single-gc-ranges.test b/bolt/test/X86/dwarf5-subprogram-single-gc-ranges.test new file mode 100644 index 000000000000..04b7203a5bea --- /dev/null +++ b/bolt/test/X86/dwarf5-subprogram-single-gc-ranges.test @@ -0,0 +1,23 @@ +# REQUIRES: system-linux + +# RUN: llvm-mc -dwarf-version=5 -filetype=obj -triple x86_64-unknown-linux %p/Inputs/dwarf5-subprogram-single-gc-ranges-main.s -o %t1.o +# RUN: %clang %cflags %t1.o -o %t.exe -Wl,-q +# RUN: llvm-bolt %t.exe -o %t.bolt --update-debug-sections &> %t1.txt +# RUN: llvm-dwarfdump --show-form --verbose --debug-info %t.bolt >> %t1.txt +# RUN: cat %t1.txt | FileCheck --check-prefix=POSTCHECK %s + +# This test checks BOLT correctly handles DW_TAG_subprogram with Ranges with single entry, when function was GCed. 
+ +# POSTCHECK: BOLT-WARNING: [internal-dwarf-error]: subprogram got GCed by the linker, DW_AT_ranges is used + +# POSTCHECK: DW_TAG_subprogram +# POSTCHECK-NEXT: DW_AT_frame_base +# POSTCHECK-NEXT: DW_AT_linkage_name +# POSTCHECK-SAME: _Z7doStuffi +# POSTCHECK-NEXT: DW_AT_name +# POSTCHECK-NEXT: DW_AT_decl_file +# POSTCHECK-NEXT: DW_AT_decl_line +# POSTCHECK-NEXT: DW_AT_type +# POSTCHECK-NEXT: DW_AT_external +# POSTCHECK-NEXT: DW_AT_low_pc [DW_FORM_addrx] (indexed (00000001) address = 0x0000000000000000) +# POSTCHECK-NEXT: DW_AT_high_pc [DW_FORM_data4] (0x00000001) diff --git a/bolt/test/X86/dwarf5-subprogram-single-ranges.test b/bolt/test/X86/dwarf5-subprogram-single-ranges.test new file mode 100644 index 000000000000..f53780eeb5b0 --- /dev/null +++ b/bolt/test/X86/dwarf5-subprogram-single-ranges.test @@ -0,0 +1,25 @@ +# REQUIRES: system-linux + +# RUN: llvm-mc -dwarf-version=5 -filetype=obj -triple x86_64-unknown-linux %p/Inputs/dwarf5-subprogram-single-ranges-main.s -o %t1.o +# RUN: %clang %cflags %t1.o -o %t.exe -Wl,-q +# RUN: llvm-bolt %t.exe -o %t.bolt --update-debug-sections +# RUN: llvm-objdump %t.bolt --disassemble > %t1.txt +# RUN: llvm-dwarfdump --show-form --verbose --debug-info %t.bolt >> %t1.txt +# RUN: cat %t1.txt | FileCheck --check-prefix=POSTCHECK %s + +# This test checks BOLT correctly handles DW_TAG_subprogram with Ranges with single entry. 
+ +# POSTCHECK: _Z7doStuffi>: +# POSTCHECK: [[#%.6x,ADDR:]] + +# POSTCHECK: DW_TAG_subprogram +# POSTCHECK-NEXT: DW_AT_frame_base +# POSTCHECK-NEXT: DW_AT_linkage_name +# POSTCHECK-SAME: _Z7doStuffi +# POSTCHECK-NEXT: DW_AT_name +# POSTCHECK-NEXT: DW_AT_decl_file +# POSTCHECK-NEXT: DW_AT_decl_line +# POSTCHECK-NEXT: DW_AT_type +# POSTCHECK-NEXT: DW_AT_external +# POSTCHECK-NEXT: DW_AT_low_pc [DW_FORM_addrx] (indexed (00000000) address = 0x0000000000[[#ADDR]]) +# POSTCHECK-NEXT: DW_AT_high_pc [DW_FORM_data4] (0x0000001b) diff --git a/bolt/test/X86/dwarf5-two-loclists.test b/bolt/test/X86/dwarf5-two-loclists.test index 2481b6f9701c..f5c399a944a9 100644 --- a/bolt/test/X86/dwarf5-two-loclists.test +++ b/bolt/test/X86/dwarf5-two-loclists.test @@ -64,7 +64,7 @@ # Checking second CU # POSTCHECK: version = 0x0005 # POSTCHECK: DW_AT_loclists_base [DW_FORM_sec_offset] (0x00000045) -# POSTCHECK: DW_AT_rnglists_base [DW_FORM_sec_offset] (0x00000035) +# POSTCHECK: DW_AT_rnglists_base [DW_FORM_sec_offset] (0x00000025) # POSTCHECK-EMPTY: # POSTCHECK: DW_TAG_variable # POSTCHECK: DW_TAG_variable diff --git a/bolt/test/X86/dwarf5-two-rnglists.test b/bolt/test/X86/dwarf5-two-rnglists.test index 0b1bc0be3fc2..40cb31054e9c 100644 --- a/bolt/test/X86/dwarf5-two-rnglists.test +++ b/bolt/test/X86/dwarf5-two-rnglists.test @@ -3,7 +3,7 @@ # RUN: llvm-mc -dwarf-version=5 -filetype=obj -triple x86_64-unknown-linux %p/Inputs/dwarf5_main.s -o %tmain.o # RUN: llvm-mc -dwarf-version=5 -filetype=obj -triple x86_64-unknown-linux %p/Inputs/dwarf5_helper.s -o %thelper.o # RUN: %clang %cflags -dwarf-5 %tmain.o %thelper.o -o %t.exe -Wl,-q -# RUN: llvm-bolt %t.exe -o %t.bolt --update-debug-sections +# RUN: llvm-bolt --always-convert-to-ranges %t.exe -o %t.bolt --update-debug-sections # RUN: llvm-dwarfdump --show-form --verbose --debug-info %t.exe | FileCheck --check-prefix=PRECHECK %s # RUN: llvm-dwarfdump --show-form --verbose --debug-addr %t.bolt > %t.txt # RUN: llvm-dwarfdump --show-form --verbose 
--debug-info %t.bolt >> %t.txt diff --git a/bolt/test/X86/dwarf5-types-backward-cross-reference.s b/bolt/test/X86/dwarf5-types-backward-cross-reference.s index 7aa4c3936a94..9278c23ef510 100644 --- a/bolt/test/X86/dwarf5-types-backward-cross-reference.s +++ b/bolt/test/X86/dwarf5-types-backward-cross-reference.s @@ -26,7 +26,7 @@ # POSTCHECK-SAME: version = 0x0005 # POSTCHECK: DW_TAG_variable [9] # POSTCHECK: DW_TAG_variable [12] -# POSTCHECK: DW_AT_type [DW_FORM_ref_addr] (0x0000000000000199 "Foo") +# POSTCHECK: DW_AT_type [DW_FORM_ref_addr] (0x{{[0-9a-f]+}} "Foo") # main.cpp diff --git a/bolt/test/X86/dwarf5-types-forward-cross-reference.s b/bolt/test/X86/dwarf5-types-forward-cross-reference.s index b8f1bc2ceb57..feeb75da93a8 100644 --- a/bolt/test/X86/dwarf5-types-forward-cross-reference.s +++ b/bolt/test/X86/dwarf5-types-forward-cross-reference.s @@ -21,7 +21,7 @@ # POSTCHECK: Compile Unit # POSTCHECK-SAME: version = 0x0005 # POSTCHECK: DW_TAG_variable [9] -# POSTCHECK: DW_AT_type [DW_FORM_ref_addr] (0x00000000000001f8 "Foo2a") +# POSTCHECK: DW_AT_type [DW_FORM_ref_addr] (0x{{[0-9a-f]+}} "Foo2a") # POSTCHECK: Compile Unit # POSTCHECK-SAME: version = 0x0005 # POSTCHECK: DW_TAG_structure_type [11] diff --git a/bolt/test/X86/gdbindex.test b/bolt/test/X86/gdbindex.test index 36c22260e265..87a5ec142af2 100644 --- a/bolt/test/X86/gdbindex.test +++ b/bolt/test/X86/gdbindex.test @@ -18,8 +18,8 @@ RUN: llvm-dwarfdump -gdb-index %tfile.exe.bolt | FileCheck %s ; CHECK: Version = 7 ; CHECK: CU list offset = 0x18, has 2 entries: -; CHECK-NEXT: 0: Offset = 0x0, Length = 0x4b -; CHECK-NEXT: 1: Offset = 0x4b, Length = 0x4f +; CHECK-NEXT: 0: Offset = 0x0, Length = 0x4f +; CHECK-NEXT: 1: Offset = 0x4f, Length = 0x53 ; CHECK: Types CU list offset = 0x38, has 0 entries: -- Gitee From b3a14edb8cba2f47197e8020f31d2e38968518e1 Mon Sep 17 00:00:00 2001 From: Daniel Hoekwater Date: Fri, 18 Aug 2023 18:43:17 +0000 Subject: [PATCH 02/47] [CodeGen] Use the TII hook for Noop insertion in 
BBSections (NFC) Refactor BasicBlockSections to use the target-specific noop insertion hook from TargetInstrInfo instead of building it ourselves. Using the TII hook is both cleaner and makes it easier to extend BBSections to non-X86 targets. Differential Revision: https://reviews.llvm.org/D158303 --- llvm/lib/CodeGen/BasicBlockSections.cpp | 4 +--- llvm/lib/Target/X86/X86InstrInfo.cpp | 6 ++++++ llvm/lib/Target/X86/X86InstrInfo.h | 3 +++ 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp index 6967ca5160c0..76fe1d96dbcc 100644 --- a/llvm/lib/CodeGen/BasicBlockSections.cpp +++ b/llvm/lib/CodeGen/BasicBlockSections.cpp @@ -285,9 +285,7 @@ void llvm::avoidZeroOffsetLandingPad(MachineFunction &MF) { MachineBasicBlock::iterator MI = MBB.begin(); while (!MI->isEHLabel()) ++MI; - MCInst Nop = MF.getSubtarget().getInstrInfo()->getNop(); - BuildMI(MBB, MI, DebugLoc(), - MF.getSubtarget().getInstrInfo()->get(Nop.getOpcode())); + MF.getSubtarget().getInstrInfo()->insertNoop(MBB, MI); } } } diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index e615fa09608c..eb6717448d5b 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -8442,6 +8442,12 @@ void X86InstrInfo::setExecutionDomain(MachineInstr &MI, unsigned Domain) const { MI.setDesc(get(table[Domain - 1])); } +void X86InstrInfo::insertNoop(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const { + DebugLoc DL; + BuildMI(MBB, MI, DL, get(X86::NOOP)); +} + /// Return the noop instruction to use for a noop. 
MCInst X86InstrInfo::getNop() const { MCInst Nop; diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h index 82554032ebd6..9a072c6569fe 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.h +++ b/llvm/lib/Target/X86/X86InstrInfo.h @@ -457,6 +457,9 @@ public: int64_t Offset2, unsigned NumLoads) const override; + void insertNoop(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const override; + MCInst getNop() const override; bool -- Gitee From d0597addea6973abbad1b6c3c397ad54509015bb Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Fri, 18 Aug 2023 18:16:30 +0000 Subject: [PATCH 03/47] [Propeller] Deprecate Codegen paths for SHT_LLVM_BB_ADDR_MAP version 1. This patch removes the `getBBIDOrNumber` which was introduced to allow emitting version 1. Reviewed By: shenhan Differential Revision: https://reviews.llvm.org/D158299 --- llvm/include/llvm/CodeGen/MachineBasicBlock.h | 6 ------ llvm/lib/CodeGen/BasicBlockSections.cpp | 18 +++++------------- llvm/lib/CodeGen/MachineBasicBlock.cpp | 5 ----- 3 files changed, 5 insertions(+), 24 deletions(-) diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h index 95ac9acf4e5e..d52340b4099d 100644 --- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h +++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h @@ -639,12 +639,6 @@ public: std::optional getBBID() const { return BBID; } - /// Returns the BBID of the block when BBAddrMapVersion >= 2, otherwise - /// returns `MachineBasicBlock::Number`. - /// TODO: Remove this function when version 1 is deprecated and replace its - /// uses with `getBBID()`. - unsigned getBBIDOrNumber() const; - /// Returns the section ID of this basic block. 
MBBSectionID getSectionID() const { return SectionID; } diff --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp index 76fe1d96dbcc..33e70b160d92 100644 --- a/llvm/lib/CodeGen/BasicBlockSections.cpp +++ b/llvm/lib/CodeGen/BasicBlockSections.cpp @@ -225,9 +225,7 @@ assignSections(MachineFunction &MF, // blocks are ordered canonically. MBB.setSectionID(MBB.getNumber()); } else { - // TODO: Replace `getBBIDOrNumber` with `getBBID` once version 1 is - // deprecated. - auto I = FuncBBClusterInfo.find(MBB.getBBIDOrNumber()); + auto I = FuncBBClusterInfo.find(*MBB.getBBID()); if (I != FuncBBClusterInfo.end()) { MBB.setSectionID(I->second.ClusterID); } else { @@ -325,14 +323,8 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) { if (BBSectionsType == BasicBlockSection::List && hasInstrProfHashMismatch(MF)) return true; - // Renumber blocks before sorting them. This is useful during sorting, - // basic blocks in the same section will retain the default order. - // This renumbering should also be done for basic block labels to match the - // profiles with the correct blocks. - // For LLVM_BB_ADDR_MAP versions 2 and higher, this renumbering serves - // the different purpose of accessing the original layout positions and - // finding the original fallthroughs. - // TODO: Change the above comment accordingly when version 1 is deprecated. + // Renumber blocks before sorting them. This is useful for accessing the + // original layout positions and finding the original fallthroughs. MF.RenumberBlocks(); if (BBSectionsType == BasicBlockSection::Labels) { @@ -383,8 +375,8 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) { // If the two basic block are in the same section, the order is decided by // their position within the section. 
if (XSectionID.Type == MBBSectionID::SectionType::Default) - return FuncBBClusterInfo.lookup(X.getBBIDOrNumber()).PositionInCluster < - FuncBBClusterInfo.lookup(Y.getBBIDOrNumber()).PositionInCluster; + return FuncBBClusterInfo.lookup(*X.getBBID()).PositionInCluster < + FuncBBClusterInfo.lookup(*Y.getBBID()).PositionInCluster; return X.getNumber() < Y.getNumber(); }; diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp index 327cd40f86a4..c4a0474e393a 100644 --- a/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -1766,11 +1766,6 @@ bool MachineBasicBlock::sizeWithoutDebugLargerThan(unsigned Limit) const { return false; } -unsigned MachineBasicBlock::getBBIDOrNumber() const { - uint8_t BBAddrMapVersion = getParent()->getContext().getBBAddrMapVersion(); - return BBAddrMapVersion < 2 ? getNumber() : *getBBID(); -} - const MBBSectionID MBBSectionID::ColdSectionID(MBBSectionID::SectionType::Cold); const MBBSectionID MBBSectionID::ExceptionSectionID(MBBSectionID::SectionType::Exception); -- Gitee From e98b38a5b99bcf53f58dc42ac80e93c7884acd7c Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Tue, 22 Aug 2023 22:00:03 +0000 Subject: [PATCH 04/47] Add a pass to garbage-collect empty basic blocks after code generation. Propeller and pseudo-probes map profiles back to Machine IR via basic block addresses that are stored in metadata sections. Empty basic blocks (basic blocks without real code) obfuscate the profile mapping because their addresses collide with their next basic blocks. For instance, the fallthrough block of an empty block should always be adjacent to it. Otherwise, a completely unnecessary jump would be added. This patch adds a MachineFunction pass named `GCEmptyBasicBlocks` which attempts to garbage-collect the empty blocks before the `BasicBlockSections` pass. This pass removes each empty basic block after redirecting its incoming edges to its fall-through block.
The garbage-collection is not complete. We keep the empty block in 4 cases: 1. The empty block is an exception handling pad. 2. The empty block has its address taken. 3. The empty block is the last block of the function and it has predecessors. 4. The empty block is the only block of the function. The first three cases are extremely rare in normal code (no cases for the clang binary). Removing the blocks under the first two cases requires modifying exception handling structures and operands of non-terminator instructions -- which is doable but not worth the additional complexity in the pass. Reviewed By: tmsriram Differential Revision: https://reviews.llvm.org/D107534 --- llvm/include/llvm/CodeGen/Passes.h | 7 ++ llvm/include/llvm/InitializePasses.h | 1 + llvm/lib/CodeGen/CMakeLists.txt | 1 + llvm/lib/CodeGen/GCEmptyBasicBlocks.cpp | 87 +++++++++++++++++++ llvm/lib/CodeGen/TargetPassConfig.cpp | 8 ++ ...basic-block-sections-labels-empty-block.ll | 21 +++++ .../test/CodeGen/X86/gc-empty-basic-blocks.ll | 36 ++++++++ 7 files changed, 161 insertions(+) create mode 100644 llvm/lib/CodeGen/GCEmptyBasicBlocks.cpp create mode 100644 llvm/test/CodeGen/X86/basic-block-sections-labels-empty-block.ll create mode 100644 llvm/test/CodeGen/X86/gc-empty-basic-blocks.ll diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h index 11bc1d48a93d..04888ad90b5d 100644 --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -54,6 +54,13 @@ namespace llvm { /// the entry block. FunctionPass *createUnreachableBlockEliminationPass(); + /// createGCEmptyBasicblocksPass - Empty basic blocks (basic blocks without + /// real code) appear as the result of optimization passes removing + /// instructions. These blocks confuscate profile analysis (e.g., basic block + /// sections) since they will share the address of their fallthrough blocks. + /// This pass garbage-collects such basic blocks. 
+ MachineFunctionPass *createGCEmptyBasicBlocksPass(); + /// createBasicBlockSections Pass - This pass assigns sections to machine /// basic blocks and is enabled with -fbasic-block-sections. MachineFunctionPass *createBasicBlockSectionsPass(); diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 89160cfd17d1..f5e580fd3569 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -128,6 +128,7 @@ void initializeFixupStatepointCallerSavedPass(PassRegistry&); void initializeFlattenCFGLegacyPassPass(PassRegistry &); void initializeFuncletLayoutPass(PassRegistry&); void initializeCallHeightAnalysisWrapperPass(PassRegistry &); +void initializeGCEmptyBasicBlocksPass(PassRegistry &); void initializeGCMachineCodeAnalysisPass(PassRegistry&); void initializeGCModuleInfoPass(PassRegistry&); void initializeGVNLegacyPassPass(PassRegistry&); diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt index 579074408b55..326093ecff59 100644 --- a/llvm/lib/CodeGen/CMakeLists.txt +++ b/llvm/lib/CodeGen/CMakeLists.txt @@ -200,6 +200,7 @@ add_llvm_component_library(LLVMCodeGen RegisterCoalescer.cpp RegisterPressure.cpp RegisterScavenging.cpp + GCEmptyBasicBlocks.cpp RemoveRedundantDebugValues.cpp RenameIndependentSubregs.cpp MachineStableHash.cpp diff --git a/llvm/lib/CodeGen/GCEmptyBasicBlocks.cpp b/llvm/lib/CodeGen/GCEmptyBasicBlocks.cpp new file mode 100644 index 000000000000..ec613d525822 --- /dev/null +++ b/llvm/lib/CodeGen/GCEmptyBasicBlocks.cpp @@ -0,0 +1,87 @@ +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/InitializePasses.h" + +using namespace llvm; + +#define DEBUG_TYPE 
"gc-empty-basic-blocks" + +STATISTIC(NumEmptyBlocksRemoved, "Number of empty blocks removed"); + +class GCEmptyBasicBlocks : public MachineFunctionPass { +public: + static char ID; + + GCEmptyBasicBlocks() : MachineFunctionPass(ID) { + initializeGCEmptyBasicBlocksPass(*PassRegistry::getPassRegistry()); + } + + StringRef getPassName() const override { + return "Remove Empty Basic Blocks."; + } + + bool runOnMachineFunction(MachineFunction &MF) override; +}; + +bool GCEmptyBasicBlocks::runOnMachineFunction(MachineFunction &MF) { + if (MF.size() < 2) + return false; + MachineJumpTableInfo *JTI = MF.getJumpTableInfo(); + int NumRemoved = 0; + + // Iterate over all blocks except the last one. We can't remove the last block + // since it has no fallthrough block to rewire its predecessors to. + for (MachineFunction::iterator MBB = MF.begin(), + LastMBB = MachineFunction::iterator(MF.back()), + NextMBB; + MBB != LastMBB; MBB = NextMBB) { + NextMBB = std::next(MBB); + // TODO If a block is an eh pad, or it has address taken, we don't remove + // it. Removing such blocks is possible, but it probably requires a more + // complex logic. + if (MBB->isEHPad() || MBB->isMachineBlockAddressTaken()) + continue; + // Skip blocks with real code. + bool HasAnyRealCode = llvm::any_of(*MBB, [](const MachineInstr &MI) { + return !MI.isPosition() && !MI.isImplicitDef() && !MI.isKill() && + !MI.isDebugInstr(); + }); + if (HasAnyRealCode) + continue; + + LLVM_DEBUG(dbgs() << "Removing basic block " << MBB->getName() + << " in function " << MF.getName() << ":\n" + << *MBB << "\n"); + SmallVector Preds(MBB->predecessors()); + // Rewire the predecessors of this block to use the next block. + for (auto &Pred : Preds) + Pred->ReplaceUsesOfBlockWith(&*MBB, &*NextMBB); + // Update the jump tables. + if (JTI) + JTI->ReplaceMBBInJumpTables(&*MBB, &*NextMBB); + // Remove this block from predecessors of all its successors. 
+ while (!MBB->succ_empty()) + MBB->removeSuccessor(MBB->succ_end() - 1); + // Finally, remove the block from the function. + MBB->eraseFromParent(); + ++NumRemoved; + } + NumEmptyBlocksRemoved += NumRemoved; + return NumRemoved != 0; +} + +char GCEmptyBasicBlocks::ID = 0; +INITIALIZE_PASS(GCEmptyBasicBlocks, "gc-empty-basic-blocks", + "Removes empty basic blocks and redirects their uses to their " + "fallthrough blocks.", + false, false) + +MachineFunctionPass *llvm::createGCEmptyBasicBlocksPass() { + return new GCEmptyBasicBlocks(); +} diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index 98ea2f21b3c8..87ac68c834a8 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -250,6 +250,11 @@ static cl::opt DisableSelectOptimize( "disable-select-optimize", cl::init(true), cl::Hidden, cl::desc("Disable the select-optimization pass from running")); +/// Enable garbage-collecting empty basic blocks. +static cl::opt + GCEmptyBlocks("gc-empty-basic-blocks", cl::init(false), cl::Hidden, + cl::desc("Enable garbage-collecting empty basic blocks")); + /// Allow standard passes to be disabled by command line options. This supports /// simple binary flags that either suppress the pass or do nothing. /// i.e. -disable-mypass=false has no effect. 
@@ -1245,6 +1250,9 @@ void TargetPassConfig::addMachinePasses() { addPass(createMachineOutlinerPass(RunOnAllFunctions)); } + if (GCEmptyBlocks) + addPass(llvm::createGCEmptyBasicBlocksPass()); + if (EnableFSDiscriminator) addPass(createMIRAddFSDiscriminatorsPass( sampleprof::FSDiscriminatorPass::PassLast)); diff --git a/llvm/test/CodeGen/X86/basic-block-sections-labels-empty-block.ll b/llvm/test/CodeGen/X86/basic-block-sections-labels-empty-block.ll new file mode 100644 index 000000000000..8e0f4fa7bc92 --- /dev/null +++ b/llvm/test/CodeGen/X86/basic-block-sections-labels-empty-block.ll @@ -0,0 +1,21 @@ +;; This test verifies that with -gc-empty-basic-blocks SHT_LLVM_BB_ADDR_MAP will not include entries for empty blocks. +; RUN: llc < %s -mtriple=x86_64 -O0 -basic-block-sections=labels -gc-empty-basic-blocks | FileCheck --check-prefix=CHECK %s + +define void @foo(i1 zeroext %0) nounwind { + br i1 %0, label %2, label %empty_block + +2: ; preds = %1 + %3 = call i32 @bar() + br label %4 + +empty_block: ; preds = %1 + unreachable + +4: ; preds = %2, %empty_block + ret void +} + +declare i32 @bar() + +; CHECK: .section .llvm_bb_addr_map,"o",@llvm_bb_addr_map,.text +; CHECK: .byte 3 # number of basic blocks diff --git a/llvm/test/CodeGen/X86/gc-empty-basic-blocks.ll b/llvm/test/CodeGen/X86/gc-empty-basic-blocks.ll new file mode 100644 index 000000000000..bac885a71b4c --- /dev/null +++ b/llvm/test/CodeGen/X86/gc-empty-basic-blocks.ll @@ -0,0 +1,36 @@ +;; This test verifies that -gc-empty-basic-blocks removes empty blocks. 
+; RUN: llc < %s -mtriple=x86_64 -O0 -gc-empty-basic-blocks | FileCheck -check-prefix=CHECK %s +; RUN: llc < %s -mtriple=x86_64 -stats -O0 -gc-empty-basic-blocks 2>&1 | FileCheck -check-prefix=STAT %s + +; STAT: 1 gc-empty-basic-blocks - Number of empty blocks removed + +define void @foo(i1 zeroext %0) nounwind { + br i1 %0, label %2, label %empty_block + +; CHECK: .text +; CHECK-LABEL: foo: +; CHECK: jne .LBB0_1 +; CHECK-NEXT: jmp .LBB0_3 + +2: ; preds = %1 + %3 = call i32 @bar() + br label %4 + +; CHECK-LABEL: .LBB0_1: +; CHECK: jmp .LBB0_3 + +empty_block: ; preds = %1 + unreachable + +; CHECK-NOT: %empty_block +; CHECK-NOT: .LBB0_2 + +4: ; preds = %2, %empty_block + ret void + +; CHECK-LABEL: .LBB0_3: +; CHECK: retq + +} + +declare i32 @bar() -- Gitee From d9edc27879b398a891c3cfa32c5d2afffa8c4234 Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Mon, 21 Aug 2023 19:55:24 +0000 Subject: [PATCH 05/47] [BasicBlockSections] avoid insertting redundant branch to fall through blocks --- llvm/lib/CodeGen/BasicBlockSections.cpp | 14 ++++- ...asic-block-sections-avoids-redundant-br.ll | 56 +++++++++++++++++++ 2 files changed, 68 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/X86/basic-block-sections-avoids-redundant-br.ll diff --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp index 33e70b160d92..4ebff65a2daf 100644 --- a/llvm/lib/CodeGen/BasicBlockSections.cpp +++ b/llvm/lib/CodeGen/BasicBlockSections.cpp @@ -134,6 +134,14 @@ INITIALIZE_PASS_END(BasicBlockSections, "bbsections-prepare", "into clusters of basic blocks.", false, false) +// Returns whether the given basic block has an unconditional branch. 
+bool hasUnconditionalBranch(const MachineBasicBlock &MBB) { + if (MBB.terminators().empty()) + return false; + const MachineInstr &Terminator = *(--MBB.terminators().end()); + return Terminator.isUnconditionalBranch(); +} + // This function updates and optimizes the branching instructions of every basic // block in a given function to account for changes in the layout. static void @@ -145,12 +153,14 @@ updateBranches(MachineFunction &MF, auto NextMBBI = std::next(MBB.getIterator()); auto *FTMBB = PreLayoutFallThroughs[MBB.getNumber()]; // If this block had a fallthrough before we need an explicit unconditional - // branch to that block if either + // branch to that block if either one of these two conditions hold and the + // block doesn't currently have an unconditional branch. // 1- the block ends a section, which means its next block may be // reorderd by the linker, or // 2- the fallthrough block is not adjacent to the block in the new // order. - if (FTMBB && (MBB.isEndSection() || &*NextMBBI != FTMBB)) + if (FTMBB && (MBB.isEndSection() || &*NextMBBI != FTMBB) && + !hasUnconditionalBranch(MBB)) TII->insertUnconditionalBranch(MBB, FTMBB, MBB.findBranchDebugLoc()); // We do not optimize branches for machine basic blocks ending sections, as diff --git a/llvm/test/CodeGen/X86/basic-block-sections-avoids-redundant-br.ll b/llvm/test/CodeGen/X86/basic-block-sections-avoids-redundant-br.ll new file mode 100644 index 000000000000..6571e2737952 --- /dev/null +++ b/llvm/test/CodeGen/X86/basic-block-sections-avoids-redundant-br.ll @@ -0,0 +1,56 @@ +; Tests that basic block sections avoids inserting an unconditional branch when a basic block +; already has an unconditional branch to its fallthrough block. 
+; RUN: llc < %s -mtriple=x86_64 -basic-block-sections=all -O0 | FileCheck %s +; This test case is generated from code: +; int +; mapping (int len) +; { +; switch (len) +; { +; case 7: return 333; +; default: +; goto unknown; +; } +; unknown: +; return 0; +; } +; clang -O0 -fbasic-block-sections=all test.c + +; Function Attrs: noinline nounwind optnone uwtable +define dso_local i32 @mapping(i32 noundef %len) { +entry: + %retval = alloca i32, align 4 + %len.addr = alloca i32, align 4 + store i32 %len, ptr %len.addr, align 4 + %0 = load i32, ptr %len.addr, align 4 + switch i32 %0, label %sw.default [ + i32 7, label %sw.bb + ] + +sw.bb: ; preds = %entry + store i32 333, ptr %retval, align 4 + br label %return + +sw.default: ; preds = %entry + br label %unknown + +unknown: ; preds = %sw.default + store i32 0, ptr %retval, align 4 + br label %return + +return: ; preds = %unknown, %sw.bb + %1 = load i32, ptr %retval, align 4 + ret i32 %1 +} + +; CHECK: mapping: +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: movl {{.*}} +; CHECK-NEXT: movl {{.*}} +; CHECK-NEXT: subl {{.*}} +; CHECK-NEXT: jne mapping.__part.2 +; CHECK-NEXT: jmp mapping.__part.1 +; CHECK-NOT: jmp +; CHECK: mapping.__part.1: +; CHECK: mapping.__part.2: -- Gitee From 86de99e94e6ec5e4467407e9011e0fe063bdfbcb Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Wed, 23 Aug 2023 01:08:44 +0000 Subject: [PATCH 06/47] Revert "[BasicBlockSections] avoid insertting redundant branch to fall through blocks" This reverts commit ab53109166c0345a79cbd6939cf7bc764a982856 which was committed by mistake.
--- llvm/lib/CodeGen/BasicBlockSections.cpp | 14 +---- ...asic-block-sections-avoids-redundant-br.ll | 56 ------------------- 2 files changed, 2 insertions(+), 68 deletions(-) delete mode 100644 llvm/test/CodeGen/X86/basic-block-sections-avoids-redundant-br.ll diff --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp index 4ebff65a2daf..33e70b160d92 100644 --- a/llvm/lib/CodeGen/BasicBlockSections.cpp +++ b/llvm/lib/CodeGen/BasicBlockSections.cpp @@ -134,14 +134,6 @@ INITIALIZE_PASS_END(BasicBlockSections, "bbsections-prepare", "into clusters of basic blocks.", false, false) -// Returns whether the given basic block has an unconditional branch. -bool hasUnconditionalBranch(const MachineBasicBlock &MBB) { - if (MBB.terminators().empty()) - return false; - const MachineInstr &Terminator = *(--MBB.terminators().end()); - return Terminator.isUnconditionalBranch(); -} - // This function updates and optimizes the branching instructions of every basic // block in a given function to account for changes in the layout. static void @@ -153,14 +145,12 @@ updateBranches(MachineFunction &MF, auto NextMBBI = std::next(MBB.getIterator()); auto *FTMBB = PreLayoutFallThroughs[MBB.getNumber()]; // If this block had a fallthrough before we need an explicit unconditional - // branch to that block if either one of these two conditions hold and the - // block doesn't currently have an unconditional branch. + // branch to that block if either // 1- the block ends a section, which means its next block may be // reorderd by the linker, or // 2- the fallthrough block is not adjacent to the block in the new // order. 
- if (FTMBB && (MBB.isEndSection() || &*NextMBBI != FTMBB) && - !hasUnconditionalBranch(MBB)) + if (FTMBB && (MBB.isEndSection() || &*NextMBBI != FTMBB)) TII->insertUnconditionalBranch(MBB, FTMBB, MBB.findBranchDebugLoc()); // We do not optimize branches for machine basic blocks ending sections, as diff --git a/llvm/test/CodeGen/X86/basic-block-sections-avoids-redundant-br.ll b/llvm/test/CodeGen/X86/basic-block-sections-avoids-redundant-br.ll deleted file mode 100644 index 6571e2737952..000000000000 --- a/llvm/test/CodeGen/X86/basic-block-sections-avoids-redundant-br.ll +++ /dev/null @@ -1,56 +0,0 @@ -; Tests that basic block sections avoids inserting an unconditional branch when a basic block -; already has an unconditional branch to its fallthrough block. -; RUN: llc < %s -mtriple=x86_64 -basic-block-sections=all -O0 | FileCheck %s -; This test case is generated from code: -; int -; mapping (int len) -; { -; switch (len) -; { -; case 7: return 333; -; default: -; goto unknown; -; } -; unknown: -; return 0; -; } -; clang -O0 -fbasic-block-sections=all test.c - -; Function Attrs: noinline nounwind optnone uwtable -define dso_local i32 @mapping(i32 noundef %len) { -entry: - %retval = alloca i32, align 4 - %len.addr = alloca i32, align 4 - store i32 %len, ptr %len.addr, align 4 - %0 = load i32, ptr %len.addr, align 4 - switch i32 %0, label %sw.default [ - i32 7, label %sw.bb - ] - -sw.bb: ; preds = %entry - store i32 333, ptr %retval, align 4 - br label %return - -sw.default: ; preds = %entry - br label %unknown - -unknown: ; preds = %sw.default - store i32 0, ptr %retval, align 4 - br label %return - -return: ; preds = %unknown, %sw.bb - %1 = load i32, ptr %retval, align 4 - ret i32 %1 -} - -; CHECK: mapping: -; CHECK-NEXT: .cfi_startproc -; CHECK-NEXT: # %bb.0: # %entry -; CHECK-NEXT: movl {{.*}} -; CHECK-NEXT: movl {{.*}} -; CHECK-NEXT: subl {{.*}} -; CHECK-NEXT: jne mapping.__part.2 -; CHECK-NEXT: jmp mapping.__part.1 -; CHECK-NOT: jmp -; CHECK: mapping.__part.1: 
-; CHECK: mapping.__part.2: -- Gitee From ca83184be8b6ff32165ba16cfc236551341ca763 Mon Sep 17 00:00:00 2001 From: Daniel Hoekwater Date: Mon, 21 Aug 2023 16:29:02 +0000 Subject: [PATCH 07/47] [CodeGen] Fix unconditional branch duplication issue in bbsections If an end section basic block ends in an unconditional branch to its fallthrough, BasicBlockSections will duplicate the unconditional branch. This doesn't break x86, but it is a (slight) size optimization and more importantly prevents AArch64 builds from breaking. Ex: ``` bb1 (bbsections Hot): jmp bb2 bb2 (bbsections Cold): /* do work... */ ``` After running sortBasicBlocksAndUpdateBranches(): ``` bb1 (bbsections Hot): jmp bb2 jmp bb2 bb2 (bbsections Cold): /* do work... */ ``` Differential Revision: https://reviews.llvm.org/D158674 --- llvm/lib/CodeGen/BasicBlockSections.cpp | 3 +- .../CodeGen/X86/machine-function-splitter.ll | 44 +++++++++++++++++-- 2 files changed, 42 insertions(+), 5 deletions(-) diff --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp index 33e70b160d92..de7c17082fa4 100644 --- a/llvm/lib/CodeGen/BasicBlockSections.cpp +++ b/llvm/lib/CodeGen/BasicBlockSections.cpp @@ -258,7 +258,8 @@ void llvm::sortBasicBlocksAndUpdateBranches( [[maybe_unused]] const MachineBasicBlock *EntryBlock = &MF.front(); SmallVector<MachineBasicBlock *> PreLayoutFallThroughs(MF.getNumBlockIDs()); for (auto &MBB : MF) - PreLayoutFallThroughs[MBB.getNumber()] = MBB.getFallThrough(); + PreLayoutFallThroughs[MBB.getNumber()] = + MBB.getFallThrough(/*JumpToFallThrough=*/false); MF.sort(MBBCmp); assert(&MF.front() == EntryBlock && diff --git a/llvm/test/CodeGen/X86/machine-function-splitter.ll b/llvm/test/CodeGen/X86/machine-function-splitter.ll index 03c0f93b342d..7113c42b2a7e 100644 --- a/llvm/test/CodeGen/X86/machine-function-splitter.ll +++ b/llvm/test/CodeGen/X86/machine-function-splitter.ll @@ -1,7 +1,20 @@ -; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions | FileCheck %s
-check-prefix=MFS-DEFAULTS -; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions -mfs-psi-cutoff=0 -mfs-count-threshold=2000 | FileCheck %s --dump-input=always -check-prefix=MFS-OPTS1 -; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions -mfs-psi-cutoff=950000 | FileCheck %s -check-prefix=MFS-OPTS2 -; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions -mfs-split-ehcode | FileCheck %s -check-prefix=MFS-EH-SPLIT +; REQUIRES: aarch64-registered-target +; REQUIRES: x86-registered-target + +; COM: Machine function splitting with FDO profiles +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions | FileCheck %s -check-prefixes=MFS-DEFAULTS,MFS-DEFAULTS-X86 +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions -mfs-psi-cutoff=0 -mfs-count-threshold=2000 | FileCheck %s --dump-input=always -check-prefixes=MFS-OPTS1,MFS-OPTS1-X86 +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions -mfs-psi-cutoff=950000 | FileCheck %s -check-prefixes=MFS-OPTS2,MFS-OPTS2-X86 +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions -mfs-split-ehcode | FileCheck %s -check-prefixes=MFS-EH-SPLIT,MFS-EH-SPLIT-X86 +; RUN: llc < %s -mtriple=x86_64 -split-machine-functions -O0 -mfs-psi-cutoff=0 -mfs-count-threshold=10000 | FileCheck %s -check-prefixes=MFS-O0,MFS-O0-X86 + +; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -enable-split-machine-functions -mfs-allow-unsupported-triple | FileCheck %s -check-prefixes=MFS-DEFAULTS,MFS-DEFAULTS-AARCH64 +; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -enable-split-machine-functions -mfs-allow-unsupported-triple -mfs-psi-cutoff=0 -mfs-count-threshold=2000 | FileCheck %s --dump-input=always -check-prefixes=MFS-OPTS1,MFS-OPTS1-AARCH64 +; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -enable-split-machine-functions -mfs-allow-unsupported-triple -mfs-psi-cutoff=950000 | FileCheck %s 
-check-prefixes=MFS-OPTS2,MFS-OPTS2-AARCH64 +; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -enable-split-machine-functions -mfs-allow-unsupported-triple -mfs-split-ehcode | FileCheck %s -check-prefixes=MFS-EH-SPLIT,MFS-EH-SPLIT-AARCH64 +; RUN: llc < %s -mtriple=aarch64 -enable-split-machine-functions -O0 -mfs-allow-unsupported-triple -mfs-psi-cutoff=0 -mfs-count-threshold=10000 | FileCheck %s --dump-input=always -check-prefixes=MFS-O0,MFS-O0-AARCH64 + +; COM: Machine function splitting with AFDO profiles ; RUN: sed 's/InstrProf/SampleProfile/g' %s > %t.ll ; RUN: llc < %t.ll -mtriple=x86_64-unknown-linux-gnu -split-machine-functions | FileCheck %s --check-prefix=FSAFDO-MFS ; RUN: llc < %t.ll -mtriple=x86_64-unknown-linux-gnu -split-machine-functions | FileCheck %s --check-prefix=FSAFDO-MFS2 @@ -432,6 +445,29 @@ define void @foo15(i1 zeroext %0, i1 zeroext %1) nounwind !prof !27 { ret void } +define void @foo16(i1 zeroext %0) nounwind !prof !14 !section_prefix !15 { +;; Check that an unconditional branch is only appended to a block +;; if it would fall through to the wrong block otherwise. +; MFS-O0-LABEL: foo16 +; MFS-O0-X86: jmp +; MFS-O0-X86-NOT: jmp +; MFS-O0-AARCH64: br +; MFS-O0-AARCH64: br +; MFS-O0-AARCH64-NOT: br +; MFS-O0: .section .text.split.foo16 +; MFS-O0-NEXT: foo16.cold + %2 = call i32 @baz() + br i1 false, label %3, label %5, !prof !25 + +3: ; preds = %1 + %4 = call i32 @bar() + unreachable + +5: ; preds = %1 + %6 = tail call i32 @qux() + ret void +} + declare i32 @bar() declare i32 @baz() declare i32 @bam() -- Gitee From b483d5bf6b3604c88740a8fce7bb33c4c15ed356 Mon Sep 17 00:00:00 2001 From: Daniel Hoekwater Date: Thu, 24 Aug 2023 16:58:19 +0000 Subject: [PATCH 08/47] Revert "[CodeGen] Fix unconditional branch duplication issue in bbsections" This reverts commit 994eb5adc40cd001d82d0f95d18d1827b57e496c. 
Breaks buildbot `llvm-clang-x86_64-expensive-checks-debian` https://lab.llvm.org/buildbot/#/builders/16/builds/53620 --- llvm/lib/CodeGen/BasicBlockSections.cpp | 3 +-- .../CodeGen/X86/machine-function-splitter.ll | 25 ------------------- 2 files changed, 1 insertion(+), 27 deletions(-) diff --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp index de7c17082fa4..33e70b160d92 100644 --- a/llvm/lib/CodeGen/BasicBlockSections.cpp +++ b/llvm/lib/CodeGen/BasicBlockSections.cpp @@ -258,8 +258,7 @@ void llvm::sortBasicBlocksAndUpdateBranches( [[maybe_unused]] const MachineBasicBlock *EntryBlock = &MF.front(); SmallVector<MachineBasicBlock *> PreLayoutFallThroughs(MF.getNumBlockIDs()); for (auto &MBB : MF) - PreLayoutFallThroughs[MBB.getNumber()] = - MBB.getFallThrough(/*JumpToFallThrough=*/false); + PreLayoutFallThroughs[MBB.getNumber()] = MBB.getFallThrough(); MF.sort(MBBCmp); assert(&MF.front() == EntryBlock && diff --git a/llvm/test/CodeGen/X86/machine-function-splitter.ll b/llvm/test/CodeGen/X86/machine-function-splitter.ll index 7113c42b2a7e..0f2e5562cc5a 100644 --- a/llvm/test/CodeGen/X86/machine-function-splitter.ll +++ b/llvm/test/CodeGen/X86/machine-function-splitter.ll @@ -6,13 +6,11 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions -mfs-psi-cutoff=0 -mfs-count-threshold=2000 | FileCheck %s --dump-input=always -check-prefixes=MFS-OPTS1,MFS-OPTS1-X86 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions -mfs-psi-cutoff=950000 | FileCheck %s -check-prefixes=MFS-OPTS2,MFS-OPTS2-X86 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions -mfs-split-ehcode | FileCheck %s -check-prefixes=MFS-EH-SPLIT,MFS-EH-SPLIT-X86 -; RUN: llc < %s -mtriple=x86_64 -split-machine-functions -O0 -mfs-psi-cutoff=0 -mfs-count-threshold=10000 | FileCheck %s -check-prefixes=MFS-O0,MFS-O0-X86 ; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -enable-split-machine-functions -mfs-allow-unsupported-triple |
FileCheck %s -check-prefixes=MFS-DEFAULTS,MFS-DEFAULTS-AARCH64 ; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -enable-split-machine-functions -mfs-allow-unsupported-triple -mfs-psi-cutoff=0 -mfs-count-threshold=2000 | FileCheck %s --dump-input=always -check-prefixes=MFS-OPTS1,MFS-OPTS1-AARCH64 ; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -enable-split-machine-functions -mfs-allow-unsupported-triple -mfs-psi-cutoff=950000 | FileCheck %s -check-prefixes=MFS-OPTS2,MFS-OPTS2-AARCH64 ; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -enable-split-machine-functions -mfs-allow-unsupported-triple -mfs-split-ehcode | FileCheck %s -check-prefixes=MFS-EH-SPLIT,MFS-EH-SPLIT-AARCH64 -; RUN: llc < %s -mtriple=aarch64 -enable-split-machine-functions -O0 -mfs-allow-unsupported-triple -mfs-psi-cutoff=0 -mfs-count-threshold=10000 | FileCheck %s --dump-input=always -check-prefixes=MFS-O0,MFS-O0-AARCH64 ; COM: Machine function splitting with AFDO profiles ; RUN: sed 's/InstrProf/SampleProfile/g' %s > %t.ll @@ -445,29 +443,6 @@ define void @foo15(i1 zeroext %0, i1 zeroext %1) nounwind !prof !27 { ret void } -define void @foo16(i1 zeroext %0) nounwind !prof !14 !section_prefix !15 { -;; Check that an unconditional branch is only appended to a block -;; if it would fall through to the wrong block otherwise. 
-; MFS-O0-LABEL: foo16 -; MFS-O0-X86: jmp -; MFS-O0-X86-NOT: jmp -; MFS-O0-AARCH64: br -; MFS-O0-AARCH64: br -; MFS-O0-AARCH64-NOT: br -; MFS-O0: .section .text.split.foo16 -; MFS-O0-NEXT: foo16.cold - %2 = call i32 @baz() - br i1 false, label %3, label %5, !prof !25 - -3: ; preds = %1 - %4 = call i32 @bar() - unreachable - -5: ; preds = %1 - %6 = tail call i32 @qux() - ret void -} - declare i32 @bar() declare i32 @baz() declare i32 @bam() -- Gitee From 19833d6091dd3d04947f8faef6101973a81d1e78 Mon Sep 17 00:00:00 2001 From: Daniel Hoekwater Date: Mon, 21 Aug 2023 16:29:02 +0000 Subject: [PATCH 09/47] Reland "[CodeGen] Fix unconditional branch duplication issue in bbsections" Reverted in 4c8d056f50342d5401f5930ed60e5e48b211c3fb because it broke buildbot `llvm-clang-x86_64-expensive-checks-debian` due to the AArch64 test generating invalid code. The issue still exists, but it's fixed in D156767, so the AArch64 test should be added there. Differential Revision: https://reviews.llvm.org/D158674 --- llvm/lib/CodeGen/BasicBlockSections.cpp | 3 ++- .../CodeGen/X86/machine-function-splitter.ll | 24 +++++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp index 33e70b160d92..de7c17082fa4 100644 --- a/llvm/lib/CodeGen/BasicBlockSections.cpp +++ b/llvm/lib/CodeGen/BasicBlockSections.cpp @@ -258,7 +258,8 @@ void llvm::sortBasicBlocksAndUpdateBranches( [[maybe_unused]] const MachineBasicBlock *EntryBlock = &MF.front(); SmallVector<MachineBasicBlock *> PreLayoutFallThroughs(MF.getNumBlockIDs()); for (auto &MBB : MF) - PreLayoutFallThroughs[MBB.getNumber()] = MBB.getFallThrough(); + PreLayoutFallThroughs[MBB.getNumber()] = + MBB.getFallThrough(/*JumpToFallThrough=*/false); MF.sort(MBBCmp); assert(&MF.front() == EntryBlock && diff --git a/llvm/test/CodeGen/X86/machine-function-splitter.ll b/llvm/test/CodeGen/X86/machine-function-splitter.ll index 0f2e5562cc5a..f725c471e267 100644 ---
a/llvm/test/CodeGen/X86/machine-function-splitter.ll +++ b/llvm/test/CodeGen/X86/machine-function-splitter.ll @@ -6,6 +6,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions -mfs-psi-cutoff=0 -mfs-count-threshold=2000 | FileCheck %s --dump-input=always -check-prefixes=MFS-OPTS1,MFS-OPTS1-X86 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions -mfs-psi-cutoff=950000 | FileCheck %s -check-prefixes=MFS-OPTS2,MFS-OPTS2-X86 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions -mfs-split-ehcode | FileCheck %s -check-prefixes=MFS-EH-SPLIT,MFS-EH-SPLIT-X86 +; RUN: llc < %s -mtriple=x86_64 -split-machine-functions -O0 -mfs-psi-cutoff=0 -mfs-count-threshold=10000 | FileCheck %s -check-prefixes=MFS-O0,MFS-O0-X86 ; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -enable-split-machine-functions -mfs-allow-unsupported-triple | FileCheck %s -check-prefixes=MFS-DEFAULTS,MFS-DEFAULTS-AARCH64 ; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -enable-split-machine-functions -mfs-allow-unsupported-triple -mfs-psi-cutoff=0 -mfs-count-threshold=2000 | FileCheck %s --dump-input=always -check-prefixes=MFS-OPTS1,MFS-OPTS1-AARCH64 @@ -443,6 +444,29 @@ define void @foo15(i1 zeroext %0, i1 zeroext %1) nounwind !prof !27 { ret void } +define void @foo16(i1 zeroext %0) nounwind !prof !14 !section_prefix !15 { +;; Check that an unconditional branch is only appended to a block +;; if it would fall through to the wrong block otherwise. 
+; MFS-O0-LABEL: foo16 +; MFS-O0-X86: jmp +; MFS-O0-X86-NOT: jmp +; MFS-O0-AARCH64: br +; MFS-O0-AARCH64: br +; MFS-O0-AARCH64-NOT: br +; MFS-O0: .section .text.split.foo16 +; MFS-O0-NEXT: foo16.cold + %2 = call i32 @baz() + br i1 false, label %3, label %5, !prof !25 + +3: ; preds = %1 + %4 = call i32 @bar() + unreachable + +5: ; preds = %1 + %6 = tail call i32 @qux() + ret void +} + declare i32 @bar() declare i32 @baz() declare i32 @bam() -- Gitee From b32af3f10f1e08f7d82701c368457dd33e6ded31 Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Fri, 22 Sep 2023 15:37:04 -0400 Subject: [PATCH 10/47] [BasicBlockSections] Introduce the basic block sections profile version 1. (#65506) This patch introduces a new version for the basic block sections profile as was requested in D158442, while keeping backward compatibility for the old version. The new encoding is as follows: ``` m <module_name> f <function_name_1> <function_name_2> ... c <bb_id_1> <bb_id_2> <bb_id_3> c <bb_id_4> <bb_id_5> ... ``` Module name specifier (starting with 'm') is optional and allows distinguishing profiles for internal-linkage functions with the same name. If not specified, profile will be applied to any function with the same name. Function name specifier (starting with 'f') can specify multiple function name aliases. Finally, basic block clusters are specified by 'c' and specify the cluster of basic blocks, and the internal order in which they must be placed in the same section.
--- .../CodeGen/BasicBlockSectionsProfileReader.h | 23 +- .../BasicBlockSectionsProfileReader.cpp | 208 ++++++++++++++---- .../basic-block-sections-clusters-error.ll | 33 ++- .../X86/basic-block-sections-clusters.ll | 20 +- .../CodeGen/X86/basic-block-sections-cold.ll | 21 +- .../CodeGen/X86/basic-block-sections-list.ll | 19 +- .../X86/basic-block-sections-module1.ll | 24 ++ 7 files changed, 286 insertions(+), 62 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h index cd27fea771ba..ad26eee642ea 100644 --- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h +++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h @@ -15,7 +15,6 @@ #ifndef LLVM_CODEGEN_BASICBLOCKSECTIONSPROFILEREADER_H #define LLVM_CODEGEN_BASICBLOCKSECTIONSPROFILEREADER_H -#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" @@ -26,7 +25,6 @@ #include "llvm/Support/Error.h" #include "llvm/Support/LineIterator.h" #include "llvm/Support/MemoryBuffer.h" -using namespace llvm; namespace llvm { @@ -47,7 +45,8 @@ public: static char ID; BasicBlockSectionsProfileReader(const MemoryBuffer *Buf) - : ImmutablePass(ID), MBuf(Buf) { + : ImmutablePass(ID), MBuf(Buf), + LineIt(*Buf, /*SkipBlanks=*/true, /*CommentMarker=*/'#') { initializeBasicBlockSectionsProfileReaderPass( *PassRegistry::getPassRegistry()); }; @@ -84,12 +83,30 @@ private: return R == FuncAliasMap.end() ? FuncName : R->second; } + // Returns a profile parsing error for the current line. + Error createProfileParseError(Twine Message) const { + return make_error<StringError>( + Twine("invalid profile " + MBuf->getBufferIdentifier() + " at line " + + Twine(LineIt.line_number()) + ": " + Message), + inconvertibleErrorCode()); + } + // Reads the basic block sections profile for functions in this module. Error ReadProfile(); + // Reads version 0 profile.
+ // TODO: Remove this function once version 0 is deprecated. + Error ReadV0Profile(); + + // Reads version 1 profile. + Error ReadV1Profile(); + // This contains the basic-block-sections profile. const MemoryBuffer *MBuf = nullptr; + // Iterator to the line being parsed. + line_iterator LineIt; + // Map from every function name in the module to its debug info filename or // empty string if no debug info is available. StringMap> FunctionNameToDIFilename; diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp index 5dede452ec34..ef5f1251f532 100644 --- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp +++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp @@ -18,9 +18,10 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/Pass.h" #include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/LineIterator.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" @@ -46,34 +47,121 @@ BasicBlockSectionsProfileReader::getBBClusterInfoForFunction( : std::pair(false, SmallVector{}); } -// Basic Block Sections can be enabled for a subset of machine basic blocks. -// This is done by passing a file containing names of functions for which basic -// block sections are desired. Additionally, machine basic block ids of the -// functions can also be specified for a finer granularity. Moreover, a cluster -// of basic blocks could be assigned to the same section. -// Optionally, a debug-info filename can be specified for each function to allow -// distinguishing internal-linkage functions of the same name. 
-// A file with basic block sections for all of function main and three blocks -// for function foo (of which 1 and 2 are placed in a cluster) looks like this: -// (Profile for function foo is only loaded when its debug-info filename -// matches 'path/to/foo_file.cc'). -// ---------------------------- -// list.txt: -// !main -// !foo M=path/to/foo_file.cc -// !!1 2 -// !!4 -Error BasicBlockSectionsProfileReader::ReadProfile() { - assert(MBuf); - line_iterator LineIt(*MBuf, /*SkipBlanks=*/true, /*CommentMarker=*/'#'); +// Reads the version 1 basic block sections profile. Profile for each function +// is encoded as follows: +// m <module_name> +// f <function_name_1> <function_name_2> ... +// c <bb_id_1> <bb_id_2> <bb_id_3> +// c <bb_id_4> <bb_id_5> +// ... +// Module name specifier (starting with 'm') is optional and allows +// distinguishing profile for internal-linkage functions with the same name. If +// not specified, it will apply to any function with the same name. Function +// name specifier (starting with 'f') can specify multiple function name +// aliases. Basic block clusters are specified by 'c' and specify the cluster of +// basic blocks, and the internal order in which they must be placed in the same +// section. +Error BasicBlockSectionsProfileReader::ReadV1Profile() { + auto FI = ProgramBBClusterInfo.end(); + + // Current cluster ID corresponding to this function. + unsigned CurrentCluster = 0; + // Current position in the current cluster. + unsigned CurrentPosition = 0; + + // Temporary set to ensure every basic block ID appears once in the clusters + // of a function. + SmallSet<unsigned, 4> FuncBBIDs; + + // Debug-info-based module filename for the current function. Empty string + // means no filename. + StringRef DIFilename; + + for (; !LineIt.is_at_eof(); ++LineIt) { + StringRef S(*LineIt); + char Specifier = S[0]; + S = S.drop_front().trim(); + SmallVector<StringRef, 4> Values; + S.split(Values, ' '); + switch (Specifier) { + case '@': + break; + case 'm': // Module name speicifer.
+ if (Values.size() != 1) { + return createProfileParseError(Twine("invalid module name value: '") + + S + "'"); + } + DIFilename = sys::path::remove_leading_dotslash(Values[0]); + continue; + case 'f': { // Function names specifier. + bool FunctionFound = any_of(Values, [&](StringRef Alias) { + auto It = FunctionNameToDIFilename.find(Alias); + // No match if this function name is not found in this module. + if (It == FunctionNameToDIFilename.end()) + return false; + // Return a match if debug-info-filename is not specified. Otherwise, + // check for equality. + return DIFilename.empty() || It->second.equals(DIFilename); + }); + if (!FunctionFound) { + // Skip the following profile by setting the profile iterator (FI) to + // the past-the-end element. + FI = ProgramBBClusterInfo.end(); + DIFilename = ""; + continue; + } + for (size_t i = 1; i < Values.size(); ++i) + FuncAliasMap.try_emplace(Values[i], Values.front()); + + // Prepare for parsing clusters of this function name. + // Start a new cluster map for this function name. + auto R = ProgramBBClusterInfo.try_emplace(Values.front()); + // Report error when multiple profiles have been specified for the same + // function. + if (!R.second) + return createProfileParseError("duplicate profile for function '" + + Values.front() + "'"); + FI = R.first; + CurrentCluster = 0; + FuncBBIDs.clear(); + // We won't need DIFilename anymore. Clean it up to avoid its application + // on the next function. + DIFilename = ""; + continue; + } + case 'c': // Basic block cluster specifier. + // Skip the profile when we the profile iterator (FI) refers to the + // past-the-end element. + if (FI == ProgramBBClusterInfo.end()) + break; + // Reset current cluster position. 
+ CurrentPosition = 0; + for (auto BBIDStr : Values) { + unsigned long long BBID; + if (getAsUnsignedInteger(BBIDStr, 10, BBID)) + return createProfileParseError(Twine("unsigned integer expected: '") + + BBIDStr + "'"); + if (!FuncBBIDs.insert(BBID).second) + return createProfileParseError( + Twine("duplicate basic block id found '") + BBIDStr + "'"); + if (BBID == 0 && CurrentPosition) + return createProfileParseError( + "entry BB (0) does not begin a cluster"); - auto invalidProfileError = [&](auto Message) { - return make_error( - Twine("Invalid profile " + MBuf->getBufferIdentifier() + " at line " + - Twine(LineIt.line_number()) + ": " + Message), - inconvertibleErrorCode()); - }; + FI->second.emplace_back( + BBClusterInfo{((unsigned)BBID), CurrentCluster, CurrentPosition++}); + } + CurrentCluster++; + continue; + default: + return createProfileParseError(Twine("invalid specifier: '") + + Twine(Specifier) + "'"); + } + } + return Error::success(); +} +Error BasicBlockSectionsProfileReader::ReadV0Profile() { auto FI = ProgramBBClusterInfo.end(); // Current cluster ID corresponding to this function. 
@@ -105,13 +193,14 @@ Error BasicBlockSectionsProfileReader::ReadProfile() { for (auto BBIDStr : BBIDs) { unsigned long long BBID; if (getAsUnsignedInteger(BBIDStr, 10, BBID)) - return invalidProfileError(Twine("Unsigned integer expected: '") + - BBIDStr + "'."); + return createProfileParseError(Twine("unsigned integer expected: '") + + BBIDStr + "'"); if (!FuncBBIDs.insert(BBID).second) - return invalidProfileError(Twine("Duplicate basic block id found '") + - BBIDStr + "'."); + return createProfileParseError( + Twine("duplicate basic block id found '") + BBIDStr + "'"); if (BBID == 0 && CurrentPosition) - return invalidProfileError("Entry BB (0) does not begin a cluster."); + return createProfileParseError( + "entry BB (0) does not begin a cluster"); FI->second.emplace_back( BBClusterInfo{((unsigned)BBID), CurrentCluster, CurrentPosition++}); @@ -126,10 +215,10 @@ Error BasicBlockSectionsProfileReader::ReadProfile() { DIFilename = sys::path::remove_leading_dotslash(DIFilenameStr.substr(2)); if (DIFilename.empty()) - return invalidProfileError("Empty module name specifier."); + return createProfileParseError("empty module name specifier"); } else if (!DIFilenameStr.empty()) { - return invalidProfileError("Unknown string found: '" + DIFilenameStr + - "'."); + return createProfileParseError("unknown string found: '" + + DIFilenameStr + "'"); } // Function aliases are separated using '/'. We use the first function // name for the cluster info mapping and delegate all other aliases to @@ -160,8 +249,8 @@ Error BasicBlockSectionsProfileReader::ReadProfile() { // Report error when multiple profiles have been specified for the same // function. 
if (!R.second) - return invalidProfileError("Duplicate profile for function '" + - Aliases.front() + "'."); + return createProfileParseError("duplicate profile for function '" + + Aliases.front() + "'"); FI = R.first; CurrentCluster = 0; FuncBBIDs.clear(); @@ -170,6 +259,51 @@ Error BasicBlockSectionsProfileReader::ReadProfile() { return Error::success(); } +// Basic Block Sections can be enabled for a subset of machine basic blocks. +// This is done by passing a file containing names of functions for which basic +// block sections are desired. Additionally, machine basic block ids of the +// functions can also be specified for a finer granularity. Moreover, a cluster +// of basic blocks could be assigned to the same section. +// Optionally, a debug-info filename can be specified for each function to allow +// distinguishing internal-linkage functions of the same name. +// A file with basic block sections for all of function main and three blocks +// for function foo (of which 1 and 2 are placed in a cluster) looks like this: +// (Profile for function foo is only loaded when its debug-info filename +// matches 'path/to/foo_file.cc'). +// ---------------------------- +// list.txt: +// !main +// !foo M=path/to/foo_file.cc +// !!1 2 +// !!4 +Error BasicBlockSectionsProfileReader::ReadProfile() { + assert(MBuf); + + unsigned long long Version = 0; + StringRef FirstLine(*LineIt); + if (FirstLine.consume_front("v")) { + if (getAsUnsignedInteger(FirstLine, 10, Version)) { + return createProfileParseError(Twine("version number expected: '") + + FirstLine + "'"); + } + if (Version > 1) { + return createProfileParseError(Twine("invalid profile version: ") + + Twine(Version)); + } + ++LineIt; + } + + switch (Version) { + case 0: + // TODO: Deprecate V0 once V1 is fully integrated downstream. 
+ return ReadV0Profile(); + case 1: + return ReadV1Profile(); + default: + llvm_unreachable("Invalid profile version."); + } +} + bool BasicBlockSectionsProfileReader::doInitialization(Module &M) { if (!MBuf) return false; diff --git a/llvm/test/CodeGen/X86/basic-block-sections-clusters-error.ll b/llvm/test/CodeGen/X86/basic-block-sections-clusters-error.ll index 15705b8051d4..5577601c02cf 100644 --- a/llvm/test/CodeGen/X86/basic-block-sections-clusters-error.ll +++ b/llvm/test/CodeGen/X86/basic-block-sections-clusters-error.ll @@ -1,27 +1,44 @@ -; BB cluster sections error handling +;; BB cluster sections error handling +;; Error handling for version 0: ; RUN: echo '!dummy1' > %t1 ; RUN: echo '!!1 4' >> %t1 ; RUN: echo '!!1' >> %t1 ; RUN: not --crash llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t1 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR1 -; CHECK-ERROR1: LLVM ERROR: Invalid profile {{.*}} at line 3: Duplicate basic block id found '1'. +; CHECK-ERROR1: LLVM ERROR: invalid profile {{.*}} at line 3: duplicate basic block id found '1' ; RUN: echo '!dummy1' > %t2 ; RUN: echo '!!4 0' >> %t2 ; RUN: not --crash llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t2 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR2 -; CHECK-ERROR2: LLVM ERROR: Invalid profile {{.*}} at line 2: Entry BB (0) does not begin a cluster. +; CHECK-ERROR2: LLVM ERROR: invalid profile {{.*}} at line 2: entry BB (0) does not begin a cluster ; RUN: echo '!dummy1' > %t3 ; RUN: echo '!!-1' >> %t3 ; RUN: not --crash llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t3 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR3 -; CHECK-ERROR3: LLVM ERROR: Invalid profile {{.*}} at line 2: Unsigned integer expected: '-1'. 
+; CHECK-ERROR3: LLVM ERROR: invalid profile {{.*}} at line 2: unsigned integer expected: '-1' ; RUN: echo '!dummy1 /path/to/filename' > %t4 ; RUN: not --crash llc < %s -O0 -mtriple=x86_64 -function-sections -basic-block-sections=%t4 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR4 -; CHECK-ERROR4: LLVM ERROR: Invalid profile {{.*}} at line 1: Unknown string found: '/path/to/filename'. +; CHECK-ERROR4: LLVM ERROR: invalid profile {{.*}} at line 1: unknown string found: '/path/to/filename' ; RUN: echo '!dummy2 M=test_dir/test_file' > %t5 ; RUN: echo '!dummy2 M=test_dir/test_file' >> %t5 ; RUN: not --crash llc < %s -O0 -mtriple=x86_64 -function-sections -basic-block-sections=%t5 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR5 -; CHECK-ERROR5: LLVM ERROR: Invalid profile {{.*}} at line 2: Duplicate profile for function 'dummy2'. -; RUN: echo '!dummy1 M=' >> %t6 +; CHECK-ERROR5: LLVM ERROR: invalid profile {{.*}} at line 2: duplicate profile for function 'dummy2' +; RUN: echo '!dummy1 M=' > %t6 ; RUN: not --crash llc < %s -O0 -mtriple=x86_64 -function-sections -basic-block-sections=%t6 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR6 -; CHECK-ERROR6: LLVM ERROR: Invalid profile {{.*}} at line 1: Empty module name specifier. +; CHECK-ERROR6: LLVM ERROR: invalid profile {{.*}} at line 1: empty module name specifier +;; +;; Error handling for version 1: +; RUN: echo 'v2' > %t7 +; RUN: not --crash llc < %s -O0 -mtriple=x86_64 -function-sections -basic-block-sections=%t7 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR7 +; CHECK-ERROR7: LLVM ERROR: invalid profile {{.*}} at line 1: invalid profile version: 2 +; RUN: echo 'v1' > %t8 +; RUN: echo '!dummy1' >> %t8 +; RUN: not --crash llc < %s -O0 -mtriple=x86_64 -function-sections -basic-block-sections=%t8 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR8 +; CHECK-ERROR8: LLVM ERROR: invalid profile {{.*}} at line 2: invalid specifier: '!' 
+; RUN: echo 'v1' > %t0 +; RUN: echo 'm dummy1/module1 dummy1/module2' +; RUN: echo 'f dummy1' >> %t9 +; RUN: not --crash llc < %s -O0 -mtriple=x86_64 -function-sections -basic-block-sections=%t8 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR8 +; CHECK-ERROR9: LLVM ERROR: invalid profile {{.*}} at line 2: invalid module name value: 'dummy1/module dummy1/module2' + + define i32 @dummy1(i32 %x, i32 %y, i32 %z) { entry: diff --git a/llvm/test/CodeGen/X86/basic-block-sections-clusters.ll b/llvm/test/CodeGen/X86/basic-block-sections-clusters.ll index a26f99a88a42..a2ea84ff8859 100644 --- a/llvm/test/CodeGen/X86/basic-block-sections-clusters.ll +++ b/llvm/test/CodeGen/X86/basic-block-sections-clusters.ll @@ -3,16 +3,30 @@ ; Test1: Basic blocks #0 (entry) and #2 will be placed in the same section. ; Basic block 1 will be placed in a unique section. ; The rest will be placed in the cold section. +;; +;; Profile for version 0: ; RUN: echo '!foo' > %t1 ; RUN: echo '!!0 2' >> %t1 ; RUN: echo '!!1' >> %t1 +;; +;; Profile for version 1: +; RUN: echo 'v1' > %t2 +; RUN: echo 'f foo' >> %t2 +; RUN: echo 'c 0 2' >> %t2 +; RUN: echo 'c 1' >> %t2 +; ; RUN: llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t1 | FileCheck %s -check-prefix=LINUX-SECTIONS1 +; RUN: llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t2 | FileCheck %s -check-prefix=LINUX-SECTIONS1 ; ; Test2: Basic blocks #1 and #3 will be placed in the same section. ; All other BBs (including the entry block) go into the function's section. 
-; RUN: echo '!foo' > %t2 -; RUN: echo '!!1 3' >> %t2 -; RUN: llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t2 | FileCheck %s -check-prefix=LINUX-SECTIONS2 +; RUN: echo '!foo' > %t3 +; RUN: echo '!!1 3' >> %t3 +; RUN: echo 'v1' > %t4 +; RUN: echo 'f foo' >> %t4 +; RUN: echo 'c 1 3' >> %t4 +; RUN: llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t3 | FileCheck %s -check-prefix=LINUX-SECTIONS2 +; RUN: llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t4 | FileCheck %s -check-prefix=LINUX-SECTIONS2 define void @foo(i1 zeroext) nounwind { %2 = alloca i8, align 1 diff --git a/llvm/test/CodeGen/X86/basic-block-sections-cold.ll b/llvm/test/CodeGen/X86/basic-block-sections-cold.ll index c635b73a45b9..58cec1b658c1 100644 --- a/llvm/test/CodeGen/X86/basic-block-sections-cold.ll +++ b/llvm/test/CodeGen/X86/basic-block-sections-cold.ll @@ -1,9 +1,18 @@ -; Check if basic blocks that don't get unique sections are placed in cold sections. -; Basic block with id 1 and 2 must be in the cold section. -; RUN: echo '!_Z3bazb' > %t -; RUN: echo '!!0' >> %t -; RUN: llc < %s -mtriple=x86_64 -function-sections -basic-block-sections=%t -unique-basic-block-section-names | FileCheck %s -check-prefix=LINUX-SECTIONS -; RUN: llc < %s -mtriple=x86_64 -function-sections -basic-block-sections=%t -unique-basic-block-section-names -bbsections-cold-text-prefix=".text.unlikely." | FileCheck %s -check-prefix=LINUX-SPLIT +;; Check if basic blocks that don't get unique sections are placed in cold sections. +;; Basic block with id 1 and 2 must be in the cold section. 
+;; +;; Profile for version 0 +; RUN: echo '!_Z3bazb' > %t1 +; RUN: echo '!!0' >> %t1 +;; +;; Profile for version 1 +; RUN: echo 'v1' > %t2 +; RUN: echo 'f _Z3bazb' >> %t2 +; RUN: echo 'c 0' >> %t2 +;; +; RUN: llc < %s -mtriple=x86_64 -function-sections -basic-block-sections=%t1 -unique-basic-block-section-names | FileCheck %s -check-prefix=LINUX-SECTIONS +; RUN: llc < %s -mtriple=x86_64 -function-sections -basic-block-sections=%t2 -unique-basic-block-section-names | FileCheck %s -check-prefix=LINUX-SECTIONS +; RUN: llc < %s -mtriple=x86_64 -function-sections -basic-block-sections=%t1 -unique-basic-block-section-names -bbsections-cold-text-prefix=".text.unlikely." | FileCheck %s -check-prefix=LINUX-SPLIT define void @_Z3bazb(i1 zeroext %0) nounwind { br i1 %0, label %2, label %4 diff --git a/llvm/test/CodeGen/X86/basic-block-sections-list.ll b/llvm/test/CodeGen/X86/basic-block-sections-list.ll index 050b7edaf479..45ef452f4f5c 100644 --- a/llvm/test/CodeGen/X86/basic-block-sections-list.ll +++ b/llvm/test/CodeGen/X86/basic-block-sections-list.ll @@ -1,8 +1,17 @@ -; Check the basic block sections list option. -; RUN: echo '!_Z3foob' > %t -; RUN: llc < %s -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t -unique-basic-block-section-names | FileCheck %s -check-prefix=LINUX-SECTIONS --check-prefix=LINUX-SECTIONS-FUNCTION-SECTION -; RUN: llc < %s -mtriple=x86_64-pc-linux -basic-block-sections=%t -unique-basic-block-section-names | FileCheck %s -check-prefix=LINUX-SECTIONS --check-prefix=LINUX-SECTIONS-NO-FUNCTION-SECTION -; RUN: llc < %s -mtriple=x86_64-pc-linux -basic-block-sections=%t -unique-basic-block-section-names --bbsections-guided-section-prefix=false | FileCheck %s -check-prefix=LINUX-SECTIONS-NO-GUIDED-PREFIX +;; Check the basic block sections list option. 
+;; version 0 profile: +; RUN: echo '!_Z3foob' > %t1 +;; +;; version 1 profile: +; RUN: echo 'v1' > %t2 +; RUN: echo 'f _Z3foob' >> %t2 +;; +; RUN: llc < %s -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t1 -unique-basic-block-section-names | FileCheck %s -check-prefix=LINUX-SECTIONS --check-prefix=LINUX-SECTIONS-FUNCTION-SECTION +; RUN: llc < %s -mtriple=x86_64-pc-linux -basic-block-sections=%t1 -unique-basic-block-section-names | FileCheck %s -check-prefix=LINUX-SECTIONS --check-prefix=LINUX-SECTIONS-NO-FUNCTION-SECTION +; RUN: llc < %s -mtriple=x86_64-pc-linux -basic-block-sections=%t1 -unique-basic-block-section-names --bbsections-guided-section-prefix=false | FileCheck %s -check-prefix=LINUX-SECTIONS-NO-GUIDED-PREFIX +; RUN: llc < %s -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t2 -unique-basic-block-section-names | FileCheck %s -check-prefix=LINUX-SECTIONS --check-prefix=LINUX-SECTIONS-FUNCTION-SECTION +; RUN: llc < %s -mtriple=x86_64-pc-linux -basic-block-sections=%t2 -unique-basic-block-section-names | FileCheck %s -check-prefix=LINUX-SECTIONS --check-prefix=LINUX-SECTIONS-NO-FUNCTION-SECTION +; RUN: llc < %s -mtriple=x86_64-pc-linux -basic-block-sections=%t2 -unique-basic-block-section-names --bbsections-guided-section-prefix=false | FileCheck %s -check-prefix=LINUX-SECTIONS-NO-GUIDED-PREFIX define i32 @_Z3foob(i1 zeroext %0) nounwind { %2 = alloca i32, align 4 diff --git a/llvm/test/CodeGen/X86/basic-block-sections-module1.ll b/llvm/test/CodeGen/X86/basic-block-sections-module1.ll index 1f11c4f102c1..9c1f7d15f10d 100644 --- a/llvm/test/CodeGen/X86/basic-block-sections-module1.ll +++ b/llvm/test/CodeGen/X86/basic-block-sections-module1.ll @@ -14,6 +14,30 @@ ; RUN: echo '!test M=./path/to/dir/test_filename' > %t4 ; RUN: echo '!!0' >> %t4 ; RUN: llc < %s -mtriple=x86_64 -function-sections -basic-block-sections=%t4 | FileCheck %s -check-prefix=WRONG-MODULE +;; Version 1 profile. +;; Specify the right filename. 
+; RUN: echo 'v1' > %t5 +; RUN: echo 'm /path/to/dir/test_filename' >> %t5 +; RUN: echo 'f test' >> %t5 +; RUN: echo 'c 0' >> %t5 +; RUN: llc < %s -mtriple=x86_64 -function-sections -basic-block-sections=%t5 | FileCheck %s -check-prefix=RIGHT-MODULE +;; Specify no filename and verify that the profile is ingested. +; RUN: echo 'v1' > %t6 +; RUN: echo 'f test' >> %t6 +; RUN: echo 'c 0' >> %t6 +; RUN: llc < %s -mtriple=x86_64 -function-sections -basic-block-sections=%t6 | FileCheck %s -check-prefix=NO-MODULE +;; Specify wrong filenames and verify that the profile is not ingested. +; RUN: echo 'v1' > %t7 +; RUN: echo 'm test_filename' >> %t7 +; RUN: echo 'f test' >> %t7 +; RUN: echo 'c 0' >> %t7 +; RUN: llc < %s -mtriple=x86_64 -function-sections -basic-block-sections=%t7 | FileCheck %s -check-prefix=WRONG-MODULE +; RUN: echo 'v1' > %t8 +; RUN: echo 'm ./path/to/dir/test_filename' >> %t8 +; RUN: echo 'f test' >> %t8 +; RUN: echo 'c 0' >> %t8 +; RUN: llc < %s -mtriple=x86_64 -function-sections -basic-block-sections=%t8 | FileCheck %s -check-prefix=WRONG-MODULE + define dso_local i32 @test(i32 noundef %0) #0 !dbg !10 { %2 = alloca i32, align 4 -- Gitee From 5bd19f5be13ca404997d8e718a15cc060f9ad51a Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Fri, 22 Sep 2023 16:49:12 -0400 Subject: [PATCH 11/47] [BasicBlockSections] Split cold parts of custom-section functions. (#66731) This PR makes `-basic-block-sections` handle functions with custom non-dot-text sections correctly. Cold parts of such functions must be placed in the same section (not in `.text.split`) but with a unique id. 
--- .../CodeGen/TargetLoweringObjectFileImpl.cpp | 37 ++++++++++++------- .../basic-block-sections-pragma-sections.ll | 30 ++++++++++----- 2 files changed, 45 insertions(+), 22 deletions(-) diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 4ffffd85ee53..2508295f68ff 100644 --- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -1038,21 +1038,32 @@ MCSection *TargetLoweringObjectFileELF::getSectionForMachineBasicBlock( // under the .text.eh prefix. For regular sections, we either use a unique // name, or a unique ID for the section. SmallString<128> Name; - if (MBB.getSectionID() == MBBSectionID::ColdSectionID) { - Name += BBSectionsColdTextPrefix; - Name += MBB.getParent()->getName(); - } else if (MBB.getSectionID() == MBBSectionID::ExceptionSectionID) { - Name += ".text.eh."; - Name += MBB.getParent()->getName(); - } else { - Name += MBB.getParent()->getSection()->getName(); - if (TM.getUniqueBasicBlockSectionNames()) { - if (!Name.endswith(".")) - Name += "."; - Name += MBB.getSymbol()->getName(); + StringRef FunctionSectionName = MBB.getParent()->getSection()->getName(); + if (FunctionSectionName.equals(".text") || + FunctionSectionName.startswith(".text.")) { + // Function is in a regular .text section. 
+ StringRef FunctionName = MBB.getParent()->getName(); + if (MBB.getSectionID() == MBBSectionID::ColdSectionID) { + Name += BBSectionsColdTextPrefix; + Name += FunctionName; + } else if (MBB.getSectionID() == MBBSectionID::ExceptionSectionID) { + Name += ".text.eh."; + Name += FunctionName; } else { - UniqueID = NextUniqueID++; + Name += FunctionSectionName; + if (TM.getUniqueBasicBlockSectionNames()) { + if (!Name.endswith(".")) + Name += "."; + Name += MBB.getSymbol()->getName(); + } else { + UniqueID = NextUniqueID++; + } } + } else { + // If the original function has a custom non-dot-text section, then emit + // all basic block sections into that section too, each with a unique id. + Name = FunctionSectionName; + UniqueID = NextUniqueID++; } unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_EXECINSTR; diff --git a/llvm/test/CodeGen/X86/basic-block-sections-pragma-sections.ll b/llvm/test/CodeGen/X86/basic-block-sections-pragma-sections.ll index d63fbdd7b362..13a4607100a0 100644 --- a/llvm/test/CodeGen/X86/basic-block-sections-pragma-sections.ll +++ b/llvm/test/CodeGen/X86/basic-block-sections-pragma-sections.ll @@ -1,9 +1,15 @@ +;; Tests for basic block sections applied on a function in a custom section. 
; RUN: llc < %s -mtriple=x86_64-pc-linux -basic-block-sections=all | FileCheck %s ; RUN: llc < %s -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=all | FileCheck %s -; RUN: echo "!_Z3fooi" > %t.list.txt -; RUN: echo "!!2" >> %t.list.txt -; RUN: llc < %s -mtriple=x86_64-pc-linux -basic-block-sections=%t.list.txt | FileCheck %s --check-prefix=LIST -; RUN: llc < %s -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t.list.txt | FileCheck %s --check-prefix=LIST +; RUN: echo "!_Z3fooi" > %t1.list.txt +; RUN: echo "!!2" >> %t1.list.txt +; RUN: llc < %s -mtriple=x86_64-pc-linux -basic-block-sections=%t1.list.txt | FileCheck %s --check-prefix=LIST1 +; RUN: llc < %s -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t1.list.txt | FileCheck %s --check-prefix=LIST1 +; RUN: echo "!_Z3fooi" > %t2.list.txt +; RUN: echo "!!0" >> %t2.list.txt +; RUN: llc < %s -mtriple=x86_64-pc-linux -basic-block-sections=%t2.list.txt | FileCheck %s --check-prefix=LIST2 +; RUN: llc < %s -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t2.list.txt | FileCheck %s --check-prefix=LIST2 + ; CHECK: .section foo_section,"ax",@progbits,unique,1 ; CHECK-LABEL: _Z3fooi: @@ -12,11 +18,17 @@ ; CHECK: .section foo_section,"ax",@progbits,unique,3 ; CHECK-NEXT: _Z3fooi.__part.2: -; LIST: .section foo_section,"ax",@progbits,unique,1 -; LIST-LABEL: _Z3fooi: -; LIST: .section foo_section,"ax",@progbits,unique,2 -; LIST-NEXT: _Z3fooi.__part.0: -; LIST-NOT: .section foo_section,"ax",@progbits,unique,3 +; LIST1: .section foo_section,"ax",@progbits,unique,1 +; LIST1-LABEL: _Z3fooi: +; LIST1: .section foo_section,"ax",@progbits,unique,2 +; LIST1-NEXT: _Z3fooi.__part.0: +; LIST1-NOT: .section foo_section,"ax",@progbits,unique,3 + +; LIST2: .section foo_section,"ax",@progbits,unique,1 +; LIST2-LABEL: _Z3fooi: +; LIST2: .section foo_section,"ax",@progbits,unique,2 +; LIST2-NEXT: _Z3fooi.cold: +; LIST2-NOT: .section foo_section,"ax",@progbits,unique,3 ;; 
Source to generate the IR: ;; #pragma clang section text = "foo_section" -- Gitee From 32ecb30720f8685dc31fa77ac27d102ddc8f5f6f Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Thu, 12 Oct 2023 01:47:13 -0400 Subject: [PATCH 12/47] [BasicBlockSections] Introduce the path cloning profile format to BasicBlockSectionsProfileReader. (#67214) Following up on prior RFC (https://lists.llvm.org/pipermail/llvm-dev/2020-September/145357.html) we can now improve above our highly-optimized basic-block-sections binary (e.g., 2% for clang) by applying path cloning. Cloning can improve performance by reducing taken branches. This patch prepares the profile format for applying cloning actions. The basic block cloning profile format extends the basic block sections profile in two ways. 1. Specifies the cloning paths with a 'p' specifier. For example, `p 1 4 5` specifies that blocks with BB ids 4 and 5 must be cloned along the edge 1 --> 4. 2. For each cloned block, it will appear in the cluster info as `<bb_id>.<clone_id>` where `clone_id` is the id associated with this clone. For example, the following profile specifies one cloned block (2) and determines its cluster position as well. ``` f foo p 1 2 c 0 1 2.1 3 2 5 ``` This patch keeps backward-compatibility (retains the behavior for old profile formats). This feature is only introduced for profile version >= 1.
--- .../CodeGen/BasicBlockSectionsProfileReader.h | 77 ++++++++-- llvm/lib/CodeGen/BasicBlockSections.cpp | 73 ++++----- .../BasicBlockSectionsProfileReader.cpp | 145 ++++++++++++++---- .../basic-block-sections-clusters-error.ll | 35 ++++- 4 files changed, 234 insertions(+), 96 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h index ad26eee642ea..6e01dfd11ee6 100644 --- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h +++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h @@ -28,17 +28,60 @@ namespace llvm { -// The cluster information for a machine basic block. -struct BBClusterInfo { - // Unique ID for this basic block. +// This structure represents a unique ID for every block specified in the +// input profile. +struct ProfileBBID { + // Basic block id associated with `MachineBasicBlock::BBID`. unsigned BBID; + // The clone id associated with the block. This is zero for the original + // block. For the cloned ones, it is equal to 1 + index of the associated + // path in `FunctionPathAndClusterInfo::ClonePaths`. + unsigned CloneID; +}; + +// This struct represents the cluster information for a machine basic block, +// which is specifed by a unique ID. This templated struct is used for both the +// raw input profile (as `BBClusterInfo`) and the processed profile +// after applying the clonings (as `BBClusterInfo`). +template struct BBClusterInfo { + // Basic block ID. + BBIDType BasicBlockID; // Cluster ID this basic block belongs to. unsigned ClusterID; // Position of basic block within the cluster. unsigned PositionInCluster; }; -using ProgramBBClusterInfoMapTy = StringMap>; +// This represents the raw input profile for one function. +struct FunctionPathAndClusterInfo { + // BB Cluster information specified by `ProfileBBID`s (before cloning). + SmallVector> ClusterInfo; + // Paths to clone. 
A path a -> b -> c -> d implies cloning b, c, and d along + // the edge a -> b (a is not cloned). The index of the path in this vector + // determines the `ProfileBBID::CloneID` of the cloned blocks in that path. + SmallVector> ClonePaths; +}; + +// Provides DenseMapInfo for ProfileBBID. +template <> struct DenseMapInfo { + static inline ProfileBBID getEmptyKey() { + unsigned EmptyKey = DenseMapInfo::getEmptyKey(); + return ProfileBBID{EmptyKey, EmptyKey}; + } + static inline ProfileBBID getTombstoneKey() { + unsigned TombstoneKey = DenseMapInfo::getTombstoneKey(); + return ProfileBBID{TombstoneKey, TombstoneKey}; + } + static unsigned getHashValue(const ProfileBBID &Val) { + std::pair PairVal = + std::make_pair(Val.BBID, Val.CloneID); + return DenseMapInfo>::getHashValue(PairVal); + } + static bool isEqual(const ProfileBBID &LHS, const ProfileBBID &RHS) { + return DenseMapInfo::isEqual(LHS.BBID, RHS.BBID) && + DenseMapInfo::isEqual(LHS.CloneID, RHS.CloneID); + } +}; class BasicBlockSectionsProfileReader : public ImmutablePass { public: @@ -70,11 +113,11 @@ public: // function. If the first element is true and the second element is empty, it // means unique basic block sections are desired for all basic blocks of the // function. - std::pair> - getBBClusterInfoForFunction(StringRef FuncName) const; + std::pair + getPathAndClusterInfoForFunction(StringRef FuncName) const; // Initializes the FunctionNameToDIFilename map for the current module and - // then reads the profile for matching functions. + // then reads the profile for the matching functions. bool doInitialization(Module &M) override; private: @@ -91,6 +134,12 @@ private: inconvertibleErrorCode()); } + // Parses a `ProfileBBID` from `S`. `S` must be in the form "<bbid>" + // (representing an original block) or "<bbid>.<cloneid>" (representing a + // cloned block) where bbid is a non-negative integer and cloneid is a + // positive integer.
+ Expected parseProfileBBID(StringRef S) const; + // Reads the basic block sections profile for functions in this module. Error ReadProfile(); @@ -111,16 +160,16 @@ private: // empty string if no debug info is available. StringMap> FunctionNameToDIFilename; - // This encapsulates the BB cluster information for the whole program. + // This contains the BB cluster information for the whole program. // - // For every function name, it contains the cluster information for (all or - // some of) its basic blocks. The cluster information for every basic block - // includes its cluster ID along with the position of the basic block in that - // cluster. - ProgramBBClusterInfoMapTy ProgramBBClusterInfo; + // For every function name, it contains the cloning and cluster information + // for (all or some of) its basic blocks. The cluster information for every + // basic block includes its cluster ID along with the position of the basic + // block in that cluster. + StringMap ProgramPathAndClusterInfo; // Some functions have alias names. We use this map to find the main alias - // name for which we have mapping in ProgramBBClusterInfo. + // name which appears in ProgramPathAndClusterInfo as a key. StringMap FuncAliasMap; }; diff --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp index de7c17082fa4..632fd68d88b5 100644 --- a/llvm/lib/CodeGen/BasicBlockSections.cpp +++ b/llvm/lib/CodeGen/BasicBlockSections.cpp @@ -168,31 +168,6 @@ updateBranches(MachineFunction &MF, } } -// This function provides the BBCluster information associated with a function. -// Returns true if a valid association exists and false otherwise. -bool getBBClusterInfoForFunction( - const MachineFunction &MF, - BasicBlockSectionsProfileReader *BBSectionsProfileReader, - DenseMap &V) { - - // Find the assoicated cluster information. 
- std::pair> P = - BBSectionsProfileReader->getBBClusterInfoForFunction(MF.getName()); - if (!P.first) - return false; - - if (P.second.empty()) { - // This indicates that sections are desired for all basic blocks of this - // function. We clear the BBClusterInfo vector to denote this. - V.clear(); - return true; - } - - for (const BBClusterInfo &BBCI : P.second) - V[BBCI.BBID] = BBCI; - return true; -} - // This function sorts basic blocks according to the cluster's information. // All explicitly specified clusters of basic blocks will be ordered // accordingly. All non-specified BBs go into a separate "Cold" section. @@ -200,12 +175,12 @@ bool getBBClusterInfoForFunction( // clusters, they are moved into a single "Exception" section. Eventually, // clusters are ordered in increasing order of their IDs, with the "Exception" // and "Cold" succeeding all other clusters. -// FuncBBClusterInfo represent the cluster information for basic blocks. It +// ClusterInfoByBBID represents the cluster information for basic blocks. It // maps from BBID of basic blocks to their cluster information. If this is // empty, it means unique sections for all basic blocks in the function. -static void -assignSections(MachineFunction &MF, - const DenseMap &FuncBBClusterInfo) { +static void assignSections( + MachineFunction &MF, + const DenseMap> &ClusterInfoByBBID) { assert(MF.hasBBSections() && "BB Sections is not set for function."); // This variable stores the section ID of the cluster containing eh_pads (if // all eh_pads are one cluster). If more than one cluster contain eh_pads, we @@ -216,17 +191,17 @@ assignSections(MachineFunction &MF, // With the 'all' option, every basic block is placed in a unique section. // With the 'list' option, every basic block is placed in a section // associated with its cluster, unless we want individual unique sections - // for every basic block in this function (if FuncBBClusterInfo is empty). 
+ // for every basic block in this function (if ClusterInfoByBBID is empty). if (MF.getTarget().getBBSectionsType() == llvm::BasicBlockSection::All || - FuncBBClusterInfo.empty()) { + ClusterInfoByBBID.empty()) { // If unique sections are desired for all basic blocks of the function, we // set every basic block's section ID equal to its original position in // the layout (which is equal to its number). This ensures that basic // blocks are ordered canonically. MBB.setSectionID(MBB.getNumber()); } else { - auto I = FuncBBClusterInfo.find(*MBB.getBBID()); - if (I != FuncBBClusterInfo.end()) { + auto I = ClusterInfoByBBID.find(*MBB.getBBID()); + if (I != ClusterInfoByBBID.end()) { MBB.setSectionID(I->second.ClusterID); } else { // BB goes into the special cold section if it is not specified in the @@ -333,16 +308,28 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) { return true; } - BBSectionsProfileReader = &getAnalysis(); + DenseMap> ClusterInfoByBBID; + if (BBSectionsType == BasicBlockSection::List) { + auto [HasProfile, PathAndClusterInfo] = + getAnalysis() + .getPathAndClusterInfoForFunction(MF.getName()); + if (!HasProfile) + return true; + for (const BBClusterInfo &BBP : + PathAndClusterInfo.ClusterInfo) { + // TODO: Apply the path cloning profile. + assert(!BBP.BasicBlockID.CloneID && "Path cloning is not supported yet"); + const auto [I, Inserted] = ClusterInfoByBBID.try_emplace( + BBP.BasicBlockID.BBID, + BBClusterInfo{BBP.BasicBlockID.BBID, BBP.ClusterID, + BBP.PositionInCluster}); + (void)I; + assert(Inserted && "Duplicate BBID found in profile"); + } + } - // Map from BBID of blocks to their cluster information. 
- DenseMap FuncBBClusterInfo; - if (BBSectionsType == BasicBlockSection::List && - !getBBClusterInfoForFunction(MF, BBSectionsProfileReader, - FuncBBClusterInfo)) - return true; MF.setBBSectionsType(BBSectionsType); - assignSections(MF, FuncBBClusterInfo); + assignSections(MF, ClusterInfoByBBID); // We make sure that the cluster including the entry basic block precedes all // other clusters. @@ -376,8 +363,8 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) { // If the two basic block are in the same section, the order is decided by // their position within the section. if (XSectionID.Type == MBBSectionID::SectionType::Default) - return FuncBBClusterInfo.lookup(*X.getBBID()).PositionInCluster < - FuncBBClusterInfo.lookup(*Y.getBBID()).PositionInCluster; + return ClusterInfoByBBID.lookup(*X.getBBID()).PositionInCluster < + ClusterInfoByBBID.lookup(*Y.getBBID()).PositionInCluster; return X.getNumber() < Y.getNumber(); }; diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp index ef5f1251f532..6bb412a6c753 100644 --- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp +++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/BasicBlockSectionsProfileReader.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" @@ -34,17 +35,37 @@ INITIALIZE_PASS(BasicBlockSectionsProfileReader, "bbsections-profile-reader", "Reads and parses a basic block sections profile.", false, false) +Expected +BasicBlockSectionsProfileReader::parseProfileBBID(StringRef S) const { + SmallVector Parts; + S.split(Parts, '.'); + if (Parts.size() > 2) + return createProfileParseError(Twine("unable to parse basic block id: '") + + S + "'"); + unsigned long long BBID; + if (getAsUnsignedInteger(Parts[0], 10, BBID)) 
+ return createProfileParseError( + Twine("unable to parse BB id: '" + Parts[0]) + + "': unsigned integer expected"); + unsigned long long CloneID = 0; + if (Parts.size() > 1 && getAsUnsignedInteger(Parts[1], 10, CloneID)) + return createProfileParseError(Twine("unable to parse clone id: '") + + Parts[1] + "': unsigned integer expected"); + return ProfileBBID{static_cast(BBID), + static_cast(CloneID)}; +} + bool BasicBlockSectionsProfileReader::isFunctionHot(StringRef FuncName) const { - return getBBClusterInfoForFunction(FuncName).first; + return getPathAndClusterInfoForFunction(FuncName).first; } -std::pair> -BasicBlockSectionsProfileReader::getBBClusterInfoForFunction( +std::pair +BasicBlockSectionsProfileReader::getPathAndClusterInfoForFunction( StringRef FuncName) const { - auto R = ProgramBBClusterInfo.find(getAliasName(FuncName)); - return R != ProgramBBClusterInfo.end() + auto R = ProgramPathAndClusterInfo.find(getAliasName(FuncName)); + return R != ProgramPathAndClusterInfo.end() ? std::pair(true, R->second) - : std::pair(false, SmallVector{}); + : std::pair(false, FunctionPathAndClusterInfo()); } // Reads the version 1 basic block sections profile. Profile for each function @@ -61,8 +82,49 @@ BasicBlockSectionsProfileReader::getBBClusterInfoForFunction( // aliases. Basic block clusters are specified by 'c' and specify the cluster of // basic blocks, and the internal order in which they must be placed in the same // section. +// This profile can also specify cloning paths which instruct the compiler to +// clone basic blocks along a path. The cloned blocks are then specified in the +// cluster information. +// The following profile lists two cloning paths (starting with 'p') for +// function bar and places the total 9 blocks within two clusters. The first two +// blocks of a cloning path specify the edge along which the path is cloned. For +// instance, path 1 (1 -> 3 -> 4) instructs that 3 and 4 must be cloned along +// the edge 1->3. 
Within the given clusters, each cloned block is identified by +// "<original block id>.<cloneid>". For instance, 3.1 represents the first +// clone of block 3. Original blocks are specified just with their block ids. A +// block cloned multiple times appears with distinct clone ids. The CFG for bar +// is shown below before and after cloning with its final clusters labeled. +// +// f main +// f bar +// p 1 3 4 # cloning path 1 +// p 4 2 # cloning path 2 +// c 1 3.1 4.1 6 # basic block cluster 1 +// c 0 2 3 4 2.1 5 # basic block cluster 2 +// **************************************************************************** +// function bar before and after cloning with basic block clusters shown. +// **************************************************************************** +// .... .............. +// 0 -------+ : 0 :---->: 1 ---> 3.1 : +// | | : | : :........ | : +// v v : v : : v : +// +--> 2 --> 5 1 ~~~~~~> +---: 2 : : 4.1: cluster 1 +// | | | | : | : : | : +// | v | | : v ....... : v : +// | 3 <------+ | : 3 <--+ : : 6 : +// | | | : | | : :....: +// | v | : v | : +// +--- 4 ---> 6 | : 4 | : +// | : | | : +// | : v | : +// | :2.1---+ : cluster 2 +// | : | ......: +// | : v : +// +-->: 5 : +// .... +// **************************************************************************** Error BasicBlockSectionsProfileReader::ReadV1Profile() { - auto FI = ProgramBBClusterInfo.end(); + auto FI = ProgramPathAndClusterInfo.end(); // Current cluster ID corresponding to this function. unsigned CurrentCluster = 0; @@ -71,7 +133,7 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() { // Temporary set to ensure every basic block ID appears once in the clusters // of a function. - SmallSet FuncBBIDs; + DenseSet FuncBBIDs; // Debug-info-based module filename for the current function. Empty string // means no filename. @@ -85,7 +147,7 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() { S.split(Values, ' '); switch (Specifier) { case '@': - break; + continue; case 'm': // Module name speicifer.
if (Values.size() != 1) { return createProfileParseError(Twine("invalid module name value: '") + @@ -106,7 +168,7 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() { if (!FunctionFound) { // Skip the following profile by setting the profile iterator (FI) to // the past-the-end element. - FI = ProgramBBClusterInfo.end(); + FI = ProgramPathAndClusterInfo.end(); DIFilename = ""; continue; } @@ -115,7 +177,7 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() { // Prepare for parsing clusters of this function name. // Start a new cluster map for this function name. - auto R = ProgramBBClusterInfo.try_emplace(Values.front()); + auto R = ProgramPathAndClusterInfo.try_emplace(Values.front()); // Report error when multiple profiles have been specified for the same // function. if (!R.second) @@ -132,38 +194,55 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() { case 'c': // Basic block cluster specifier. // Skip the profile when we the profile iterator (FI) refers to the // past-the-end element. - if (FI == ProgramBBClusterInfo.end()) - break; + if (FI == ProgramPathAndClusterInfo.end()) + continue; // Reset current cluster position. 
CurrentPosition = 0; - for (auto BBIDStr : Values) { - unsigned long long BBID; - if (getAsUnsignedInteger(BBIDStr, 10, BBID)) - return createProfileParseError(Twine("unsigned integer expected: '") + - BBIDStr + "'"); - if (!FuncBBIDs.insert(BBID).second) + for (auto BasicBlockIDStr : Values) { + auto BasicBlockID = parseProfileBBID(BasicBlockIDStr); + if (!BasicBlockID) + return BasicBlockID.takeError(); + if (!FuncBBIDs.insert(*BasicBlockID).second) return createProfileParseError( - Twine("duplicate basic block id found '") + BBIDStr + "'"); - if (BBID == 0 && CurrentPosition) + Twine("duplicate basic block id found '") + BasicBlockIDStr + + "'"); + + if (!BasicBlockID->BBID && CurrentPosition) return createProfileParseError( - "entry BB (0) does not begin a cluster"); + "entry BB (0) does not begin a cluster."); - FI->second.emplace_back( - BBClusterInfo{((unsigned)BBID), CurrentCluster, CurrentPosition++}); + FI->second.ClusterInfo.emplace_back(BBClusterInfo{ + *std::move(BasicBlockID), CurrentCluster, CurrentPosition++}); } CurrentCluster++; continue; + case 'p': { // Basic block cloning path specifier. 
+ SmallSet BBsInPath; + FI->second.ClonePaths.push_back({}); + for (size_t I = 0; I < Values.size(); ++I) { + auto BBIDStr = Values[I]; + unsigned long long BBID = 0; + if (getAsUnsignedInteger(BBIDStr, 10, BBID)) + return createProfileParseError(Twine("unsigned integer expected: '") + + BBIDStr + "'"); + if (I != 0 && !BBsInPath.insert(BBID).second) + return createProfileParseError( + Twine("duplicate cloned block in path: '") + BBIDStr + "'"); + FI->second.ClonePaths.back().push_back(BBID); + } + continue; + } default: return createProfileParseError(Twine("invalid specifier: '") + Twine(Specifier) + "'"); } + llvm_unreachable("should not break from this switch statement"); } return Error::success(); } Error BasicBlockSectionsProfileReader::ReadV0Profile() { - auto FI = ProgramBBClusterInfo.end(); - + auto FI = ProgramPathAndClusterInfo.end(); // Current cluster ID corresponding to this function. unsigned CurrentCluster = 0; // Current position in the current cluster. @@ -184,7 +263,7 @@ Error BasicBlockSectionsProfileReader::ReadV0Profile() { if (S.consume_front("!")) { // Skip the profile when we the profile iterator (FI) refers to the // past-the-end element. - if (FI == ProgramBBClusterInfo.end()) + if (FI == ProgramPathAndClusterInfo.end()) continue; SmallVector BBIDs; S.split(BBIDs, ' '); @@ -202,8 +281,10 @@ Error BasicBlockSectionsProfileReader::ReadV0Profile() { return createProfileParseError( "entry BB (0) does not begin a cluster"); - FI->second.emplace_back( - BBClusterInfo{((unsigned)BBID), CurrentCluster, CurrentPosition++}); + FI->second.ClusterInfo.emplace_back( + BBClusterInfo({{static_cast(BBID), 0}, + CurrentCluster, + CurrentPosition++})); } CurrentCluster++; } else { @@ -237,7 +318,7 @@ Error BasicBlockSectionsProfileReader::ReadV0Profile() { if (!FunctionFound) { // Skip the following profile by setting the profile iterator (FI) to // the past-the-end element. 
- FI = ProgramBBClusterInfo.end(); + FI = ProgramPathAndClusterInfo.end(); continue; } for (size_t i = 1; i < Aliases.size(); ++i) @@ -245,7 +326,7 @@ Error BasicBlockSectionsProfileReader::ReadV0Profile() { // Prepare for parsing clusters of this function name. // Start a new cluster map for this function name. - auto R = ProgramBBClusterInfo.try_emplace(Aliases.front()); + auto R = ProgramPathAndClusterInfo.try_emplace(Aliases.front()); // Report error when multiple profiles have been specified for the same // function. if (!R.second) @@ -261,7 +342,7 @@ Error BasicBlockSectionsProfileReader::ReadV0Profile() { // Basic Block Sections can be enabled for a subset of machine basic blocks. // This is done by passing a file containing names of functions for which basic -// block sections are desired. Additionally, machine basic block ids of the +// block sections are desired. Additionally, machine basic block ids of the // functions can also be specified for a finer granularity. Moreover, a cluster // of basic blocks could be assigned to the same section. // Optionally, a debug-info filename can be specified for each function to allow diff --git a/llvm/test/CodeGen/X86/basic-block-sections-clusters-error.ll b/llvm/test/CodeGen/X86/basic-block-sections-clusters-error.ll index 5577601c02cf..597d8f6707ec 100644 --- a/llvm/test/CodeGen/X86/basic-block-sections-clusters-error.ll +++ b/llvm/test/CodeGen/X86/basic-block-sections-clusters-error.ll @@ -32,13 +32,35 @@ ; RUN: echo '!dummy1' >> %t8 ; RUN: not --crash llc < %s -O0 -mtriple=x86_64 -function-sections -basic-block-sections=%t8 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR8 ; CHECK-ERROR8: LLVM ERROR: invalid profile {{.*}} at line 2: invalid specifier: '!' 
-; RUN: echo 'v1' > %t0 -; RUN: echo 'm dummy1/module1 dummy1/module2' +; RUN: echo 'v1' > %t9 +; RUN: echo 'm dummy1/module1 dummy1/module2' >> %t9 ; RUN: echo 'f dummy1' >> %t9 -; RUN: not --crash llc < %s -O0 -mtriple=x86_64 -function-sections -basic-block-sections=%t8 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR8 -; CHECK-ERROR9: LLVM ERROR: invalid profile {{.*}} at line 2: invalid module name value: 'dummy1/module dummy1/module2' - - +; RUN: not --crash llc < %s -O0 -mtriple=x86_64 -function-sections -basic-block-sections=%t9 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR9 +; CHECK-ERROR9: LLVM ERROR: invalid profile {{.*}} at line 2: invalid module name value: 'dummy1/module1 dummy1/module2' +;; +;; Error handling for version 1, cloning paths. +; RUN: echo 'v1' > %t10 +; RUN: echo 'f dummy1' >> %t10 +; RUN: echo 'c 0 1.1.1' >> %t10 +; RUN: not --crash llc < %s -O0 -mtriple=x86_64 -function-sections -basic-block-sections=%t10 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR10 +; CHECK-ERROR10: LLVM ERROR: invalid profile {{.*}} at line 3: unable to parse basic block id: '1.1.1' +; RUN: echo 'v1' > %t11 +; RUN: echo 'f dummy1' >> %t11 +; RUN: echo 'c 0 1.a' >> %t11 +; RUN: not --crash llc < %s -O0 -mtriple=x86_64 -function-sections -basic-block-sections=%t11 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR11 +; CHECK-ERROR11: LLVM ERROR: invalid profile {{.*}} at line 3: unable to parse clone id: 'a' +; RUN: echo 'v1' > %t12 +; RUN: echo 'f dummy1' >> %t12 +; RUN: echo 'c 0 1' >> %t12 +; RUN: echo 'p 1 2.1' >> %t12 +; RUN: not --crash llc < %s -O0 -mtriple=x86_64 -function-sections -basic-block-sections=%t12 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR12 +; CHECK-ERROR12: LLVM ERROR: invalid profile {{.*}} at line 4: unsigned integer expected: '2.1' +; RUN: echo 'v1' > %t13 +; RUN: echo 'f dummy1' >> %t13 +; RUN: echo 'c 0 1' >> %t13 +; RUN: echo 'p 1 2 3 2' >> %t13 +; RUN: not --crash llc < %s -O0 -mtriple=x86_64 -function-sections -basic-block-sections=%t13 
2>&1 | FileCheck %s --check-prefix=CHECK-ERROR13 +; CHECK-ERROR13: LLVM ERROR: invalid profile {{.*}} at line 4: duplicate cloned block in path: '2' define i32 @dummy1(i32 %x, i32 %y, i32 %z) { entry: @@ -63,4 +85,3 @@ define i32 @dummy2(i32 %x, i32 %y, i32 %z) !dbg !4 { !3 = !{i32 2, !"Debug Info Version", i32 3} !4 = distinct !DISubprogram(name: "dummy1", scope: !1, unit: !0) - -- Gitee From 8f6170e0fc60c61354bdcef0f1b3c1b619b9fb74 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Tue, 19 Sep 2023 14:37:06 -0700 Subject: [PATCH 13/47] [AsmPrint] Dump raw frequencies in `-mbb-profile-dump` (#66818) We were losing the function entry count, which is useful to check profile quality. For the original cases where we want entrypoint-relative MBB frequencies, the user would just need to divide these values by the entrypoint (first MBB, with ID=0) value. --- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 2 +- llvm/test/CodeGen/MLRegalloc/bb-profile-dump.ll | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 5381dfdd184c..bdc37b0e8ea2 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1934,7 +1934,7 @@ void AsmPrinter::emitFunctionBody() { for (const auto &MBB : *MF) { *MBBProfileDumpFileOutput.get() << MF->getName() << "," << MBB.getBBID() << "," - << MBFI.getBlockFreqRelativeToEntryBlock(&MBB) << "\n"; + << MBFI.getBlockFreq(&MBB).getFrequency() << "\n"; } } } diff --git a/llvm/test/CodeGen/MLRegalloc/bb-profile-dump.ll b/llvm/test/CodeGen/MLRegalloc/bb-profile-dump.ll index 34244e750017..5d94d97a71be 100644 --- a/llvm/test/CodeGen/MLRegalloc/bb-profile-dump.ll +++ b/llvm/test/CodeGen/MLRegalloc/bb-profile-dump.ll @@ -15,6 +15,8 @@ define i64 @f2(i64 %a, i64 %b) { ret i64 %sum } +; CHECK: f2,0,8 + define i64 @f1() { %sum = call i64 @f2(i64 2, i64 2) %isEqual = icmp eq i64 %sum, 4 @@ -25,10 +27,9 @@ 
ifNotEqual: ret i64 %sum } -; CHECK: f2,0,1.000000e+00 -; CHECK-NEXT: f1,0,1.000000e+00 -; CHECK-NEXT: f1,1,5.000000e-01 -; CHECK-NEXT: f1,2,1.000000e+00 +; CHECK-NEXT: f1,0,16 +; CHECK-NEXT: f1,1,8 +; CHECK-NEXT: f1,2,16 ; Check that if we pass -mbb-profile-dump but don't set -basic-block-sections, ; we get an appropriate error message -- Gitee From 3d7213e3bda8df00aa406648c97bf5772e9d9912 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Fri, 29 Sep 2023 18:06:53 -0700 Subject: [PATCH 14/47] [AsmPrint] Correctly factor function entry count when dumping MBB frequencies (#67826) The goal in #66818 was to capture function entry counts, but those are not the same as the frequency of the entry (machine) basic block. This fixes that, and adds explicit profiles to the test. We also increase the precision of `MachineBlockFrequencyInfo::getBlockFreqRelativeToEntryBlock` to double. Existing code uses it as float so should be unaffected. --- .../llvm/CodeGen/MachineBlockFrequencyInfo.h | 5 +-- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 20 +++++++++-- .../CodeGen/MLRegalloc/bb-profile-dump.ll | 36 +++++++++++++++---- 3 files changed, 49 insertions(+), 12 deletions(-) diff --git a/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h b/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h index 6d58c7a14fb9..1152eefed6e4 100644 --- a/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h +++ b/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h @@ -65,9 +65,10 @@ public: /// Compute the frequency of the block, relative to the entry block. /// This API assumes getEntryFreq() is non-zero. 
- float getBlockFreqRelativeToEntryBlock(const MachineBasicBlock *MBB) const { + double getBlockFreqRelativeToEntryBlock(const MachineBasicBlock *MBB) const { assert(getEntryFreq() != 0 && "getEntryFreq() should not return 0 here!"); - return getBlockFreq(MBB).getFrequency() * (1.0f / getEntryFreq()); + return static_cast(getBlockFreq(MBB).getFrequency()) / + static_cast(getEntryFreq()); } std::optional diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index bdc37b0e8ea2..9477313825f4 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1923,7 +1923,8 @@ void AsmPrinter::emitFunctionBody() { // Output MBB ids, function names, and frequencies if the flag to dump // MBB profile information has been set - if (MBBProfileDumpFileOutput) { + if (MBBProfileDumpFileOutput && !MF->empty() && + MF->getFunction().getEntryCount()) { if (!MF->hasBBLabels()) MF->getContext().reportError( SMLoc(), @@ -1931,10 +1932,23 @@ void AsmPrinter::emitFunctionBody() { "must be called with -basic-block-sections=labels"); MachineBlockFrequencyInfo &MBFI = getAnalysis().getBFI(); + // The entry count and the entry basic block frequency aren't the same. We + // want to capture "absolute" frequencies, i.e. the frequency with which a + // MBB is executed when the program is executed. From there, we can derive + // Function-relative frequencies (divide by the value for the first MBB). + // We also have the information about frequency with which functions + // were called. This helps, for example, in a type of integration tests + // where we want to cross-validate the compiler's profile with a real + // profile. + // Using double precision because uint64 values used to encode mbb + // "frequencies" may be quite large. 
+ const double EntryCount = + static_cast(MF->getFunction().getEntryCount()->getCount()); for (const auto &MBB : *MF) { + const double MBBRelFreq = MBFI.getBlockFreqRelativeToEntryBlock(&MBB); + const double AbsMBBFreq = MBBRelFreq * EntryCount; *MBBProfileDumpFileOutput.get() - << MF->getName() << "," << MBB.getBBID() << "," - << MBFI.getBlockFreq(&MBB).getFrequency() << "\n"; + << MF->getName() << "," << MBB.getBBID() << "," << AbsMBBFreq << "\n"; } } } diff --git a/llvm/test/CodeGen/MLRegalloc/bb-profile-dump.ll b/llvm/test/CodeGen/MLRegalloc/bb-profile-dump.ll index 5d94d97a71be..cc6332422af5 100644 --- a/llvm/test/CodeGen/MLRegalloc/bb-profile-dump.ll +++ b/llvm/test/CodeGen/MLRegalloc/bb-profile-dump.ll @@ -10,26 +10,48 @@ ; Check that given a simple case, we can return the default MBFI -define i64 @f2(i64 %a, i64 %b) { +define i64 @f2(i64 %a, i64 %b) !prof !1{ %sum = add i64 %a, %b ret i64 %sum } -; CHECK: f2,0,8 +; CHECK: f2,0,1.000000e+03 -define i64 @f1() { +define i64 @f1() !prof !2{ %sum = call i64 @f2(i64 2, i64 2) %isEqual = icmp eq i64 %sum, 4 - br i1 %isEqual, label %ifEqual, label %ifNotEqual + br i1 %isEqual, label %ifEqual, label %ifNotEqual, !prof !3 ifEqual: ret i64 0 ifNotEqual: ret i64 %sum } -; CHECK-NEXT: f1,0,16 -; CHECK-NEXT: f1,1,8 -; CHECK-NEXT: f1,2,16 +; CHECK-NEXT: f1,0,1.000000e+01 +; CHECK-NEXT: f1,2,6.000000e+00 +; CHECK-NEXT: f1,1,4.000000e+00 + +define void @f3(i32 %iter) !prof !4 { +entry: + br label %loop +loop: + %i = phi i32 [0, %entry], [%i_next, %loop] + %i_next = add i32 %i, 1 + %exit_cond = icmp slt i32 %i_next, %iter + br i1 %exit_cond, label %loop, label %exit, !prof !5 +exit: + ret void +} + +; CHECK-NEXT: f3,0,2.000000e+00 +; CHECK-NEXT: f3,1,2.002000e+03 +; CHECK-NEXT: f3,2,2.000000e+00 + +!1 = !{!"function_entry_count", i64 1000} +!2 = !{!"function_entry_count", i64 10} +!3 = !{!"branch_weights", i32 2, i32 3} +!4 = !{!"function_entry_count", i64 2} +!5 = !{!"branch_weights", i32 1000, i32 1} ; Check that if we 
pass -mbb-profile-dump but don't set -basic-block-sections, ; we get an appropriate error message -- Gitee From 9b47065e5f23e9e8d53aa01a808f6ee07001f0a9 Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Fri, 27 Oct 2023 21:49:39 -0700 Subject: [PATCH 15/47] [BasicBlockSections] Apply path cloning with -basic-block-sections. (#68860) https://github.com/llvm/llvm-project/commit/28b912687900bc0a67cd61c374fce296b09963c4 introduced the path cloning format in the basic-block-sections profile. This PR validates and applies path clonings. A path cloning is valid if all of these conditions hold: 1. All bb ids in the path are mapped to existing blocks. 2. Each two consecutive bb ids in the path have a successor relationship in the CFG. 3. The path does not include a block with indirect branches, except possibly as the last block. Applying a path cloning involves cloning all blocks in the path (except the first one) and setting up their branches. Once all clonings are applied, the cluster information is used to guide block layout in the modified function. 
--- .../llvm/CodeGen/BasicBlockSectionUtils.h | 9 + .../CodeGen/BasicBlockSectionsProfileReader.h | 59 ++--- llvm/include/llvm/CodeGen/MachineBasicBlock.h | 15 +- llvm/include/llvm/CodeGen/MachineFunction.h | 7 +- llvm/include/llvm/CodeGen/Passes.h | 2 + llvm/include/llvm/InitializePasses.h | 1 + llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 49 ++-- llvm/lib/CodeGen/BasicBlockPathCloning.cpp | 245 ++++++++++++++++++ llvm/lib/CodeGen/BasicBlockSections.cpp | 55 ++-- .../BasicBlockSectionsProfileReader.cpp | 60 +++-- llvm/lib/CodeGen/CMakeLists.txt | 1 + llvm/lib/CodeGen/CodeGen.cpp | 1 + llvm/lib/CodeGen/MIRParser/MIParser.cpp | 18 +- llvm/lib/CodeGen/MachineBasicBlock.cpp | 6 +- llvm/lib/CodeGen/MachineFunction.cpp | 7 +- llvm/lib/CodeGen/TargetInstrInfo.cpp | 15 +- llvm/lib/CodeGen/TargetPassConfig.cpp | 1 + .../X86/basic-block-labels-mir-parse.mir | 2 +- .../X86/basic-block-sections-cloning-1.ll | 71 +++++ .../X86/basic-block-sections-cloning-2.ll | 86 ++++++ ...block-sections-cloning-indirect-invalid.ll | 45 ++++ .../basic-block-sections-cloning-indirect.ll | 43 +++ .../basic-block-sections-cloning-invalid.ll | 72 +++++ 23 files changed, 736 insertions(+), 134 deletions(-) create mode 100644 llvm/lib/CodeGen/BasicBlockPathCloning.cpp create mode 100644 llvm/test/CodeGen/X86/basic-block-sections-cloning-1.ll create mode 100644 llvm/test/CodeGen/X86/basic-block-sections-cloning-2.ll create mode 100644 llvm/test/CodeGen/X86/basic-block-sections-cloning-indirect-invalid.ll create mode 100644 llvm/test/CodeGen/X86/basic-block-sections-cloning-indirect.ll create mode 100644 llvm/test/CodeGen/X86/basic-block-sections-cloning-invalid.ll diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionUtils.h b/llvm/include/llvm/CodeGen/BasicBlockSectionUtils.h index d43f399b2c31..292abf8b2b51 100644 --- a/llvm/include/llvm/CodeGen/BasicBlockSectionUtils.h +++ b/llvm/include/llvm/CodeGen/BasicBlockSectionUtils.h @@ -27,6 +27,15 @@ void 
sortBasicBlocksAndUpdateBranches(MachineFunction &MF, void avoidZeroOffsetLandingPad(MachineFunction &MF); +/// This checks if the source of this function has drifted since this binary was +/// profiled previously. +/// For now, we are piggybacking on what PGO does to +/// detect this with instrumented profiles. PGO emits a hash of the IR and +/// checks if the hash has changed. Advanced basic block layout is usually done +/// on top of PGO optimized binaries and hence this check works well in +/// practice. +bool hasInstrProfHashMismatch(MachineFunction &MF); + } // end namespace llvm #endif // LLVM_CODEGEN_BASICBLOCKSECTIONUTILS_H diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h index 6e01dfd11ee6..dfb8d5d9f2f5 100644 --- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h +++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h @@ -19,33 +19,22 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/IR/Module.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Error.h" #include "llvm/Support/LineIterator.h" #include "llvm/Support/MemoryBuffer.h" +using namespace llvm; namespace llvm { -// This structure represents a unique ID for every block specified in the -// input profile. -struct ProfileBBID { - // Basic block id associated with `MachineBasicBlock::BBID`. - unsigned BBID; - // The clone id associated with the block. This is zero for the original - // block. For the cloned ones, it is equal to 1 + index of the associated - // path in `FunctionPathAndClusterInfo::ClonePaths`. - unsigned CloneID; -}; - // This struct represents the cluster information for a machine basic block, -// which is specifed by a unique ID.
This templated struct is used for both the -// raw input profile (as `BBClusterInfo`) and the processed profile -// after applying the clonings (as `BBClusterInfo`). -template struct BBClusterInfo { +// which is specified by a unique ID (`MachineBasicBlock::BBID`). +struct BBClusterInfo { // Basic block ID. - BBIDType BasicBlockID; + UniqueBBID BBID; // Cluster ID this basic block belongs to. unsigned ClusterID; // Position of basic block within the cluster. @@ -54,31 +43,31 @@ template struct BBClusterInfo { // This represents the raw input profile for one function. struct FunctionPathAndClusterInfo { - // BB Cluster information specified by `ProfileBBID`s (before cloning). - SmallVector> ClusterInfo; + // BB Cluster information specified by `UniqueBBID`s. + SmallVector ClusterInfo; // Paths to clone. A path a -> b -> c -> d implies cloning b, c, and d along // the edge a -> b (a is not cloned). The index of the path in this vector - // determines the `ProfileBBID::CloneID` of the cloned blocks in that path. + // determines the `UniqueBBID::CloneID` of the cloned blocks in that path. SmallVector> ClonePaths; }; -// Provides DenseMapInfo for ProfileBBID. -template <> struct DenseMapInfo { - static inline ProfileBBID getEmptyKey() { +// Provides DenseMapInfo for UniqueBBID.
+template <> struct DenseMapInfo { + static inline UniqueBBID getEmptyKey() { unsigned EmptyKey = DenseMapInfo::getEmptyKey(); - return ProfileBBID{EmptyKey, EmptyKey}; + return UniqueBBID{EmptyKey, EmptyKey}; } - static inline ProfileBBID getTombstoneKey() { + static inline UniqueBBID getTombstoneKey() { unsigned TombstoneKey = DenseMapInfo::getTombstoneKey(); - return ProfileBBID{TombstoneKey, TombstoneKey}; + return UniqueBBID{TombstoneKey, TombstoneKey}; } - static unsigned getHashValue(const ProfileBBID &Val) { + static unsigned getHashValue(const UniqueBBID &Val) { std::pair PairVal = - std::make_pair(Val.BBID, Val.CloneID); + std::make_pair(Val.BaseID, Val.CloneID); return DenseMapInfo>::getHashValue(PairVal); } - static bool isEqual(const ProfileBBID &LHS, const ProfileBBID &RHS) { - return DenseMapInfo::isEqual(LHS.BBID, RHS.BBID) && + static bool isEqual(const UniqueBBID &LHS, const UniqueBBID &RHS) { + return DenseMapInfo::isEqual(LHS.BaseID, RHS.BaseID) && DenseMapInfo::isEqual(LHS.CloneID, RHS.CloneID); } }; @@ -113,8 +102,12 @@ public: // function. If the first element is true and the second element is empty, it // means unique basic block sections are desired for all basic blocks of the // function. - std::pair - getPathAndClusterInfoForFunction(StringRef FuncName) const; + std::pair> + getClusterInfoForFunction(StringRef FuncName) const; + + // Returns the path clonings for the given function. + SmallVector> + getClonePathsForFunction(StringRef FuncName) const; // Initializes the FunctionNameToDIFilename map for the current module and // then reads the profile for the matching functions. @@ -134,11 +127,11 @@ private: inconvertibleErrorCode()); } - // Parses a `ProfileBBID` from `S`. `S` must be in the form "" + // Parses a `UniqueBBID` from `S`. `S` must be in the form "" // (representing an original block) or "." (representing a // cloned block) where bbid is a non-negative integer and cloneid is a // positive integer. 
- Expected parseProfileBBID(StringRef S) const; + Expected parseUniqueBBID(StringRef S) const; // Reads the basic block sections profile for functions in this module. Error ReadProfile(); diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h index d52340b4099d..69b9a95b1018 100644 --- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h +++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h @@ -77,6 +77,13 @@ private: MBBSectionID(SectionType T) : Type(T), Number(0) {} }; +// This structure represents the information for a basic block. +struct UniqueBBID { + unsigned BaseID; + // Zero for the original block; non-zero for a clone of that block. + unsigned CloneID; +}; + template <> struct ilist_traits { private: friend class MachineBasicBlock; // Set by the owning MachineBasicBlock. @@ -184,7 +191,7 @@ private: /// Fixed unique ID assigned to this basic block upon creation. Used with /// basic block sections and basic block labels. - std::optional BBID; + std::optional BBID; /// With basic block sections, this stores the Section ID of the basic block. MBBSectionID SectionID{0}; @@ -637,7 +644,7 @@ public: void setIsEndSection(bool V = true) { IsEndSection = V; } - std::optional getBBID() const { return BBID; } + std::optional getBBID() const { return BBID; } /// Returns the section ID of this basic block. MBBSectionID getSectionID() const { return SectionID; } @@ -649,7 +656,7 @@ public: } /// Sets the fixed BBID of this basic block. - void setBBID(unsigned V) { + void setBBID(const UniqueBBID &V) { assert(!BBID.has_value() && "Cannot change BBID."); BBID = V; } @@ -757,7 +764,7 @@ public: /// /// This is useful when doing a partial clone of successors. Afterward, the /// probabilities may need to be normalized. - void copySuccessor(MachineBasicBlock *Orig, succ_iterator I); + void copySuccessor(const MachineBasicBlock *Orig, succ_iterator I); /// Split the old successor into old plus new and updates the probability /// info.
diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h index 09f9ff60f955..7fe41f7a2862 100644 --- a/llvm/include/llvm/CodeGen/MachineFunction.h +++ b/llvm/include/llvm/CodeGen/MachineFunction.h @@ -999,8 +999,11 @@ public: void deleteMachineInstr(MachineInstr *MI); /// CreateMachineBasicBlock - Allocate a new MachineBasicBlock. Use this - /// instead of `new MachineBasicBlock'. - MachineBasicBlock *CreateMachineBasicBlock(const BasicBlock *bb = nullptr); + /// instead of `new MachineBasicBlock'. Sets `MachineBasicBlock::BBID` if + /// basic-block-sections is enabled for the function. + MachineBasicBlock * + CreateMachineBasicBlock(const BasicBlock *BB = nullptr, + std::optional BBID = std::nullopt); /// DeleteMachineBasicBlock - Delete the given MachineBasicBlock. void deleteMachineBasicBlock(MachineBasicBlock *MBB); diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h index 04888ad90b5d..90f43897905c 100644 --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -65,6 +65,8 @@ namespace llvm { /// basic blocks and is enabled with -fbasic-block-sections. MachineFunctionPass *createBasicBlockSectionsPass(); + MachineFunctionPass *createBasicBlockPathCloningPass(); + /// createMachineFunctionSplitterPass - This pass splits machine functions /// using profile information. 
MachineFunctionPass *createMachineFunctionSplitterPass(); diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index f5e580fd3569..1ce24ef860b7 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -55,6 +55,7 @@ void initializeAssignmentTrackingAnalysisPass(PassRegistry &); void initializeAssumeBuilderPassLegacyPassPass(PassRegistry &); void initializeAssumptionCacheTrackerPass(PassRegistry&); void initializeAtomicExpandPass(PassRegistry&); +void initializeBasicBlockPathCloningPass(PassRegistry &); void initializeBasicBlockSectionsProfileReaderPass(PassRegistry &); void initializeBasicBlockSectionsPass(PassRegistry &); void initializeBarrierNoopPass(PassRegistry&); diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 9477313825f4..80652bc90a30 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1370,7 +1370,11 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) { if (BBAddrMapVersion > 1) { OutStreamer->AddComment("BB id"); // Emit the BB ID for this basic block. - OutStreamer->emitULEB128IntValue(*MBB.getBBID()); + // We only emit BaseID since CloneID is unset for + // basic-block-sections=labels. + // TODO: Emit the full BBID when labels and sections can be mixed + // together. + OutStreamer->emitULEB128IntValue(MBB.getBBID()->BaseID); } // Emit the basic block offset relative to the end of the previous block. // This is zero unless the block is padded due to alignment. @@ -1925,30 +1929,33 @@ void AsmPrinter::emitFunctionBody() { // MBB profile information has been set if (MBBProfileDumpFileOutput && !MF->empty() && MF->getFunction().getEntryCount()) { - if (!MF->hasBBLabels()) + if (!MF->hasBBLabels()) { MF->getContext().reportError( SMLoc(), "Unable to find BB labels for MBB profile dump. 
-mbb-profile-dump " "must be called with -basic-block-sections=labels"); - MachineBlockFrequencyInfo &MBFI = - getAnalysis().getBFI(); - // The entry count and the entry basic block frequency aren't the same. We - // want to capture "absolute" frequencies, i.e. the frequency with which a - // MBB is executed when the program is executed. From there, we can derive - // Function-relative frequencies (divide by the value for the first MBB). - // We also have the information about frequency with which functions - // were called. This helps, for example, in a type of integration tests - // where we want to cross-validate the compiler's profile with a real - // profile. - // Using double precision because uint64 values used to encode mbb - // "frequencies" may be quite large. - const double EntryCount = - static_cast(MF->getFunction().getEntryCount()->getCount()); - for (const auto &MBB : *MF) { - const double MBBRelFreq = MBFI.getBlockFreqRelativeToEntryBlock(&MBB); - const double AbsMBBFreq = MBBRelFreq * EntryCount; - *MBBProfileDumpFileOutput.get() - << MF->getName() << "," << MBB.getBBID() << "," << AbsMBBFreq << "\n"; + } else { + MachineBlockFrequencyInfo &MBFI = + getAnalysis().getBFI(); + // The entry count and the entry basic block frequency aren't the same. We + // want to capture "absolute" frequencies, i.e. the frequency with which a + // MBB is executed when the program is executed. From there, we can derive + // Function-relative frequencies (divide by the value for the first MBB). + // We also have the information about frequency with which functions + // were called. This helps, for example, in a type of integration tests + // where we want to cross-validate the compiler's profile with a real + // profile. + // Using double precision because uint64 values used to encode mbb + // "frequencies" may be quite large. 
+ const double EntryCount = + static_cast(MF->getFunction().getEntryCount()->getCount()); + for (const auto &MBB : *MF) { + const double MBBRelFreq = MBFI.getBlockFreqRelativeToEntryBlock(&MBB); + const double AbsMBBFreq = MBBRelFreq * EntryCount; + *MBBProfileDumpFileOutput.get() + << MF->getName() << "," << MBB.getBBID()->BaseID << "," + << AbsMBBFreq << "\n"; + } } } } diff --git a/llvm/lib/CodeGen/BasicBlockPathCloning.cpp b/llvm/lib/CodeGen/BasicBlockPathCloning.cpp new file mode 100644 index 000000000000..5d5f3c3da481 --- /dev/null +++ b/llvm/lib/CodeGen/BasicBlockPathCloning.cpp @@ -0,0 +1,245 @@ +//===-- BasicBlockPathCloning.cpp ---=========-----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// BasicBlockPathCloning implementation. +/// +/// The purpose of this pass is to clone basic block paths based on information +/// provided by the -fbasic-block-sections=list option. +/// Please refer to BasicBlockSectionsProfileReader.cpp to see a path cloning +/// example. +//===----------------------------------------------------------------------===// +// This pass clones the machine basic blocks along the given paths and sets up +// the CFG. It assigns BBIDs to the cloned blocks so that the +// `BasicBlockSections` pass can correctly map the cluster information to the +// blocks. The cloned block's BBID will have the same BaseID as the original +// block, but will get a unique non-zero CloneID (original blocks all have zero +// CloneIDs). This pass applies a path cloning if it satisfies the following +// conditions: +// 1. All BBIDs in the path should be mapped to existing blocks. +// 2.
Each two consecutive BBIDs in the path must have a successor +// relationship in the CFG. +// 3. The path should not include a block with indirect branches, except for +// the last block. +// If a path does not satisfy all three conditions, it will be rejected, but the +// CloneIDs for its (supposed to be cloned) blocks will be bypassed to make sure +// that the `BasicBlockSections` pass can map cluster info correctly to the +// actually-cloned blocks. +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/BasicBlockSectionUtils.h" +#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/WithColor.h" +#include "llvm/Target/TargetMachine.h" + +using namespace llvm; + +namespace { + +// Clones the given block and assigns the given `CloneID` to its BBID. Copies +// the instructions into the new block and sets up its successors. +MachineBasicBlock *CloneMachineBasicBlock(MachineBasicBlock &OrigBB, + unsigned CloneID) { + auto &MF = *OrigBB.getParent(); + auto TII = MF.getSubtarget().getInstrInfo(); + // Create the clone block and set its BBID based on the original block. + MachineBasicBlock *CloneBB = MF.CreateMachineBasicBlock( + OrigBB.getBasicBlock(), UniqueBBID{OrigBB.getBBID()->BaseID, CloneID}); + MF.push_back(CloneBB); + + // Copy the instructions. + for (auto &I : OrigBB.instrs()) { + // Bundled instructions are duplicated together. + if (I.isBundledWithPred()) + continue; + TII->duplicate(*CloneBB, CloneBB->end(), I); + } + + // Add the successors of the original block as the new block's successors. + // We set the predecessor after returning from this call. 
+ for (auto SI = OrigBB.succ_begin(), SE = OrigBB.succ_end(); SI != SE; ++SI) + CloneBB->copySuccessor(&OrigBB, SI); + + if (auto FT = OrigBB.getFallThrough(/*JumpToFallThrough=*/false)) { + // The original block has an implicit fall through. + // Insert an explicit unconditional jump from the cloned block to the + // fallthrough block. Technically, this is only needed for the last block + // of the path, but we do it for all clones for consistency. + TII->insertUnconditionalBranch(*CloneBB, FT, CloneBB->findBranchDebugLoc()); + } + return CloneBB; +} + +// Returns if we can legally apply the cloning represented by `ClonePath`. +// `BBIDToBlock` contains the original basic blocks in function `MF` keyed by +// their `BBID::BaseID`. +bool IsValidCloning(const MachineFunction &MF, + const DenseMap &BBIDToBlock, + const SmallVector &ClonePath) { + const MachineBasicBlock *PrevBB = nullptr; + for (size_t I = 0; I < ClonePath.size(); ++I) { + unsigned BBID = ClonePath[I]; + const MachineBasicBlock *PathBB = BBIDToBlock.lookup(BBID); + if (!PathBB) { + WithColor::warning() << "no block with id " << BBID << " in function " + << MF.getName() << "\n"; + return false; + } + + if (PrevBB) { + if (!PrevBB->isSuccessor(PathBB)) { + WithColor::warning() + << "block #" << BBID << " is not a successor of block #" + << PrevBB->getBBID()->BaseID << " in function " << MF.getName() + << "\n"; + return false; + } + + for (auto &MI : *PathBB) { + // Avoid cloning when the block contains non-duplicable instructions. + // CFI instructions are marked as non-duplicable only because of Darwin, + // so we exclude them from this check. 
+ if (MI.isNotDuplicable() && !MI.isCFIInstruction()) { + WithColor::warning() + << "block #" << BBID + << " has non-duplicable instructions in function " << MF.getName() + << "\n"; + return false; + } + } + } + + if (I != ClonePath.size() - 1 && !PathBB->empty() && + PathBB->back().isIndirectBranch()) { + WithColor::warning() + << "block #" << BBID + << " has indirect branch and appears as the non-tail block of a " + "path in function " + << MF.getName() << "\n"; + return false; + } + PrevBB = PathBB; + } + return true; +} + +// Applies all clonings specified in `ClonePaths` to `MF`. Returns true +// if any clonings have been applied. +bool ApplyCloning(MachineFunction &MF, + const SmallVector> &ClonePaths) { + if (ClonePaths.empty()) + return false; + bool AnyPathsCloned = false; + // Map from the final BB IDs to the `MachineBasicBlock`s. + DenseMap BBIDToBlock; + for (auto &BB : MF) + BBIDToBlock.try_emplace(BB.getBBID()->BaseID, &BB); + + DenseMap NClonesForBBID; + auto TII = MF.getSubtarget().getInstrInfo(); + for (const auto &ClonePath : ClonePaths) { + if (!IsValidCloning(MF, BBIDToBlock, ClonePath)) { + // We still need to increment the number of clones so we can map + // to the cluster info correctly. + for (unsigned BBID : ClonePath) + ++NClonesForBBID[BBID]; + continue; + } + MachineBasicBlock *PrevBB = nullptr; + for (unsigned BBID : ClonePath) { + MachineBasicBlock *OrigBB = BBIDToBlock.at(BBID); + if (PrevBB == nullptr) { + // The first block in the path is not cloned. We only need to make it + // branch to the next cloned block in the path. Here, we make its + // fallthrough explicit so we can change it later. + if (auto FT = OrigBB->getFallThrough(/*JumpToFallThrough=*/false)) { + TII->insertUnconditionalBranch(*OrigBB, FT, + OrigBB->findBranchDebugLoc()); + } + PrevBB = OrigBB; + continue; + } + MachineBasicBlock *CloneBB = + CloneMachineBasicBlock(*OrigBB, ++NClonesForBBID[BBID]); + + // Set up the previous block in the path to jump to the clone. 
This also + // transfers the successor/predecessor relationship of PrevBB and OrigBB + // to that of PrevBB and CloneBB. + PrevBB->ReplaceUsesOfBlockWith(OrigBB, CloneBB); + + // Copy the livein set. + for (auto &LiveIn : OrigBB->liveins()) + CloneBB->addLiveIn(LiveIn); + + PrevBB = CloneBB; + } + AnyPathsCloned = true; + } + return AnyPathsCloned; +} +} // end anonymous namespace + +namespace llvm { +class BasicBlockPathCloning : public MachineFunctionPass { +public: + static char ID; + + BasicBlockSectionsProfileReader *BBSectionsProfileReader = nullptr; + + BasicBlockPathCloning() : MachineFunctionPass(ID) { + initializeBasicBlockPathCloningPass(*PassRegistry::getPassRegistry()); + } + + StringRef getPassName() const override { return "Basic Block Path Cloning"; } + + void getAnalysisUsage(AnalysisUsage &AU) const override; + + /// Identify basic blocks that need separate sections and prepare to emit them + /// accordingly. + bool runOnMachineFunction(MachineFunction &MF) override; +}; + +} // namespace llvm + +char BasicBlockPathCloning::ID = 0; +INITIALIZE_PASS_BEGIN( + BasicBlockPathCloning, "bb-path-cloning", + "Applies path clonings for the -basic-block-sections=list option", false, + false) +INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReader) +INITIALIZE_PASS_END( + BasicBlockPathCloning, "bb-path-cloning", + "Applies path clonings for the -basic-block-sections=list option", false, + false) + +bool BasicBlockPathCloning::runOnMachineFunction(MachineFunction &MF) { + assert(MF.getTarget().getBBSectionsType() == BasicBlockSection::List && + "BB Sections list not enabled!"); + if (hasInstrProfHashMismatch(MF)) + return false; + + return ApplyCloning(MF, getAnalysis() + .getClonePathsForFunction(MF.getName())); +} + +void BasicBlockPathCloning::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +MachineFunctionPass *llvm::createBasicBlockPathCloningPass() { + return 
new BasicBlockPathCloning(); +} diff --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp index 632fd68d88b5..42997d2287d6 100644 --- a/llvm/lib/CodeGen/BasicBlockSections.cpp +++ b/llvm/lib/CodeGen/BasicBlockSections.cpp @@ -175,12 +175,12 @@ updateBranches(MachineFunction &MF, // clusters, they are moved into a single "Exception" section. Eventually, // clusters are ordered in increasing order of their IDs, with the "Exception" // and "Cold" succeeding all other clusters. -// ClusterInfoByBBID represents the cluster information for basic blocks. It +// FuncClusterInfo represents the cluster information for basic blocks. It // maps from BBID of basic blocks to their cluster information. If this is // empty, it means unique sections for all basic blocks in the function. -static void assignSections( - MachineFunction &MF, - const DenseMap> &ClusterInfoByBBID) { +static void +assignSections(MachineFunction &MF, + const DenseMap &FuncClusterInfo) { assert(MF.hasBBSections() && "BB Sections is not set for function."); // This variable stores the section ID of the cluster containing eh_pads (if // all eh_pads are one cluster). If more than one cluster contain eh_pads, we @@ -191,17 +191,17 @@ static void assignSections( // With the 'all' option, every basic block is placed in a unique section. // With the 'list' option, every basic block is placed in a section // associated with its cluster, unless we want individual unique sections - // for every basic block in this function (if ClusterInfoByBBID is empty). + // for every basic block in this function (if FuncClusterInfo is empty). if (MF.getTarget().getBBSectionsType() == llvm::BasicBlockSection::All || - ClusterInfoByBBID.empty()) { + FuncClusterInfo.empty()) { // If unique sections are desired for all basic blocks of the function, we // set every basic block's section ID equal to its original position in // the layout (which is equal to its number). 
This ensures that basic // blocks are ordered canonically. MBB.setSectionID(MBB.getNumber()); } else { - auto I = ClusterInfoByBBID.find(*MBB.getBBID()); - if (I != ClusterInfoByBBID.end()) { + auto I = FuncClusterInfo.find(*MBB.getBBID()); + if (I != FuncClusterInfo.end()) { MBB.setSectionID(I->second.ClusterID); } else { // BB goes into the special cold section if it is not specified in the @@ -264,12 +264,7 @@ void llvm::avoidZeroOffsetLandingPad(MachineFunction &MF) { } } -// This checks if the source of this function has drifted since this binary was -// profiled previously. For now, we are piggy backing on what PGO does to -// detect this with instrumented profiles. PGO emits an hash of the IR and -// checks if the hash has changed. Advanced basic block layout is usually done -// on top of PGO optimized binaries and hence this check works well in practice. -static bool hasInstrProfHashMismatch(MachineFunction &MF) { +bool llvm::hasInstrProfHashMismatch(MachineFunction &MF) { if (!BBSectionsDetectSourceDrift) return false; @@ -290,7 +285,7 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) { assert(BBSectionsType != BasicBlockSection::None && "BB Sections not enabled!"); - // Check for source drift. If the source has changed since the profiles + // Check for source drift. If the source has changed since the profiles // were obtained, optimizing basic blocks might be sub-optimal. // This only applies to BasicBlockSection::List as it creates // clusters of basic blocks using basic block ids. Source drift can @@ -298,38 +293,30 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) { // regards to performance. if (BBSectionsType == BasicBlockSection::List && hasInstrProfHashMismatch(MF)) - return true; + return false; // Renumber blocks before sorting them. This is useful for accessing the // original layout positions and finding the original fallthroughs. 
MF.RenumberBlocks(); if (BBSectionsType == BasicBlockSection::Labels) { MF.setBBSectionsType(BBSectionsType); - return true; + return false; } - DenseMap> ClusterInfoByBBID; + DenseMap FuncClusterInfo; if (BBSectionsType == BasicBlockSection::List) { - auto [HasProfile, PathAndClusterInfo] = + auto [HasProfile, ClusterInfo] = getAnalysis() - .getPathAndClusterInfoForFunction(MF.getName()); + .getClusterInfoForFunction(MF.getName()); if (!HasProfile) - return true; - for (const BBClusterInfo &BBP : - PathAndClusterInfo.ClusterInfo) { - // TODO: Apply the path cloning profile. - assert(!BBP.BasicBlockID.CloneID && "Path cloning is not supported yet"); - const auto [I, Inserted] = ClusterInfoByBBID.try_emplace( - BBP.BasicBlockID.BBID, - BBClusterInfo{BBP.BasicBlockID.BBID, BBP.ClusterID, - BBP.PositionInCluster}); - (void)I; - assert(Inserted && "Duplicate BBID found in profile"); + return false; + for (auto &BBClusterInfo : ClusterInfo) { + FuncClusterInfo.try_emplace(BBClusterInfo.BBID, BBClusterInfo); } } MF.setBBSectionsType(BBSectionsType); - assignSections(MF, ClusterInfoByBBID); + assignSections(MF, FuncClusterInfo); // We make sure that the cluster including the entry basic block precedes all // other clusters. @@ -363,8 +350,8 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) { // If the two basic block are in the same section, the order is decided by // their position within the section. 
if (XSectionID.Type == MBBSectionID::SectionType::Default) - return ClusterInfoByBBID.lookup(*X.getBBID()).PositionInCluster < - ClusterInfoByBBID.lookup(*Y.getBBID()).PositionInCluster; + return FuncClusterInfo.lookup(*X.getBBID()).PositionInCluster < + FuncClusterInfo.lookup(*Y.getBBID()).PositionInCluster; return X.getNumber() < Y.getNumber(); }; diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp index 6bb412a6c753..96662378a869 100644 --- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp +++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp @@ -35,15 +35,15 @@ INITIALIZE_PASS(BasicBlockSectionsProfileReader, "bbsections-profile-reader", "Reads and parses a basic block sections profile.", false, false) -Expected -BasicBlockSectionsProfileReader::parseProfileBBID(StringRef S) const { +Expected +BasicBlockSectionsProfileReader::parseUniqueBBID(StringRef S) const { SmallVector Parts; S.split(Parts, '.'); if (Parts.size() > 2) return createProfileParseError(Twine("unable to parse basic block id: '") + S + "'"); - unsigned long long BBID; - if (getAsUnsignedInteger(Parts[0], 10, BBID)) + unsigned long long BaseBBID; + if (getAsUnsignedInteger(Parts[0], 10, BaseBBID)) return createProfileParseError( Twine("unable to parse BB id: '" + Parts[0]) + "': unsigned integer expected"); @@ -51,21 +51,27 @@ BasicBlockSectionsProfileReader::parseProfileBBID(StringRef S) const { if (Parts.size() > 1 && getAsUnsignedInteger(Parts[1], 10, CloneID)) return createProfileParseError(Twine("unable to parse clone id: '") + Parts[1] + "': unsigned integer expected"); - return ProfileBBID{static_cast(BBID), - static_cast(CloneID)}; + return UniqueBBID{static_cast(BaseBBID), + static_cast(CloneID)}; } bool BasicBlockSectionsProfileReader::isFunctionHot(StringRef FuncName) const { - return getPathAndClusterInfoForFunction(FuncName).first; + return getClusterInfoForFunction(FuncName).first; } -std::pair 
-BasicBlockSectionsProfileReader::getPathAndClusterInfoForFunction( +std::pair> +BasicBlockSectionsProfileReader::getClusterInfoForFunction( StringRef FuncName) const { auto R = ProgramPathAndClusterInfo.find(getAliasName(FuncName)); return R != ProgramPathAndClusterInfo.end() - ? std::pair(true, R->second) - : std::pair(false, FunctionPathAndClusterInfo()); + ? std::pair(true, R->second.ClusterInfo) + : std::pair(false, SmallVector()); +} + +SmallVector> +BasicBlockSectionsProfileReader::getClonePathsForFunction( + StringRef FuncName) const { + return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName)).ClonePaths; } // Reads the version 1 basic block sections profile. Profile for each function @@ -133,7 +139,7 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() { // Temporary set to ensure every basic block ID appears once in the clusters // of a function. - DenseSet FuncBBIDs; + DenseSet FuncBBIDs; // Debug-info-based module filename for the current function. Empty string // means no filename. @@ -199,7 +205,7 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() { // Reset current cluster position. CurrentPosition = 0; for (auto BasicBlockIDStr : Values) { - auto BasicBlockID = parseProfileBBID(BasicBlockIDStr); + auto BasicBlockID = parseUniqueBBID(BasicBlockIDStr); if (!BasicBlockID) return BasicBlockID.takeError(); if (!FuncBBIDs.insert(*BasicBlockID).second) @@ -207,28 +213,32 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() { Twine("duplicate basic block id found '") + BasicBlockIDStr + "'"); - if (!BasicBlockID->BBID && CurrentPosition) + if (!BasicBlockID->BaseID && CurrentPosition) return createProfileParseError( "entry BB (0) does not begin a cluster."); - FI->second.ClusterInfo.emplace_back(BBClusterInfo{ + FI->second.ClusterInfo.emplace_back(BBClusterInfo{ *std::move(BasicBlockID), CurrentCluster, CurrentPosition++}); } CurrentCluster++; continue; case 'p': { // Basic block cloning path specifier. 
+ // Skip the profile when we the profile iterator (FI) refers to the + // past-the-end element. + if (FI == ProgramPathAndClusterInfo.end()) + continue; SmallSet BBsInPath; FI->second.ClonePaths.push_back({}); for (size_t I = 0; I < Values.size(); ++I) { - auto BBIDStr = Values[I]; - unsigned long long BBID = 0; - if (getAsUnsignedInteger(BBIDStr, 10, BBID)) + auto BaseBBIDStr = Values[I]; + unsigned long long BaseBBID = 0; + if (getAsUnsignedInteger(BaseBBIDStr, 10, BaseBBID)) return createProfileParseError(Twine("unsigned integer expected: '") + - BBIDStr + "'"); - if (I != 0 && !BBsInPath.insert(BBID).second) + BaseBBIDStr + "'"); + if (I != 0 && !BBsInPath.insert(BaseBBID).second) return createProfileParseError( - Twine("duplicate cloned block in path: '") + BBIDStr + "'"); - FI->second.ClonePaths.back().push_back(BBID); + Twine("duplicate cloned block in path: '") + BaseBBIDStr + "'"); + FI->second.ClonePaths.back().push_back(BaseBBID); } continue; } @@ -282,9 +292,9 @@ Error BasicBlockSectionsProfileReader::ReadV0Profile() { "entry BB (0) does not begin a cluster"); FI->second.ClusterInfo.emplace_back( - BBClusterInfo({{static_cast(BBID), 0}, - CurrentCluster, - CurrentPosition++})); + BBClusterInfo({{static_cast(BBID), 0}, + CurrentCluster, + CurrentPosition++})); } CurrentCluster++; } else { diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt index 326093ecff59..d3eb4df6173e 100644 --- a/llvm/lib/CodeGen/CMakeLists.txt +++ b/llvm/lib/CodeGen/CMakeLists.txt @@ -46,6 +46,7 @@ add_llvm_component_library(LLVMCodeGen BranchRelaxation.cpp BreakFalseDeps.cpp BasicBlockSections.cpp + BasicBlockPathCloning.cpp BasicBlockSectionsProfileReader.cpp CalcSpillWeights.cpp CallBrPrepare.cpp diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp index 6272b654b329..79a95ee0d747 100644 --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -20,6 +20,7 @@ using namespace llvm; void 
llvm::initializeCodeGen(PassRegistry &Registry) { initializeAssignmentTrackingAnalysisPass(Registry); initializeAtomicExpandPass(Registry); + initializeBasicBlockPathCloningPass(Registry); initializeBasicBlockSectionsPass(Registry); initializeBranchFolderPassPass(Registry); initializeBranchRelaxationPass(Registry); diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp index bfd9286ff59c..55bdb832aa76 100644 --- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -500,7 +500,7 @@ public: bool parseAlignment(uint64_t &Alignment); bool parseAddrspace(unsigned &Addrspace); bool parseSectionID(std::optional &SID); - bool parseBBID(std::optional &BBID); + bool parseBBID(std::optional &BBID); bool parseOperandsOffset(MachineOperand &Op); bool parseIRValue(const Value *&V); bool parseMemoryOperandFlag(MachineMemOperand::Flags &Flags); @@ -665,14 +665,20 @@ bool MIParser::parseSectionID(std::optional &SID) { } // Parse Machine Basic Block ID. 
-bool MIParser::parseBBID(std::optional &BBID) { +bool MIParser::parseBBID(std::optional &BBID) { assert(Token.is(MIToken::kw_bb_id)); lex(); - unsigned Value = 0; - if (getUnsigned(Value)) + unsigned BaseID = 0; + unsigned CloneID = 0; + if (getUnsigned(BaseID)) return error("Unknown BB ID"); - BBID = Value; lex(); + if (Token.is(MIToken::IntegerLiteral)) { + if (getUnsigned(CloneID)) + return error("Unknown Clone ID"); + lex(); + } + BBID = {BaseID, CloneID}; return false; } @@ -692,7 +698,7 @@ bool MIParser::parseBasicBlockDefinition( bool IsEHFuncletEntry = false; std::optional SectionID; uint64_t Alignment = 0; - std::optional BBID; + std::optional BBID; BasicBlock *BB = nullptr; if (consumeIfPresent(MIToken::lparen)) { do { diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp index c4a0474e393a..d7f07fd5c3c7 100644 --- a/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -570,7 +570,9 @@ void MachineBasicBlock::printName(raw_ostream &os, unsigned printNameFlags, } if (getBBID().has_value()) { os << (hasAttributes ? ", " : " ("); - os << "bb_id " << *getBBID(); + os << "bb_id " << getBBID()->BaseID; + if (getBBID()->CloneID != 0) + os << " " << getBBID()->CloneID; hasAttributes = true; } } @@ -884,7 +886,7 @@ void MachineBasicBlock::replaceSuccessor(MachineBasicBlock *Old, removeSuccessor(OldI); } -void MachineBasicBlock::copySuccessor(MachineBasicBlock *Orig, +void MachineBasicBlock::copySuccessor(const MachineBasicBlock *Orig, succ_iterator I) { if (!Orig->Probs.empty()) addSuccessor(*I, Orig->getSuccProbability(I)); diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp index 88939e96e07f..6d5a05f7970e 100644 --- a/llvm/lib/CodeGen/MachineFunction.cpp +++ b/llvm/lib/CodeGen/MachineFunction.cpp @@ -451,16 +451,17 @@ void MachineFunction::deleteMachineInstr(MachineInstr *MI) { /// Allocate a new MachineBasicBlock. 
Use this instead of /// `new MachineBasicBlock'. MachineBasicBlock * -MachineFunction::CreateMachineBasicBlock(const BasicBlock *bb) { +MachineFunction::CreateMachineBasicBlock(const BasicBlock *BB, + std::optional BBID) { MachineBasicBlock *MBB = new (BasicBlockRecycler.Allocate(Allocator)) - MachineBasicBlock(*this, bb); + MachineBasicBlock(*this, BB); // Set BBID for `-basic-block=sections=labels` and // `-basic-block-sections=list` to allow robust mapping of profiles to basic // blocks. if (Target.getBBSectionsType() == BasicBlockSection::Labels || Target.getBBSectionsType() == BasicBlockSection::List) - MBB->setBBID(NextBBID++); + MBB->setBBID(BBID.has_value() ? *BBID : UniqueBBID{NextBBID++, 0}); return MBB; } diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp index b29404b42519..2c6eb6c57cfc 100644 --- a/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -34,6 +34,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" using namespace llvm; @@ -430,10 +431,18 @@ bool TargetInstrInfo::produceSameValue(const MachineInstr &MI0, return MI0.isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs); } -MachineInstr &TargetInstrInfo::duplicate(MachineBasicBlock &MBB, - MachineBasicBlock::iterator InsertBefore, const MachineInstr &Orig) const { - assert(!Orig.isNotDuplicable() && "Instruction cannot be duplicated"); +MachineInstr & +TargetInstrInfo::duplicate(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + const MachineInstr &Orig) const { MachineFunction &MF = *MBB.getParent(); + // CFI instructions are marked as non-duplicable, because Darwin compact + // unwind info emission can't handle multiple prologue setups. 
+ assert((!Orig.isNotDuplicable() || + (!MF.getTarget().getTargetTriple().isOSDarwin() && + Orig.isCFIInstruction())) && + "Instruction cannot be duplicated"); + return MF.cloneMachineInstrBundle(MBB, InsertBefore, Orig); } diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index 87ac68c834a8..3396a9884e40 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -1265,6 +1265,7 @@ void TargetPassConfig::addMachinePasses() { if (TM->getBBSectionsType() == llvm::BasicBlockSection::List) { addPass(llvm::createBasicBlockSectionsProfileReaderPass( TM->getBBSectionsFuncListBuf())); + addPass(llvm::createBasicBlockPathCloningPass()); } addPass(llvm::createBasicBlockSectionsPass()); } else if (TM->Options.EnableMachineFunctionSplitter || diff --git a/llvm/test/CodeGen/X86/basic-block-labels-mir-parse.mir b/llvm/test/CodeGen/X86/basic-block-labels-mir-parse.mir index 74a7bcf3ae82..f11707c71989 100644 --- a/llvm/test/CodeGen/X86/basic-block-labels-mir-parse.mir +++ b/llvm/test/CodeGen/X86/basic-block-labels-mir-parse.mir @@ -136,7 +136,7 @@ body: | MOV32mi $rbp, 1, $noreg, -8, $noreg, 0 :: (store (s32) into %ir.2) - bb.3 (%ir-block.9, bb_id 3): + bb.3 (%ir-block.9, bb_id 3 2): renamable $eax = MOV32rm $rbp, 1, $noreg, -8, $noreg :: (load (s32) from %ir.2) $rbp = frame-destroy POP64r implicit-def $rsp, implicit $rsp frame-destroy CFI_INSTRUCTION def_cfa $rsp, 8 diff --git a/llvm/test/CodeGen/X86/basic-block-sections-cloning-1.ll b/llvm/test/CodeGen/X86/basic-block-sections-cloning-1.ll new file mode 100644 index 000000000000..0f84b891a7c5 --- /dev/null +++ b/llvm/test/CodeGen/X86/basic-block-sections-cloning-1.ll @@ -0,0 +1,71 @@ +;; Test cloning a single path with -basic-block-sections. + +declare void @effect(i32 zeroext) + +;; Test a valid application of path cloning. 
+; RUN: echo 'v1' > %t +; RUN: echo 'f foo' >> %t +; RUN: echo 'p 0 3 5' >> %t +; RUN: echo 'c 0 3.1 5.1 1 2 3 4 5' >> %t +; RUN: llc < %s -mtriple=x86_64-pc-linux -O0 -function-sections -basic-block-sections=%t | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-pc-linux -O0 -function-sections -basic-block-sections=%t -stop-after=bb-path-cloning | FileCheck %s --check-prefix=MIR + +define void @foo(i1 %a, i1 %b, i1 %c, i1 %d) { +b0: + call void @effect(i32 0) + br i1 %a, label %b1, label %b3 + +b1: ; preds = %b0 + call void @effect(i32 1) + br i1 %b, label %b2, label %b3 + +b2: ; preds = %b1 + call void @effect(i32 2) + br label %b3 + +b3: ; preds = %b0, %b1, %b2 + call void @effect(i32 3) + br i1 %c, label %b4, label %b5 + +b4: ; preds = %b3 + call void @effect(i32 4) + br i1 %d, label %b5, label %cold + +b5: ; preds = %b3, %b4 + call void @effect(i32 5) + ret void +cold: + call void @effect(i32 6) ; preds = %b4 + ret void +} + +;; Check the cloned block ids in MIR. + +; MIR: bb.7.b3 (bb_id 3 1): +; MIR: bb.8.b5 (bb_id 5 1): + +;; Check the final layout and branches. + +;; bb section: +; CHECK: .section .text.foo,"ax",@progbits +; CHECK: foo: +; CHECK: # %bb.0: # %b0 +; CHECK: jne .LBB0_1 +; CHECK-NEXT: # %bb.7: # %b3 +; CHECK: jne .LBB0_4 +; CHECK-NEXT: # %bb.8: # %b5 +; CHECK: retq +; CHECK-NEXT: .LBB0_1: # %b1 +; CHECK: je .LBB0_3 +; CHECK-NEXT: # %bb.2: # %b2 +; CHECK: callq effect@PLT +; CHECK-NEXT: .LBB0_3: # %b3 +; CHECK: je .LBB0_5 +; CHECK-NEXT: .LBB0_4: # %b4 +; CHECK: je foo.cold +; CHECK-NEXT: .LBB0_5: # %b5 +; CHECK: retq + +;; split section +; CHECK: .section .text.split.foo,"ax",@progbits +; CHECK: foo.cold: # %cold diff --git a/llvm/test/CodeGen/X86/basic-block-sections-cloning-2.ll b/llvm/test/CodeGen/X86/basic-block-sections-cloning-2.ll new file mode 100644 index 000000000000..c433491a4943 --- /dev/null +++ b/llvm/test/CodeGen/X86/basic-block-sections-cloning-2.ll @@ -0,0 +1,86 @@ +;; Test cloning two paths with -basic-block-sections. 
+ +declare void @effect(i32 zeroext) + +; RUN: echo 'v1' > %t +; RUN: echo 'f foo' >> %t +; RUN: echo 'p 0 3 5' >> %t +; RUN: echo 'p 1 3 4 5' >> %t +; RUN: echo 'c 0 3.1 5.1' >> %t +; RUN: echo 'c 1 3.2 4.1 5.2 2 3 4 5' >> %t +; RUN: llc < %s -mtriple=x86_64-pc-linux -O0 -function-sections -basic-block-sections=%t | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-pc-linux -O0 -function-sections -basic-block-sections=%t -stop-after=bb-path-cloning | FileCheck %s --check-prefix=MIR + +define void @foo(i1 %a, i1 %b, i1 %c, i1 %d) { +b0: + call void @effect(i32 0) + br i1 %a, label %b1, label %b3 + +b1: ; preds = %b0 + call void @effect(i32 1) + br i1 %b, label %b2, label %b3 + +b2: ; preds = %b1 + call void @effect(i32 2) + br label %b3 + +b3: ; preds = %b0, %b1, %b2 + call void @effect(i32 3) + br i1 %c, label %b4, label %b5 + +b4: ; preds = %b3 + call void @effect(i32 4) + br i1 %d, label %b5, label %cold + +b5: ; preds = %b3, %b4 + call void @effect(i32 5) + ret void +cold: + call void @effect(i32 6) ; preds = %b4 + ret void +} + +;; Check the cloned block ids in MIR. + +; MIR: bb.7.b3 (bb_id 3 1): +; MIR: bb.8.b5 (bb_id 5 1): +; MIR: bb.9.b3 (bb_id 3 2): +; MIR: bb.10.b4 (bb_id 4 1): +; MIR: bb.11.b5 (bb_id 5 2): + +;; Check the final layout and branches. 
+ +;; first cluster: +; CHECK: .section .text.foo,"ax",@progbits +; CHECK: foo: +; CHECK: # %bb.0: # %b0 +; CHECK: jne foo.__part.1 +; CHECK-NEXT: # %bb.7: # %b3 +; CHECK: jne .LBB0_4 +; CHECK-NEXT: # %bb.8: # %b5 +; CHECK: retq + +;; second cluster: +; CHECK: .section .text.foo,"ax",@progbits,unique,1 +; CHECK-NEXT: foo.__part.1: # %b1 +; CHECK: jne .LBB0_2 +; CHECK-NEXT: # %bb.9: # %b3 +; CHECK: je .LBB0_5 +; CHECK-NEXT: # %bb.10: # %b4 +; CHECK: je foo.cold +; CHECK-NEXT: # %bb.11: # %b5 +; CHECK: retq +; CHECK-NEXT: .LBB0_2: # %b2 +; CHECK: callq effect@PLT +; CHECK-NEXT: # %bb.3: # %b3 +; CHECK: je .LBB0_5 +; CHECK-NEXT: .LBB0_4: # %b4 +; CHECK: je foo.cold +; CHECK-NEXT: .LBB0_5: # %b5 +; CHECK: retq + +;; split section +; CHECK: .section .text.split.foo,"ax",@progbits +; CHECK: foo.cold: # %cold + + diff --git a/llvm/test/CodeGen/X86/basic-block-sections-cloning-indirect-invalid.ll b/llvm/test/CodeGen/X86/basic-block-sections-cloning-indirect-invalid.ll new file mode 100644 index 000000000000..d8686cdfa098 --- /dev/null +++ b/llvm/test/CodeGen/X86/basic-block-sections-cloning-indirect-invalid.ll @@ -0,0 +1,45 @@ +;; Tests for invalid path cloning with -basic-block-sections involving indirect branches. + +declare void @effect(i32 zeroext) + +;; Test failed application of path cloning for paths with indirect branches. 
+; RUN: echo 'v1' > %t1 +; RUN: echo 'f bar' >> %t1 +; RUN: echo 'p 0 1 2' >> %t1 +; RUN: echo 'c 0 1.1 2.1 1' >> %t1 +; RUN: llc < %s -mtriple=x86_64-pc-linux -O0 -function-sections -basic-block-sections=%t1 2> %t1.err | FileCheck %s +; RUN: FileCheck %s --check-prefix=WARN < %t1.err +; RUN: echo 'v1' > %t2 +; RUN: echo 'f bar' >> %t2 +; RUN: echo 'p 1 2' >> %t2 +; RUN: echo 'c 0 1 2.1' >> %t2 +; RUN: llc < %s -mtriple=x86_64-pc-linux -O0 -function-sections -basic-block-sections=%t2 2> %t2.err | FileCheck %s +; RUN: FileCheck %s --check-prefix=WARN < %t2.err + + +define void @bar(i1 %a, i1 %b) { +b0: + call void @effect(i32 0) + br i1 %a, label %b1, label %b2 +b1: ; preds = %b0 + call void @effect(i32 1) + %0 = select i1 %b, ; [#uses=1] + ptr blockaddress(@bar, %b2), + ptr blockaddress(@bar, %b3) + indirectbr ptr %0, [label %b2, label %b3] +b2: ; preds = %b0, %b1 + call void @effect(i32 2) + ret void +b3: + call void @effect(i32 3) ; preds = %b1 + ret void +} + +; CHECK: .section .text.bar,"ax",@progbits +; CHECK: bar: +; CHECK: # %bb.0: # %b0 +; CHECK: # %bb.1: # %b1 +; CHECK: .section .text.split.bar,"ax",@progbits +; CHECK: bar.cold: # %b2 + +; WARN: warning: block #1 has indirect branch and appears as the non-tail block of a path in function bar diff --git a/llvm/test/CodeGen/X86/basic-block-sections-cloning-indirect.ll b/llvm/test/CodeGen/X86/basic-block-sections-cloning-indirect.ll new file mode 100644 index 000000000000..3d9a8d36ca10 --- /dev/null +++ b/llvm/test/CodeGen/X86/basic-block-sections-cloning-indirect.ll @@ -0,0 +1,43 @@ +;; Test for cloning a path ending with indirect branch with -basic-block-sections. + +declare void @effect(i32 zeroext) + +;; Test valid application of cloning for a path with indirect branch. 
+; RUN: echo 'v1' > %t +; RUN: echo 'f bar' >> %t +; RUN: echo 'p 0 1' >> %t +; RUN: echo 'c 0 1.1 2 1' >> %t +; RUN: llc < %s -mtriple=x86_64-pc-linux -O0 -function-sections -basic-block-sections=%t | FileCheck %s + +define void @bar(i1 %a, i1 %b) { +b0: + call void @effect(i32 0) + br i1 %a, label %b1, label %b2 +b1: ; preds = %b0 + call void @effect(i32 1) + %0 = select i1 %b, ; [#uses=1] + ptr blockaddress(@bar, %b2), + ptr blockaddress(@bar, %b3) + indirectbr ptr %0, [label %b2, label %b3] +b2: ; preds = %b0, %b1 + call void @effect(i32 2) + ret void +b3: + call void @effect(i32 3) ; preds = %b1 + ret void +} + +; CHECK: .section .text.bar,"ax",@progbits +; CHECK: bar: +; CHECK: # %bb.0: # %b0 +; CHECK: je .LBB0_2 +; CHECK-NEXT: # %bb.4: # %b1 +; CHECK: jmpq *%rax +; CHECK-NEXT: .Ltmp0: # Block address taken +; CHECK-NEXT: .LBB0_2: # %b2 +; CHECK: retq +; CHECK-NEXT: # %bb.1: # %b1 +; CHECK: jmpq *%rax +; CHECK: .section .text.split.bar,"ax",@progbits +; CHECK: bar.cold: # %b3 + diff --git a/llvm/test/CodeGen/X86/basic-block-sections-cloning-invalid.ll b/llvm/test/CodeGen/X86/basic-block-sections-cloning-invalid.ll new file mode 100644 index 000000000000..521ec43ef050 --- /dev/null +++ b/llvm/test/CodeGen/X86/basic-block-sections-cloning-invalid.ll @@ -0,0 +1,72 @@ +;; Tests for invalid or (partially invalid) path clonings with -basic-block-sections. + +declare void @effect(i32 zeroext) + +;; Test failed application of path cloning. +; RUN: echo 'v1' > %t1 +; RUN: echo 'f foo' >> %t1 +; RUN: echo 'p 0 2 3' >> %t1 +; RUN: echo 'c 0 2.1 3.1 1' >> %t1 +; RUN: llc < %s -mtriple=x86_64-pc-linux -O0 -function-sections -basic-block-sections=%t1 2> %t1.err | FileCheck %s +; RUN: FileCheck %s --check-prefixes=WARN1 < %t1.err +;; Test that valid clonings are applied correctly, even if invalid clonings exist. 
+; RUN: echo 'v1' > %t2 +; RUN: echo 'f foo' >> %t2 +; RUN: echo 'p 0 2 3' >> %t2 +; RUN: echo 'p 0 1 3' >> %t2 +; RUN: echo 'c 0 1.1 3.2 2.1 3.1 1' >> %t2 +; RUN: llc < %s -mtriple=x86_64-pc-linux -O0 -function-sections -basic-block-sections=%t2 2> %t2.err | FileCheck %s --check-prefixes=PATH +; RUN: FileCheck %s --check-prefixes=WARN1 < %t2.err +; RUN: echo 'v1' > %t3 +; RUN: echo 'f foo' >> %t3 +; RUN: echo 'p 0 100' >> %t3 +; RUN: echo 'c 0 100.1 1' >> %t3 +; RUN: llc < %s -mtriple=x86_64-pc-linux -O0 -function-sections -basic-block-sections=%t3 2> %t3.err | FileCheck %s +; RUN: FileCheck %s --check-prefixes=WARN2 < %t3.err + +define void @foo(i1 %a, i1 %b, i1 %c, i1 %d) { +b0: + call void @effect(i32 0) + br i1 %a, label %b1, label %b3 + +b1: ; preds = %b0 + call void @effect(i32 1) + br i1 %b, label %b2, label %b3 + +b2: ; preds = %b1 + call void @effect(i32 2) + br label %b3 + +b3: ; preds = %b0, %b1, %b2 + call void @effect(i32 3) + br i1 %c, label %b4, label %b5 + +b4: ; preds = %b3 + call void @effect(i32 4) + br i1 %d, label %b5, label %cold + +b5: ; preds = %b3, %b4 + call void @effect(i32 5) + ret void +cold: + call void @effect(i32 6) ; preds = %b4 + ret void +} + +; CHECK: .section .text.foo,"ax",@progbits +; CHECK: foo: +; CHECK: # %bb.0: # %b0 + +; CHECK: je .LBB0_3 +; PATH: # %bb.7: # %b1 +; PATH: # %bb.8: # %b3 +; PATH: jne .LBB0_4 +; CHECK: # %bb.1: # %b1 +; CHECK: jne foo.cold + +; CHECK: foo.cold: # %b2 + +;; Check the warnings +; WARN1: warning: block #2 is not a successor of block #0 in function foo +; WARN2: warning: no block with id 100 in function foo + -- Gitee From 2b51d01a9423a6612616df2a16662d24e3ea8fe0 Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Fri, 17 Nov 2023 11:44:06 -0800 Subject: [PATCH 16/47] [NFC][SHT_LLVM_BB_ADDR_MAP] Define and use constructor and accessors for BBAddrMap fields. (#72689) The fields are still kept as public for now since our tooling accesses them. 
Will change them to private visibility in a later patch. --- llvm/include/llvm/Object/ELFTypes.h | 14 ++++++++++++-- llvm/lib/Object/ELF.cpp | 2 +- llvm/tools/llvm-objdump/llvm-objdump.cpp | 4 ++-- llvm/unittests/Object/ELFObjectFileTest.cpp | 8 ++++---- 4 files changed, 19 insertions(+), 9 deletions(-) diff --git a/llvm/include/llvm/Object/ELFTypes.h b/llvm/include/llvm/Object/ELFTypes.h index 215313ee6f9b..c4783919c736 100644 --- a/llvm/include/llvm/Object/ELFTypes.h +++ b/llvm/include/llvm/Object/ELFTypes.h @@ -796,7 +796,6 @@ template struct Elf_Mips_ABIFlags { // Struct representing the BBAddrMap for one function. struct BBAddrMap { - uint64_t Addr; // Function address // Struct representing the BBAddrMap information for one basic block. struct BBEntry { struct Metadata { @@ -857,13 +856,24 @@ struct BBAddrMap { bool isEHPad() const { return MD.IsEHPad; } bool canFallThrough() const { return MD.CanFallThrough; } }; - std::vector BBEntries; // Basic block entries for this function. + + BBAddrMap(uint64_t Addr, std::vector BBEntries) + : Addr(Addr), BBEntries(std::move(BBEntries)) {} + + // Returns the address of the corresponding function. + uint64_t getFunctionAddress() const { return Addr; } + + // Returns the basic block entries for this function. + const std::vector &getBBEntries() const { return BBEntries; } // Equality operator for unit testing. bool operator==(const BBAddrMap &Other) const { return Addr == Other.Addr && std::equal(BBEntries.begin(), BBEntries.end(), Other.BBEntries.begin()); } + + uint64_t Addr; // Function address + std::vector BBEntries; // Basic block entries for this function. }; } // end namespace object. 
diff --git a/llvm/lib/Object/ELF.cpp b/llvm/lib/Object/ELF.cpp index 0d1862e57371..1d73a6ffa73f 100644 --- a/llvm/lib/Object/ELF.cpp +++ b/llvm/lib/Object/ELF.cpp @@ -745,7 +745,7 @@ ELFFile::decodeBBAddrMap(const Elf_Shdr &Sec, } BBEntries.push_back({ID, Offset, Size, *MetadataOrErr}); } - FunctionEntries.push_back({Address, std::move(BBEntries)}); + FunctionEntries.emplace_back(Address, std::move(BBEntries)); } // Either Cur is in the error state, or we have an error in ULEBSizeErr or // MetadataDecodeErr (but not both), but we join all errors here to be safe. diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp index bd45ed199767..bee76d9c869a 100644 --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -1156,8 +1156,8 @@ collectBBAddrMapLabels(const std::unordered_map &AddrToBBAd auto Iter = AddrToBBAddrMap.find(StartAddress); if (Iter == AddrToBBAddrMap.end()) return; - for (const BBAddrMap::BBEntry &BBEntry : Iter->second.BBEntries) { - uint64_t BBAddress = BBEntry.Offset + Iter->second.Addr; + for (const BBAddrMap::BBEntry &BBEntry : Iter->second.getBBEntries()) { + uint64_t BBAddress = BBEntry.Offset + Iter->second.getFunctionAddress(); if (BBAddress >= EndAddress) continue; Labels[BBAddress].push_back(("BB" + Twine(BBEntry.ID)).str()); diff --git a/llvm/unittests/Object/ELFObjectFileTest.cpp b/llvm/unittests/Object/ELFObjectFileTest.cpp index fe5ce2154dc7..17402f39a5df 100644 --- a/llvm/unittests/Object/ELFObjectFileTest.cpp +++ b/llvm/unittests/Object/ELFObjectFileTest.cpp @@ -661,10 +661,10 @@ Sections: Metadata: 0x18 )"); - BBAddrMap E1 = {0x11111, {{1, 0x0, 0x1, {false, true, false, false, false}}}}; - BBAddrMap E2 = {0x22222, {{2, 0x0, 0x2, {false, false, true, false, false}}}}; - BBAddrMap E3 = {0x33333, {{0, 0x0, 0x3, {false, true, true, false, false}}}}; - BBAddrMap E4 = {0x44444, {{0, 0x0, 0x4, {false, false, false, true, true}}}}; + BBAddrMap E1(0x11111, {{1, 
0x0, 0x1, {false, true, false, false, false}}}); + BBAddrMap E2(0x22222, {{2, 0x0, 0x2, {false, false, true, false, false}}}); + BBAddrMap E3(0x33333, {{0, 0x0, 0x3, {false, true, true, false, false}}}); + BBAddrMap E4(0x44444, {{0, 0x0, 0x4, {false, false, false, true, true}}}); std::vector Section0BBAddrMaps = {E4}; std::vector Section1BBAddrMaps = {E3}; -- Gitee From 3810753cbf4fe90d4f4e48bb768e19452617fa22 Mon Sep 17 00:00:00 2001 From: Micah Weston Date: Tue, 12 Dec 2023 10:23:16 -0500 Subject: [PATCH 17/47] [SHT_LLVM_BB_ADDR_MAP] Implements PGOAnalysisMap in Object and ObjectYAML with tests. Reviewed in PR (#71750). A part of [RFC - PGO Accuracy Metrics: Emitting and Evaluating Branch and Block Analysis](https://discourse.llvm.org/t/rfc-pgo-accuracy-metrics-emitting-and-evaluating-branch-and-block-analysis/73902). This PR adds the PGOAnalysisMap data structure and implements encoding and decoding through Object and ObjectYAML along with associated tests. When emitted into the bb-addr-map section, each function is followed by the associated pgo-analysis-map for that function. The emitting of each analysis in the map is controlled by a bit in the bb-addr-map feature byte. All existing bb-addr-map code can ignore the pgo-analysis-map if the caller does not request the data. 
--- llvm/include/llvm/Object/ELF.h | 6 +- llvm/include/llvm/Object/ELFObjectFile.h | 8 +- llvm/include/llvm/Object/ELFTypes.h | 75 ++++ llvm/include/llvm/ObjectYAML/ELFYAML.h | 33 ++ llvm/lib/Object/ELF.cpp | 148 ++++++-- llvm/lib/Object/ELFObjectFile.cpp | 23 +- llvm/lib/ObjectYAML/ELFEmitter.cpp | 70 +++- llvm/lib/ObjectYAML/ELFYAML.cpp | 23 ++ llvm/unittests/Object/ELFObjectFileTest.cpp | 386 +++++++++++++++++++- llvm/unittests/Object/ELFTypesTest.cpp | 37 ++ 10 files changed, 752 insertions(+), 57 deletions(-) diff --git a/llvm/include/llvm/Object/ELF.h b/llvm/include/llvm/Object/ELF.h index a1cf47a1c4a6..df9cc631b350 100644 --- a/llvm/include/llvm/Object/ELF.h +++ b/llvm/include/llvm/Object/ELF.h @@ -414,8 +414,12 @@ public: /// within the text section that the SHT_LLVM_BB_ADDR_MAP section \p Sec /// is associated with. If the current ELFFile is relocatable, a corresponding /// \p RelaSec must be passed in as an argument. + /// Optional out variable to collect all PGO Analyses. New elements are only + /// added if no error occurs. If not provided, the PGO Analyses are decoded + /// then ignored. Expected> - decodeBBAddrMap(const Elf_Shdr &Sec, const Elf_Shdr *RelaSec = nullptr) const; + decodeBBAddrMap(const Elf_Shdr &Sec, const Elf_Shdr *RelaSec = nullptr, + std::vector *PGOAnalyses = nullptr) const; /// Returns a map from every section matching \p IsMatch to its relocation /// section, or \p nullptr if it has no relocation section. This function diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h index f3016cc141b0..5997a35a3905 100644 --- a/llvm/include/llvm/Object/ELFObjectFile.h +++ b/llvm/include/llvm/Object/ELFObjectFile.h @@ -111,9 +111,13 @@ public: /// Returns a vector of all BB address maps in the object file. When // `TextSectionIndex` is specified, only returns the BB address maps - // corresponding to the section with that index. + // corresponding to the section with that index. 
When `PGOAnalyses` is + // specified, the vector is cleared then filled with extra PGO data. + // `PGOAnalyses` will always be the same length as the return value on + // success, otherwise it is empty. Expected> - readBBAddrMap(std::optional TextSectionIndex = std::nullopt) const; + readBBAddrMap(std::optional TextSectionIndex = std::nullopt, + std::vector *PGOAnalyses = nullptr) const; }; class ELFSectionRef : public SectionRef { diff --git a/llvm/include/llvm/Object/ELFTypes.h b/llvm/include/llvm/Object/ELFTypes.h index c4783919c736..ea0f8cec6b7b 100644 --- a/llvm/include/llvm/Object/ELFTypes.h +++ b/llvm/include/llvm/Object/ELFTypes.h @@ -13,6 +13,8 @@ #include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/Object/Error.h" +#include "llvm/Support/BlockFrequency.h" +#include "llvm/Support/BranchProbability.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include "llvm/Support/MathExtras.h" @@ -876,6 +878,79 @@ struct BBAddrMap { std::vector BBEntries; // Basic block entries for this function. }; +/// A feature extension of BBAddrMap that holds information relevant to PGO. +struct PGOAnalysisMap { + /// Bitfield of optional features to include in the PGO extended map. + struct Features { + bool FuncEntryCount : 1; + bool BBFreq : 1; + bool BrProb : 1; + + // Encodes to minimum bit width representation. + uint8_t encode() const { + return (static_cast(FuncEntryCount) << 0) | + (static_cast(BBFreq) << 1) | + (static_cast(BrProb) << 2); + } + + // Decodes from minimum bit width representation and validates no + // unnecessary bits are used.
+ static Expected decode(uint8_t Val) { + Features Feat{static_cast(Val & (1 << 0)), + static_cast(Val & (1 << 1)), + static_cast(Val & (1 << 2))}; + if (Feat.encode() != Val) + return createStringError( + std::error_code(), + "invalid encoding for PGOAnalysisMap::Features: 0x%x", Val); + return Feat; + } + + bool operator==(const Features &Other) const { + return std::tie(FuncEntryCount, BBFreq, BrProb) == + std::tie(Other.FuncEntryCount, Other.BBFreq, Other.BrProb); + } + }; + + /// Extra basic block data with fields for block frequency and branch + /// probability. + struct PGOBBEntry { + /// Single successor of a given basic block that contains the tag and branch + /// probability associated with it. + struct SuccessorEntry { + /// Unique ID of this successor basic block. + uint32_t ID; + /// Branch Probability of the edge to this successor taken from MBPI. + BranchProbability Prob; + + bool operator==(const SuccessorEntry &Other) const { + return std::tie(ID, Prob) == std::tie(Other.ID, Other.Prob); + } + }; + + /// Block frequency taken from MBFI + BlockFrequency BlockFreq; + /// List of successors of the current block + llvm::SmallVector Successors; + + bool operator==(const PGOBBEntry &Other) const { + return std::tie(BlockFreq, Successors) == + std::tie(Other.BlockFreq, Other.Successors); + } + }; + + uint64_t FuncEntryCount; // Prof count from IR function + std::vector BBEntries; // Extended basic block entries + + // Flags to indicate if each PGO related info was enabled in this function + Features FeatEnable; + + bool operator==(const PGOAnalysisMap &Other) const { + return std::tie(FuncEntryCount, BBEntries, FeatEnable) == + std::tie(Other.FuncEntryCount, Other.BBEntries, Other.FeatEnable); + } +}; + } // end namespace object. } // end namespace llvm. 
diff --git a/llvm/include/llvm/ObjectYAML/ELFYAML.h b/llvm/include/llvm/ObjectYAML/ELFYAML.h index 1ba41232f552..12b47c271da2 100644 --- a/llvm/include/llvm/ObjectYAML/ELFYAML.h +++ b/llvm/include/llvm/ObjectYAML/ELFYAML.h @@ -170,6 +170,19 @@ struct BBAddrMapEntry { std::optional> BBEntries; }; +struct PGOAnalysisMapEntry { + struct PGOBBEntry { + struct SuccessorEntry { + uint32_t ID; + llvm::yaml::Hex32 BrProb; + }; + std::optional BBFreq; + std::optional> Successors; + }; + std::optional FuncEntryCount; + std::optional> PGOBBEntries; +}; + struct StackSizeEntry { llvm::yaml::Hex64 Address; llvm::yaml::Hex64 Size; @@ -317,6 +330,7 @@ struct SectionHeaderTable : Chunk { struct BBAddrMapSection : Section { std::optional> Entries; + std::optional> PGOAnalyses; BBAddrMapSection() : Section(ChunkKind::BBAddrMap) {} @@ -737,6 +751,10 @@ bool shouldAllocateFileSpace(ArrayRef Phdrs, LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::StackSizeEntry) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::BBAddrMapEntry) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::BBAddrMapEntry::BBEntry) +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::PGOAnalysisMapEntry) +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::PGOAnalysisMapEntry::PGOBBEntry) +LLVM_YAML_IS_SEQUENCE_VECTOR( + llvm::ELFYAML::PGOAnalysisMapEntry::PGOBBEntry::SuccessorEntry) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::DynamicEntry) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::LinkerOption) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::CallGraphEntryWeight) @@ -905,6 +923,21 @@ template <> struct MappingTraits { static void mapping(IO &IO, ELFYAML::BBAddrMapEntry::BBEntry &Rel); }; +template <> struct MappingTraits { + static void mapping(IO &IO, ELFYAML::PGOAnalysisMapEntry &Rel); +}; + +template <> struct MappingTraits { + static void mapping(IO &IO, ELFYAML::PGOAnalysisMapEntry::PGOBBEntry &Rel); +}; + +template <> +struct MappingTraits { + static void + mapping(IO &IO, + ELFYAML::PGOAnalysisMapEntry::PGOBBEntry::SuccessorEntry &Rel); +}; + 
template <> struct MappingTraits { static void mapping(IO &IO, ELFYAML::GnuHashHeader &Rel); }; diff --git a/llvm/lib/Object/ELF.cpp b/llvm/lib/Object/ELF.cpp index 1d73a6ffa73f..17668da5a8c6 100644 --- a/llvm/lib/Object/ELF.cpp +++ b/llvm/lib/Object/ELF.cpp @@ -645,11 +645,36 @@ ELFFile::toMappedAddr(uint64_t VAddr, WarningHandler WarnHandler) const { return base() + Offset; } -template -Expected> -ELFFile::decodeBBAddrMap(const Elf_Shdr &Sec, - const Elf_Shdr *RelaSec) const { - bool IsRelocatable = getHeader().e_type == ELF::ET_REL; +// Helper to extract and decode the next ULEB128 value as unsigned int. +// Returns zero and sets ULEBSizeErr if the ULEB128 value exceeds the unsigned +// int limit. +// Also returns zero if ULEBSizeErr is already in an error state. +// ULEBSizeErr is an out variable if an error occurs. +template , int> = 0> +static IntTy readULEB128As(DataExtractor &Data, DataExtractor::Cursor &Cur, + Error &ULEBSizeErr) { + // Bail out and do not extract data if ULEBSizeErr is already set. 
+ if (ULEBSizeErr) + return 0; + uint64_t Offset = Cur.tell(); + uint64_t Value = Data.getULEB128(Cur); + if (Value > std::numeric_limits::max()) { + ULEBSizeErr = createError("ULEB128 value at offset 0x" + + Twine::utohexstr(Offset) + " exceeds UINT" + + Twine(std::numeric_limits::digits) + + "_MAX (0x" + Twine::utohexstr(Value) + ")"); + return 0; + } + return static_cast(Value); +} + +template +static Expected> +decodeBBAddrMapImpl(const ELFFile &EF, + const typename ELFFile::Elf_Shdr &Sec, + const typename ELFFile::Elf_Shdr *RelaSec, + std::vector *PGOAnalyses) { + bool IsRelocatable = EF.getHeader().e_type == ELF::ET_REL; // This DenseMap maps the offset of each function (the location of the // reference to the function in the SHT_LLVM_BB_ADDR_MAP section) to the @@ -659,44 +684,28 @@ ELFFile::decodeBBAddrMap(const Elf_Shdr &Sec, assert(RelaSec && "Can't read a SHT_LLVM_BB_ADDR_MAP section in a relocatable " "object file without providing a relocation section."); - Expected Relas = this->relas(*RelaSec); + Expected::Elf_Rela_Range> Relas = EF.relas(*RelaSec); if (!Relas) return createError("unable to read relocations for section " + - describe(*this, Sec) + ": " + + describe(EF, Sec) + ": " + toString(Relas.takeError())); - for (Elf_Rela Rela : *Relas) + for (typename ELFFile::Elf_Rela Rela : *Relas) FunctionOffsetTranslations[Rela.r_offset] = Rela.r_addend; } - Expected> ContentsOrErr = getSectionContents(Sec); + Expected> ContentsOrErr = EF.getSectionContents(Sec); if (!ContentsOrErr) return ContentsOrErr.takeError(); ArrayRef Content = *ContentsOrErr; - DataExtractor Data(Content, isLE(), ELFT::Is64Bits ? 8 : 4); + DataExtractor Data(Content, EF.isLE(), ELFT::Is64Bits ? 8 : 4); std::vector FunctionEntries; DataExtractor::Cursor Cur(0); Error ULEBSizeErr = Error::success(); Error MetadataDecodeErr = Error::success(); - // Helper to extract and decode the next ULEB128 value as uint32_t. 
- // Returns zero and sets ULEBSizeErr if the ULEB128 value exceeds the uint32_t - // limit. - // Also returns zero if ULEBSizeErr is already in an error state. - auto ReadULEB128AsUInt32 = [&Data, &Cur, &ULEBSizeErr]() -> uint32_t { - // Bail out and do not extract data if ULEBSizeErr is already set. - if (ULEBSizeErr) - return 0; - uint64_t Offset = Cur.tell(); - uint64_t Value = Data.getULEB128(Cur); - if (Value > UINT32_MAX) { - ULEBSizeErr = createError( - "ULEB128 value at offset 0x" + Twine::utohexstr(Offset) + - " exceeds UINT32_MAX (0x" + Twine::utohexstr(Value) + ")"); - return 0; - } - return static_cast(Value); - }; uint8_t Version = 0; + uint8_t Feature = 0; + PGOAnalysisMap::Features FeatEnable{}; while (!ULEBSizeErr && !MetadataDecodeErr && Cur && Cur.tell() < Content.size()) { if (Sec.sh_type == ELF::SHT_LLVM_BB_ADDR_MAP) { @@ -706,10 +715,24 @@ ELFFile::decodeBBAddrMap(const Elf_Shdr &Sec, if (Version > 2) return createError("unsupported SHT_LLVM_BB_ADDR_MAP version: " + Twine(static_cast(Version))); - Data.getU8(Cur); // Feature byte + Feature = Data.getU8(Cur); // Feature byte + if (!Cur) + break; + auto FeatEnableOrErr = PGOAnalysisMap::Features::decode(Feature); + if (!FeatEnableOrErr) + return FeatEnableOrErr.takeError(); + FeatEnable = + FeatEnableOrErr ? 
*FeatEnableOrErr : PGOAnalysisMap::Features{}; + if (Feature != 0 && Version < 2 && Cur) + return createError( + "version should be >= 2 for SHT_LLVM_BB_ADDR_MAP when " + "PGO features are enabled: version = " + + Twine(static_cast(Version)) + + " feature = " + Twine(static_cast(Feature))); } uint64_t SectionOffset = Cur.tell(); - uintX_t Address = static_cast(Data.getAddress(Cur)); + auto Address = + static_cast::uintX_t>(Data.getAddress(Cur)); if (!Cur) return Cur.takeError(); if (IsRelocatable) { @@ -718,20 +741,23 @@ ELFFile::decodeBBAddrMap(const Elf_Shdr &Sec, if (FOTIterator == FunctionOffsetTranslations.end()) { return createError("failed to get relocation data for offset: " + Twine::utohexstr(SectionOffset) + " in section " + - describe(*this, Sec)); + describe(EF, Sec)); } Address = FOTIterator->second; } - uint32_t NumBlocks = ReadULEB128AsUInt32(); + uint32_t NumBlocks = readULEB128As(Data, Cur, ULEBSizeErr); + std::vector BBEntries; uint32_t PrevBBEndOffset = 0; for (uint32_t BlockIndex = 0; !MetadataDecodeErr && !ULEBSizeErr && Cur && (BlockIndex < NumBlocks); ++BlockIndex) { - uint32_t ID = Version >= 2 ? ReadULEB128AsUInt32() : BlockIndex; - uint32_t Offset = ReadULEB128AsUInt32(); - uint32_t Size = ReadULEB128AsUInt32(); - uint32_t MD = ReadULEB128AsUInt32(); + uint32_t ID = Version >= 2 + ? readULEB128As(Data, Cur, ULEBSizeErr) + : BlockIndex; + uint32_t Offset = readULEB128As(Data, Cur, ULEBSizeErr); + uint32_t Size = readULEB128As(Data, Cur, ULEBSizeErr); + uint32_t MD = readULEB128As(Data, Cur, ULEBSizeErr); if (Version >= 1) { // Offset is calculated relative to the end of the previous BB. 
Offset += PrevBBEndOffset; @@ -746,6 +772,44 @@ ELFFile::decodeBBAddrMap(const Elf_Shdr &Sec, BBEntries.push_back({ID, Offset, Size, *MetadataOrErr}); } FunctionEntries.emplace_back(Address, std::move(BBEntries)); + + if (FeatEnable.FuncEntryCount || FeatEnable.BBFreq || FeatEnable.BrProb) { + // Function entry count + uint64_t FuncEntryCount = + FeatEnable.FuncEntryCount + ? readULEB128As(Data, Cur, ULEBSizeErr) + : 0; + + std::vector PGOBBEntries; + for (uint32_t BlockIndex = 0; !MetadataDecodeErr && !ULEBSizeErr && Cur && + (BlockIndex < NumBlocks); + ++BlockIndex) { + // Block frequency + uint64_t BBF = FeatEnable.BBFreq + ? readULEB128As(Data, Cur, ULEBSizeErr) + : 0; + + // Branch probability + llvm::SmallVector + Successors; + if (FeatEnable.BrProb) { + auto SuccCount = readULEB128As(Data, Cur, ULEBSizeErr); + for (uint64_t I = 0; I < SuccCount; ++I) { + uint32_t BBID = readULEB128As(Data, Cur, ULEBSizeErr); + uint32_t BrProb = readULEB128As(Data, Cur, ULEBSizeErr); + if (PGOAnalyses) + Successors.push_back({BBID, BranchProbability::getRaw(BrProb)}); + } + } + + if (PGOAnalyses) + PGOBBEntries.push_back({BlockFrequency(BBF), std::move(Successors)}); + } + + if (PGOAnalyses) + PGOAnalyses->push_back( + {FuncEntryCount, std::move(PGOBBEntries), FeatEnable}); + } } // Either Cur is in the error state, or we have an error in ULEBSizeErr or // MetadataDecodeErr (but not both), but we join all errors here to be safe. @@ -755,6 +819,18 @@ ELFFile::decodeBBAddrMap(const Elf_Shdr &Sec, return FunctionEntries; } +template +Expected> +ELFFile::decodeBBAddrMap(const Elf_Shdr &Sec, const Elf_Shdr *RelaSec, + std::vector *PGOAnalyses) const { + size_t OriginalPGOSize = PGOAnalyses ? 
PGOAnalyses->size() : 0; + auto AddrMapsOrErr = decodeBBAddrMapImpl(*this, Sec, RelaSec, PGOAnalyses); + // remove new analyses when an error occurs + if (!AddrMapsOrErr && PGOAnalyses) + PGOAnalyses->resize(OriginalPGOSize); + return std::move(AddrMapsOrErr); +} + template Expected< MapVector> diff --git a/llvm/lib/Object/ELFObjectFile.cpp b/llvm/lib/Object/ELFObjectFile.cpp index 143f9d37849d..462cef1c6d4c 100644 --- a/llvm/lib/Object/ELFObjectFile.cpp +++ b/llvm/lib/Object/ELFObjectFile.cpp @@ -710,10 +710,13 @@ std::vector ELFObjectFileBase::getPltEntries() const { template Expected> static readBBAddrMapImpl( - const ELFFile &EF, std::optional TextSectionIndex) { + const ELFFile &EF, std::optional TextSectionIndex, + std::vector *PGOAnalyses) { using Elf_Shdr = typename ELFT::Shdr; bool IsRelocatable = EF.getHeader().e_type == ELF::ET_REL; std::vector BBAddrMaps; + if (PGOAnalyses) + PGOAnalyses->clear(); const auto &Sections = cantFail(EF.sections()); auto IsMatch = [&](const Elf_Shdr &Sec) -> Expected { @@ -742,10 +745,13 @@ Expected> static readBBAddrMapImpl( return createError("unable to get relocation section for " + describe(EF, *Sec)); Expected> BBAddrMapOrErr = - EF.decodeBBAddrMap(*Sec, RelocSec); - if (!BBAddrMapOrErr) + EF.decodeBBAddrMap(*Sec, RelocSec, PGOAnalyses); + if (!BBAddrMapOrErr) { + if (PGOAnalyses) + PGOAnalyses->clear(); return createError("unable to read " + describe(EF, *Sec) + ": " + toString(BBAddrMapOrErr.takeError())); + } std::move(BBAddrMapOrErr->begin(), BBAddrMapOrErr->end(), std::back_inserter(BBAddrMaps)); } @@ -822,13 +828,14 @@ ELFObjectFileBase::readDynsymVersions() const { } Expected> ELFObjectFileBase::readBBAddrMap( - std::optional TextSectionIndex) const { + std::optional TextSectionIndex, + std::vector *PGOAnalyses) const { if (const auto *Obj = dyn_cast(this)) - return readBBAddrMapImpl(Obj->getELFFile(), TextSectionIndex); + return readBBAddrMapImpl(Obj->getELFFile(), TextSectionIndex, PGOAnalyses); if (const auto 
*Obj = dyn_cast(this)) - return readBBAddrMapImpl(Obj->getELFFile(), TextSectionIndex); + return readBBAddrMapImpl(Obj->getELFFile(), TextSectionIndex, PGOAnalyses); if (const auto *Obj = dyn_cast(this)) - return readBBAddrMapImpl(Obj->getELFFile(), TextSectionIndex); + return readBBAddrMapImpl(Obj->getELFFile(), TextSectionIndex, PGOAnalyses); return readBBAddrMapImpl(cast(this)->getELFFile(), - TextSectionIndex); + TextSectionIndex, PGOAnalyses); } diff --git a/llvm/lib/ObjectYAML/ELFEmitter.cpp b/llvm/lib/ObjectYAML/ELFEmitter.cpp index a64ab62ef22b..917eb2a60428 100644 --- a/llvm/lib/ObjectYAML/ELFEmitter.cpp +++ b/llvm/lib/ObjectYAML/ELFEmitter.cpp @@ -32,6 +32,7 @@ #include "llvm/Support/YAMLTraits.h" #include "llvm/Support/raw_ostream.h" #include +#include using namespace llvm; @@ -1390,10 +1391,24 @@ template void ELFState::writeSectionContent( Elf_Shdr &SHeader, const ELFYAML::BBAddrMapSection &Section, ContiguousBlobAccumulator &CBA) { - if (!Section.Entries) + if (!Section.Entries) { + if (Section.PGOAnalyses) + WithColor::warning() + << "PGOAnalyses should not exist in SHT_LLVM_BB_ADDR_MAP when " + "Entries does not exist"; return; + } + + const std::vector *PGOAnalyses = nullptr; + if (Section.PGOAnalyses) { + if (Section.Entries->size() != Section.PGOAnalyses->size()) + WithColor::warning() << "PGOAnalyses must be the same length as Entries " + "in SHT_LLVM_BB_ADDR_MAP"; + else + PGOAnalyses = &Section.PGOAnalyses.value(); + } - for (const ELFYAML::BBAddrMapEntry &E : *Section.Entries) { + for (const auto &[Idx, E] : llvm::enumerate(*Section.Entries)) { // Write version and feature values. 
if (Section.Type == llvm::ELF::SHT_LLVM_BB_ADDR_MAP) { if (E.Version > 2) @@ -1404,6 +1419,14 @@ void ELFState::writeSectionContent( CBA.write(E.Feature); SHeader.sh_size += 2; } + + if (Section.PGOAnalyses) { + if (E.Version < 2) + WithColor::warning() + << "unsupported SHT_LLVM_BB_ADDR_MAP version when using PGO: " + << static_cast(E.Version) << "; must use version >= 2"; + } + // Write the address of the function. CBA.write(E.Address, ELFT::TargetEndianness); // Write number of BBEntries (number of basic blocks in the function). This @@ -1412,14 +1435,43 @@ void ELFState::writeSectionContent( E.NumBlocks.value_or(E.BBEntries ? E.BBEntries->size() : 0); SHeader.sh_size += sizeof(uintX_t) + CBA.writeULEB128(NumBlocks); // Write all BBEntries. - if (!E.BBEntries) + if (E.BBEntries) { + for (const ELFYAML::BBAddrMapEntry::BBEntry &BBE : *E.BBEntries) { + if (Section.Type == llvm::ELF::SHT_LLVM_BB_ADDR_MAP && E.Version > 1) + SHeader.sh_size += CBA.writeULEB128(BBE.ID); + SHeader.sh_size += CBA.writeULEB128(BBE.AddressOffset) + + CBA.writeULEB128(BBE.Size) + + CBA.writeULEB128(BBE.Metadata); + } + } + + if (!PGOAnalyses) continue; - for (const ELFYAML::BBAddrMapEntry::BBEntry &BBE : *E.BBEntries) { - if (Section.Type == llvm::ELF::SHT_LLVM_BB_ADDR_MAP && E.Version > 1) - SHeader.sh_size += CBA.writeULEB128(BBE.ID); - SHeader.sh_size += CBA.writeULEB128(BBE.AddressOffset) + - CBA.writeULEB128(BBE.Size) + - CBA.writeULEB128(BBE.Metadata); + const ELFYAML::PGOAnalysisMapEntry &PGOEntry = PGOAnalyses->at(Idx); + + if (PGOEntry.FuncEntryCount) + SHeader.sh_size += CBA.writeULEB128(*PGOEntry.FuncEntryCount); + + if (!PGOEntry.PGOBBEntries) + continue; + + const auto &PGOBBEntries = PGOEntry.PGOBBEntries.value(); + if (!E.BBEntries || E.BBEntries->size() != PGOBBEntries.size()) { + WithColor::warning() << "PBOBBEntries must be the same length as " + "BBEntries in SHT_LLVM_BB_ADDR_MAP.\n" + << "Mismatch on function with address: " + << E.Address; + continue; + } + + for 
(const auto &PGOBBE : PGOBBEntries) { + if (PGOBBE.BBFreq) + SHeader.sh_size += CBA.writeULEB128(*PGOBBE.BBFreq); + if (PGOBBE.Successors) { + SHeader.sh_size += CBA.writeULEB128(PGOBBE.Successors->size()); + for (const auto &[ID, BrProb] : *PGOBBE.Successors) + SHeader.sh_size += CBA.writeULEB128(ID) + CBA.writeULEB128(BrProb); + } } } } diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp index e92c61d81055..83deb680074f 100644 --- a/llvm/lib/ObjectYAML/ELFYAML.cpp +++ b/llvm/lib/ObjectYAML/ELFYAML.cpp @@ -1386,6 +1386,7 @@ static void sectionMapping(IO &IO, ELFYAML::BBAddrMapSection &Section) { commonSectionMapping(IO, Section); IO.mapOptional("Content", Section.Content); IO.mapOptional("Entries", Section.Entries); + IO.mapOptional("PGOAnalyses", Section.PGOAnalyses); } static void sectionMapping(IO &IO, ELFYAML::StackSizesSection &Section) { @@ -1821,6 +1822,28 @@ void MappingTraits::mapping( IO.mapRequired("Metadata", E.Metadata); } +void MappingTraits::mapping( + IO &IO, ELFYAML::PGOAnalysisMapEntry &E) { + assert(IO.getContext() && "The IO context is not initialized"); + IO.mapOptional("FuncEntryCount", E.FuncEntryCount); + IO.mapOptional("PGOBBEntries", E.PGOBBEntries); +} + +void MappingTraits::mapping( + IO &IO, ELFYAML::PGOAnalysisMapEntry::PGOBBEntry &E) { + assert(IO.getContext() && "The IO context is not initialized"); + IO.mapOptional("BBFreq", E.BBFreq); + IO.mapOptional("Successors", E.Successors); +} + +void MappingTraits:: + mapping(IO &IO, + ELFYAML::PGOAnalysisMapEntry::PGOBBEntry::SuccessorEntry &E) { + assert(IO.getContext() && "The IO context is not initialized"); + IO.mapRequired("ID", E.ID); + IO.mapRequired("BrProb", E.BrProb); +} + void MappingTraits::mapping(IO &IO, ELFYAML::GnuHashHeader &E) { assert(IO.getContext() && "The IO context is not initialized"); diff --git a/llvm/unittests/Object/ELFObjectFileTest.cpp b/llvm/unittests/Object/ELFObjectFileTest.cpp index 17402f39a5df..2878ca088cd7 100644 --- 
a/llvm/unittests/Object/ELFObjectFileTest.cpp +++ b/llvm/unittests/Object/ELFObjectFileTest.cpp @@ -9,6 +9,7 @@ #include "llvm/Object/ELFObjectFile.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ObjectYAML/yaml2obj.h" +#include "llvm/Support/BlockFrequency.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/YAMLTraits.h" #include "llvm/Testing/Support/Error.h" @@ -683,7 +684,7 @@ Sections: ElfOrErr->getELFFile().getSection(1); ASSERT_THAT_EXPECTED(BBAddrMapSecOrErr, Succeeded()); auto BBAddrMaps = ElfOrErr->readBBAddrMap(TextSectionIndex); - EXPECT_THAT_EXPECTED(BBAddrMaps, Succeeded()); + ASSERT_THAT_EXPECTED(BBAddrMaps, Succeeded()); EXPECT_EQ(*BBAddrMaps, ExpectedResult); }; @@ -744,6 +745,389 @@ Sections: Section1BBAddrMaps); } +// Tests for error paths of the ELFFile::decodeBBAddrMap with PGOAnalysisMap +// API. +TEST(ELFObjectFileTest, InvalidDecodePGOAnalysisMap) { + if (IsHostWindows()) + GTEST_SKIP(); + StringRef CommonYamlString(R"( +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC +Sections: + - Type: SHT_LLVM_BB_ADDR_MAP + Name: .llvm_bb_addr_map + Entries: + - Address: 0x11111 +)"); + + auto DoCheck = [&](StringRef YamlString, const char *ErrMsg) { + SmallString<0> Storage; + Expected> ElfOrErr = + toBinary(Storage, YamlString); + ASSERT_THAT_EXPECTED(ElfOrErr, Succeeded()); + const ELFFile &Elf = ElfOrErr->getELFFile(); + + Expected BBAddrMapSecOrErr = + Elf.getSection(1); + ASSERT_THAT_EXPECTED(BBAddrMapSecOrErr, Succeeded()); + + std::vector PGOAnalyses; + EXPECT_THAT_ERROR( + Elf.decodeBBAddrMap(**BBAddrMapSecOrErr, nullptr, &PGOAnalyses) + .takeError(), + FailedWithMessage(ErrMsg)); + }; + + // Check that we can detect unsupported versions that are too old. 
+ SmallString<128> UnsupportedLowVersionYamlString(CommonYamlString); + UnsupportedLowVersionYamlString += R"( + Version: 1 + Feature: 0x4 + BBEntries: + - AddressOffset: 0x0 + Size: 0x1 + Metadata: 0x2 +)"; + + DoCheck(UnsupportedLowVersionYamlString, + "version should be >= 2 for SHT_LLVM_BB_ADDR_MAP when PGO features " + "are enabled: version = 1 feature = 4"); + + SmallString<128> CommonVersionedYamlString(CommonYamlString); + CommonVersionedYamlString += R"( + Version: 2 + BBEntries: + - ID: 1 + AddressOffset: 0x0 + Size: 0x1 + Metadata: 0x2 +)"; + + // Check that we fail when function entry count is enabled but not provided. + SmallString<128> MissingFuncEntryCount(CommonYamlString); + MissingFuncEntryCount += R"( + Version: 2 + Feature: 0x01 +)"; + + DoCheck(MissingFuncEntryCount, + "unable to decode LEB128 at offset 0x0000000b: malformed uleb128, " + "extends past end"); + + // Check that we fail when basic block frequency is enabled but not provided. + SmallString<128> MissingBBFreq(CommonYamlString); + MissingBBFreq += R"( + Version: 2 + Feature: 0x02 + BBEntries: + - ID: 1 + AddressOffset: 0x0 + Size: 0x1 + Metadata: 0x2 +)"; + + DoCheck(MissingBBFreq, "unable to decode LEB128 at offset 0x0000000f: " + "malformed uleb128, extends past end"); + + // Check that we fail when branch probability is enabled but not provided. 
+ SmallString<128> MissingBrProb(CommonYamlString); + MissingBrProb += R"( + Version: 2 + Feature: 0x04 + BBEntries: + - ID: 1 + AddressOffset: 0x0 + Size: 0x1 + Metadata: 0x6 + - ID: 2 + AddressOffset: 0x1 + Size: 0x1 + Metadata: 0x2 + - ID: 3 + AddressOffset: 0x2 + Size: 0x1 + Metadata: 0x2 + PGOAnalyses: + - PGOBBEntries: + - Successors: + - ID: 2 + BrProb: 0x80000000 + - ID: 3 + BrProb: 0x80000000 + - Successors: + - ID: 3 + BrProb: 0xF0000000 +)"; + + DoCheck(MissingBrProb, "unable to decode LEB128 at offset 0x00000017: " + "malformed uleb128, extends past end"); +} + +// Test for the ELFObjectFile::readBBAddrMap API with PGOAnalysisMap. +TEST(ELFObjectFileTest, ReadPGOAnalysisMap) { + if (IsHostWindows()) + GTEST_SKIP(); + StringRef CommonYamlString(R"( +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC +Sections: + - Name: .llvm_bb_addr_map_1 + Type: SHT_LLVM_BB_ADDR_MAP + Link: 1 + Entries: + - Version: 2 + Address: 0x11111 + Feature: 0x1 + BBEntries: + - ID: 1 + AddressOffset: 0x0 + Size: 0x1 + Metadata: 0x2 + PGOAnalyses: + - FuncEntryCount: 892 + - Name: .llvm_bb_addr_map_2 + Type: SHT_LLVM_BB_ADDR_MAP + Link: 1 + Entries: + - Version: 2 + Address: 0x22222 + Feature: 0x2 + BBEntries: + - ID: 2 + AddressOffset: 0x0 + Size: 0x2 + Metadata: 0x4 + PGOAnalyses: + - PGOBBEntries: + - BBFreq: 343 + - Name: .llvm_bb_addr_map_3 + Type: SHT_LLVM_BB_ADDR_MAP + Link: 2 + Entries: + - Version: 2 + Address: 0x33333 + Feature: 0x4 + BBEntries: + - ID: 0 + AddressOffset: 0x0 + Size: 0x3 + Metadata: 0x6 + - ID: 1 + AddressOffset: 0x0 + Size: 0x3 + Metadata: 0x4 + - ID: 2 + AddressOffset: 0x0 + Size: 0x3 + Metadata: 0x0 + PGOAnalyses: + - PGOBBEntries: + - Successors: + - ID: 1 + BrProb: 0x11111111 + - ID: 2 + BrProb: 0xeeeeeeee + - Successors: + - ID: 2 + BrProb: 0xffffffff + - Successors: [] + - Name: .llvm_bb_addr_map_4 + Type: SHT_LLVM_BB_ADDR_MAP + # Link: 0 (by default, can be overriden) + Entries: + - Version: 2 + Address: 0x44444 + 
Feature: 0x7 + BBEntries: + - ID: 0 + AddressOffset: 0x0 + Size: 0x4 + Metadata: 0x18 + - ID: 1 + AddressOffset: 0x0 + Size: 0x4 + Metadata: 0x0 + - ID: 2 + AddressOffset: 0x0 + Size: 0x4 + Metadata: 0x0 + - ID: 3 + AddressOffset: 0x0 + Size: 0x4 + Metadata: 0x0 + PGOAnalyses: + - FuncEntryCount: 1000 + PGOBBEntries: + - BBFreq: 1000 + Successors: + - ID: 1 + BrProb: 0x22222222 + - ID: 2 + BrProb: 0x33333333 + - ID: 3 + BrProb: 0xaaaaaaaa + - BBFreq: 133 + Successors: + - ID: 2 + BrProb: 0x11111111 + - ID: 3 + BrProb: 0xeeeeeeee + - BBFreq: 18 + Successors: + - ID: 3 + BrProb: 0xffffffff + - BBFreq: 1000 + Successors: [] +)"); + + BBAddrMap E1(0x11111, {{1, 0x0, 0x1, {false, true, false, false, false}}}); + PGOAnalysisMap P1 = {892, {{}}, {true, false, false}}; + BBAddrMap E2(0x22222, {{2, 0x0, 0x2, {false, false, true, false, false}}}); + PGOAnalysisMap P2 = {{}, {{BlockFrequency(343), {}}}, {false, true, false}}; + BBAddrMap E3(0x33333, {{0, 0x0, 0x3, {false, true, true, false, false}}, + {1, 0x3, 0x3, {false, false, true, false, false}}, + {2, 0x6, 0x3, {false, false, false, false, false}}}); + PGOAnalysisMap P3 = {{}, + {{{}, + {{1, BranchProbability::getRaw(0x1111'1111)}, + {2, BranchProbability::getRaw(0xeeee'eeee)}}}, + {{}, {{2, BranchProbability::getRaw(0xffff'ffff)}}}, + {{}, {}}}, + {false, false, true}}; + BBAddrMap E4(0x44444, {{0, 0x0, 0x4, {false, false, false, true, true}}, + {1, 0x4, 0x4, {false, false, false, false, false}}, + {2, 0x8, 0x4, {false, false, false, false, false}}, + {3, 0xc, 0x4, {false, false, false, false, false}}}); + PGOAnalysisMap P4 = { + 1000, + {{BlockFrequency(1000), + {{1, BranchProbability::getRaw(0x2222'2222)}, + {2, BranchProbability::getRaw(0x3333'3333)}, + {3, BranchProbability::getRaw(0xaaaa'aaaa)}}}, + {BlockFrequency(133), + {{2, BranchProbability::getRaw(0x1111'1111)}, + {3, BranchProbability::getRaw(0xeeee'eeee)}}}, + {BlockFrequency(18), {{3, BranchProbability::getRaw(0xffff'ffff)}}}, + {BlockFrequency(1000), 
{}}}, + {true, true, true}}; + + std::vector Section0BBAddrMaps = {E4}; + std::vector Section1BBAddrMaps = {E3}; + std::vector Section2BBAddrMaps = {E1, E2}; + std::vector AllBBAddrMaps = {E1, E2, E3, E4}; + + std::vector Section0PGOAnalysisMaps = {P4}; + std::vector Section1PGOAnalysisMaps = {P3}; + std::vector Section2PGOAnalysisMaps = {P1, P2}; + std::vector AllPGOAnalysisMaps = {P1, P2, P3, P4}; + + auto DoCheckSucceeds = + [&](StringRef YamlString, std::optional TextSectionIndex, + std::vector ExpectedResult, + std::optional> ExpectedPGO) { + SmallString<0> Storage; + Expected> ElfOrErr = + toBinary(Storage, YamlString); + ASSERT_THAT_EXPECTED(ElfOrErr, Succeeded()); + + Expected BBAddrMapSecOrErr = + ElfOrErr->getELFFile().getSection(1); + ASSERT_THAT_EXPECTED(BBAddrMapSecOrErr, Succeeded()); + + std::vector PGOAnalyses; + auto BBAddrMaps = ElfOrErr->readBBAddrMap( + TextSectionIndex, ExpectedPGO ? &PGOAnalyses : nullptr); + ASSERT_THAT_EXPECTED(BBAddrMaps, Succeeded()); + EXPECT_EQ(*BBAddrMaps, ExpectedResult); + if (ExpectedPGO) { + EXPECT_EQ(BBAddrMaps->size(), PGOAnalyses.size()); + EXPECT_EQ(PGOAnalyses, *ExpectedPGO); + } + }; + + auto DoCheckFails = [&](StringRef YamlString, + std::optional TextSectionIndex, + const char *ErrMsg) { + SmallString<0> Storage; + Expected> ElfOrErr = + toBinary(Storage, YamlString); + ASSERT_THAT_EXPECTED(ElfOrErr, Succeeded()); + + Expected BBAddrMapSecOrErr = + ElfOrErr->getELFFile().getSection(1); + ASSERT_THAT_EXPECTED(BBAddrMapSecOrErr, Succeeded()); + std::vector PGOAnalyses; + EXPECT_THAT_ERROR( + ElfOrErr->readBBAddrMap(TextSectionIndex, &PGOAnalyses).takeError(), + FailedWithMessage(ErrMsg)); + }; + + // Check that we can retrieve the data in the normal case. 
+ DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/std::nullopt, + AllBBAddrMaps, std::nullopt); + DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/0, Section0BBAddrMaps, + std::nullopt); + DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/2, Section1BBAddrMaps, + std::nullopt); + DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/1, Section2BBAddrMaps, + std::nullopt); + + DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/std::nullopt, + AllBBAddrMaps, AllPGOAnalysisMaps); + DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/0, Section0BBAddrMaps, + Section0PGOAnalysisMaps); + DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/2, Section1BBAddrMaps, + Section1PGOAnalysisMaps); + DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/1, Section2BBAddrMaps, + Section2PGOAnalysisMaps); + // Check that when no bb-address-map section is found for a text section, + // we return an empty result. + DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/3, {}, std::nullopt); + DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/3, {}, + std::vector{}); + + // Check that we detect when a bb-addr-map section is linked to an invalid + // (not present) section. + SmallString<128> InvalidLinkedYamlString(CommonYamlString); + InvalidLinkedYamlString += R"( + Link: 10 +)"; + + DoCheckFails(InvalidLinkedYamlString, /*TextSectionIndex=*/4, + "unable to get the linked-to section for " + "SHT_LLVM_BB_ADDR_MAP section with index 4: invalid section " + "index: 10"); + // Linked sections are not checked when we don't target a specific text + // section. + DoCheckSucceeds(InvalidLinkedYamlString, /*TextSectionIndex=*/std::nullopt, + AllBBAddrMaps, std::nullopt); + DoCheckSucceeds(InvalidLinkedYamlString, /*TextSectionIndex=*/std::nullopt, + AllBBAddrMaps, AllPGOAnalysisMaps); + + // Check that we can detect when bb-address-map decoding fails. 
+ SmallString<128> TruncatedYamlString(CommonYamlString); + TruncatedYamlString += R"( + ShSize: 0xa +)"; + + DoCheckFails(TruncatedYamlString, /*TextSectionIndex=*/std::nullopt, + "unable to read SHT_LLVM_BB_ADDR_MAP section with index 4: " + "unable to decode LEB128 at offset 0x0000000a: malformed " + "uleb128, extends past end"); + // Check that we can read the other section's bb-address-maps which are + // valid. + DoCheckSucceeds(TruncatedYamlString, /*TextSectionIndex=*/2, + Section1BBAddrMaps, std::nullopt); + DoCheckSucceeds(TruncatedYamlString, /*TextSectionIndex=*/2, + Section1BBAddrMaps, Section1PGOAnalysisMaps); +} + // Test for ObjectFile::getRelocatedSection: check that it returns a relocated // section for executable and relocatable files. TEST(ELFObjectFileTest, ExecutableWithRelocs) { diff --git a/llvm/unittests/Object/ELFTypesTest.cpp b/llvm/unittests/Object/ELFTypesTest.cpp index 2c9e8b7aebac..f09ab5e12438 100644 --- a/llvm/unittests/Object/ELFTypesTest.cpp +++ b/llvm/unittests/Object/ELFTypesTest.cpp @@ -94,3 +94,40 @@ TEST(ELFTypesTest, BBEntryMetadataInvalidEncodingTest) { FailedWithMessage(Errors[i])); } } + +static_assert( + std::is_same_v, + "PGOAnalysisMap should use the same type for basic block ID as BBAddrMap"); + +TEST(ELFTypesTest, PGOAnalysisMapFeaturesEncodingTest) { + const std::array Decoded = { + {{false, false, false}, + {true, false, false}, + {false, true, false}, + {false, false, true}, + {true, true, false}, + {false, true, true}, + {true, true, true}}}; + const std::array Encoded = { + {0b000, 0b001, 0b010, 0b100, 0b011, 0b110, 0b111}}; + for (const auto &[Feat, EncodedVal] : llvm::zip(Decoded, Encoded)) + EXPECT_EQ(Feat.encode(), EncodedVal); + for (const auto &[Feat, EncodedVal] : llvm::zip(Decoded, Encoded)) { + Expected FeatEnableOrError = + PGOAnalysisMap::Features::decode(EncodedVal); + ASSERT_THAT_EXPECTED(FeatEnableOrError, Succeeded()); + EXPECT_EQ(*FeatEnableOrError, Feat); + } +} + +TEST(ELFTypesTest, 
PGOAnalysisMapFeaturesInvalidEncodingTest) { + const std::array Errors = { + "invalid encoding for PGOAnalysisMap::Features: 0x8", + "invalid encoding for PGOAnalysisMap::Features: 0xff"}; + const std::array Values = {{0b1000, 0b1111'1111}}; + for (const auto &[Val, Error] : llvm::zip(Values, Errors)) { + EXPECT_THAT_ERROR(PGOAnalysisMap::Features::decode(Val).takeError(), + FailedWithMessage(Error)); + } +} -- Gitee From ff29ab9eb892383af2451e55556bd4e3db22d8e9 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 28 Dec 2023 19:39:50 -0800 Subject: [PATCH 18/47] [Driver][test] -fbasic-block-sections: replace legacy -target with --target= --- clang/test/Driver/fbasic-block-sections.c | 24 +++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/clang/test/Driver/fbasic-block-sections.c b/clang/test/Driver/fbasic-block-sections.c index 60889fb11530..f5007faf2e38 100644 --- a/clang/test/Driver/fbasic-block-sections.c +++ b/clang/test/Driver/fbasic-block-sections.c @@ -1,16 +1,16 @@ -// RUN: %clang -### -target x86_64 -fbasic-block-sections=none %s -S 2>&1 | FileCheck -check-prefix=CHECK-OPT-NONE %s -// RUN: %clang -### -target x86_64 -fbasic-block-sections=all %s -S 2>&1 | FileCheck -check-prefix=CHECK-OPT-ALL %s -// RUN: %clang -### -target x86_64 -fbasic-block-sections=list=%s %s -S 2>&1 | FileCheck -check-prefix=CHECK-OPT-LIST %s -// RUN: %clang -### -target x86_64 -fbasic-block-sections=labels %s -S 2>&1 | FileCheck -check-prefix=CHECK-OPT-LABELS %s -// RUN: not %clang -c -target arm-unknown-linux -fbasic-block-sections=all %s -S 2>&1 | FileCheck -check-prefix=CHECK-TRIPLE %s -// RUN: %clang -### -target arm-unknown-linux -fbasic-block-sections=all -fbasic-block-sections=none %s -S 2>&1 \ +// RUN: %clang -### --target=x86_64 -fbasic-block-sections=none %s -S 2>&1 | FileCheck -check-prefix=CHECK-OPT-NONE %s +// RUN: %clang -### --target=x86_64 -fbasic-block-sections=all %s -S 2>&1 | FileCheck -check-prefix=CHECK-OPT-ALL %s +// RUN: 
%clang -### --target=x86_64 -fbasic-block-sections=list=%s %s -S 2>&1 | FileCheck -check-prefix=CHECK-OPT-LIST %s +// RUN: %clang -### --target=x86_64 -fbasic-block-sections=labels %s -S 2>&1 | FileCheck -check-prefix=CHECK-OPT-LABELS %s +// RUN: not %clang -c --target=arm-unknown-linux -fbasic-block-sections=all %s -S 2>&1 | FileCheck -check-prefix=CHECK-TRIPLE %s +// RUN: %clang -### --target=arm-unknown-linux -fbasic-block-sections=all -fbasic-block-sections=none %s -S 2>&1 \ // RUN: | FileCheck -check-prefix=CHECK-NOOPT %s -// RUN: not %clang -c -target x86_64-apple-darwin10 -fbasic-block-sections=all %s -S 2>&1 | FileCheck -check-prefix=CHECK-TRIPLE %s -// RUN: %clang -### -target x86_64 -fbasic-block-sections=alll %s -S 2>&1 | FileCheck -check-prefix=CHECK-INVALID-VALUE %s -// RUN: %clang -### -target x86_64 -fbasic-block-sections=list %s -S 2>&1 | FileCheck -check-prefix=CHECK-INVALID-VALUE %s -// RUN: %clang -### -target x86_64 -fbasic-block-sections=list= %s -S 2>&1 | FileCheck -check-prefix=CHECK-OPT-NULL-LIST %s -// RUN: %clang -### -target x86_64 -fbasic-block-sections=none %s -S 2>&1 | FileCheck -check-prefix=CHECK-OPT-NONE %s -// RUN: %clang -### -x cuda -nocudainc -nocudalib -target x86_64 -fbasic-block-sections=all %s -c 2>&1 \ +// RUN: not %clang -c --target=x86_64-apple-darwin10 -fbasic-block-sections=all %s -S 2>&1 | FileCheck -check-prefix=CHECK-TRIPLE %s +// RUN: %clang -### --target=x86_64 -fbasic-block-sections=alll %s -S 2>&1 | FileCheck -check-prefix=CHECK-INVALID-VALUE %s +// RUN: %clang -### --target=x86_64 -fbasic-block-sections=list %s -S 2>&1 | FileCheck -check-prefix=CHECK-INVALID-VALUE %s +// RUN: %clang -### --target=x86_64 -fbasic-block-sections=list= %s -S 2>&1 | FileCheck -check-prefix=CHECK-OPT-NULL-LIST %s +// RUN: %clang -### --target=x86_64 -fbasic-block-sections=none %s -S 2>&1 | FileCheck -check-prefix=CHECK-OPT-NONE %s +// RUN: %clang -### -x cuda -nocudainc -nocudalib --target=x86_64 -fbasic-block-sections=all 
--cuda-path=%S/Inputs/CUDA/usr/local/cuda %s -c 2>&1 \ // RUN: | FileCheck -check-prefix=CHECK-CUDA %s // -- Gitee From d5978f0185b9b3482fda2f2e67496fade6875cfb Mon Sep 17 00:00:00 2001 From: Micah Weston Date: Wed, 3 Jan 2024 19:17:44 -0500 Subject: [PATCH 19/47] [SHT_LLVM_BB_ADDR_MAP][AsmPrinter] Implements PGOAnalysisMap emitting in AsmPrinter with tests. (#75202) Uses machine analyses to emit PGOAnalysisMap into the bb-addr-map ELF section. Implements filecheck tests to verify emitting new fields. This patch emits optional PGO related analyses into the bb-addr-map ELF section during AsmPrinter. This currently supports Function Entry Count, Machine Block Frequencies. and Machine Branch Probabilities. Each is independently enabled via the `feature` byte of `bb-addr-map` for the given function. A part of [RFC - PGO Accuracy Metrics: Emitting and Evaluating Branch and Block Analysis](https://discourse.llvm.org/t/rfc-pgo-accuracy-metrics-emitting-and-evaluating-branch-and-block-analysis/73902). --- llvm/docs/Extensions.rst | 84 ++++++++++++ llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 80 ++++++++++- ...ic-block-sections-labels-empty-function.ll | 6 +- ...asic-block-sections-labels-pgo-features.ll | 127 ++++++++++++++++++ 4 files changed, 292 insertions(+), 5 deletions(-) create mode 100644 llvm/test/CodeGen/X86/basic-block-sections-labels-pgo-features.ll diff --git a/llvm/docs/Extensions.rst b/llvm/docs/Extensions.rst index 6e9484089783..74ca8cb0aa68 100644 --- a/llvm/docs/Extensions.rst +++ b/llvm/docs/Extensions.rst @@ -451,6 +451,90 @@ Example: .uleb128 .LBB_END0_1-.LBB0_1 # BB_1 size .byte y # BB_1 metadata +PGO Analysis Map +"""""""""""""""" + +PGO related analysis data can be emitted after each function within the +``SHT_LLVM_BB_ADDR_MAP`` through the optional ``pgo-analysis-map`` flag. +Supported analyses currently are Function Entry Count, Basic Block Frequencies, +and Branch Probabilities. 
+ +Each analysis is enabled or disabled via a bit in the feature byte. Currently +those bits are: + +#. Function Entry Count - Number of times the function was called as taken + from a PGO profile. This will always be zero if PGO was not used or the + function was not encountered in the profile. + +#. Basic Block Frequencies - Encoded as raw block frequency value taken from + MBFI analysis. This value is an integer that encodes the relative frequency + compared to the entry block. More information can be found in + 'llvm/Support/BlockFrequency.h'. + +#. Branch Probabilities - Encoded as raw numerator for branch probability + taken from MBPI analysis. This value is the numerator for a fixed point ratio + defined in 'llvm/Support/BranchProbability.h'. It indicates the probability + that the block is followed by a given successor block during execution. + +This extra data requires version 2 or above. This is necessary since successors +of basic blocks won't know their index but will know their BB ID. + +Example of BBAddrMap with PGO data: + +.. code-block:: gas + + .section ".llvm_bb_addr_map","",@llvm_bb_addr_map + .byte 2 # version number + .byte 7 # feature byte - PGO analyses enabled mask + .quad .Lfunc_begin0 # address of the function + .uleb128 4 # number of basic blocks + # BB record for BB_0 + .uleb128 0 # BB_0 BB ID + .uleb128 .Lfunc_begin0-.Lfunc_begin0 # BB_0 offset relative to function entry (always zero) + .uleb128 .LBB_END0_0-.Lfunc_begin0 # BB_0 size + .byte 0x18 # BB_0 metadata (multiple successors) + # BB record for BB_1 + .uleb128 1 # BB_1 BB ID + .uleb128 .LBB0_1-.LBB_END0_0 # BB_1 offset relative to the end of last block (BB_0). + .uleb128 .LBB_END0_1-.LBB0_1 # BB_1 size + .byte 0x0 # BB_1 metadata (two successors) + # BB record for BB_2 + .uleb128 2 # BB_2 BB ID + .uleb128 .LBB0_2-.LBB_END0_1 # BB_2 offset relative to the end of last block (BB_1).
+ .uleb128 .LBB_END0_2-.LBB0_2 # BB_2 size + .byte 0x0 # BB_2 metadata (one successor) + # BB record for BB_3 + .uleb128 3 # BB_3 BB ID + .uleb128 .LBB0_3-.LBB_END0_2 # BB_3 offset relative to the end of last block (BB_2). + .uleb128 .LBB_END0_3-.LBB0_3 # BB_3 size + .byte 0x0 # BB_3 metadata (zero successors) + # PGO Analysis Map + .uleb128 1000 # function entry count (only when enabled) + # PGO data record for BB_0 + .uleb128 1000 # BB_0 basic block frequency (only when enabled) + .uleb128 3 # BB_0 successors count (only enabled with branch probabilities) + .uleb128 1 # BB_0 successor 1 BB ID (only enabled with branch probabilities) + .uleb128 0x22222222 # BB_0 successor 1 branch probability (only enabled with branch probabilities) + .uleb128 2 # BB_0 successor 2 BB ID (only enabled with branch probabilities) + .uleb128 0x33333333 # BB_0 successor 2 branch probability (only enabled with branch probabilities) + .uleb128 3 # BB_0 successor 3 BB ID (only enabled with branch probabilities) + .uleb128 0xaaaaaaaa # BB_0 successor 3 branch probability (only enabled with branch probabilities) + # PGO data record for BB_1 + .uleb128 133 # BB_1 basic block frequency (only when enabled) + .uleb128 2 # BB_1 successors count (only enabled with branch probabilities) + .uleb128 2 # BB_1 successor 1 BB ID (only enabled with branch probabilities) + .uleb128 0x11111111 # BB_1 successor 1 branch probability (only enabled with branch probabilities) + .uleb128 3 # BB_1 successor 2 BB ID (only enabled with branch probabilities) + .uleb128 0x11111111 # BB_1 successor 2 branch probability (only enabled with branch probabilities) + # PGO data record for BB_2 + .uleb128 18 # BB_2 basic block frequency (only when enabled) + .uleb128 1 # BB_2 successors count (only enabled with branch probabilities) + .uleb128 3 # BB_2 successor 1 BB ID (only enabled with branch probabilities) + .uleb128 0xffffffff # BB_2 successor 1 branch probability (only enabled with branch probabilities) + # PGO data 
record for BB_3 + .uleb128 1000 # BB_3 basic block frequency (only when enabled) + .uleb128 0 # BB_3 successors count (only enabled with branch probabilities) + ``SHT_LLVM_OFFLOADING`` Section (offloading data) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ This section stores the binary data used to perform offloading device linking diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 80652bc90a30..59ba672d08e6 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -40,6 +40,7 @@ #include "llvm/CodeGen/GCMetadataPrinter.h" #include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -138,6 +139,26 @@ static cl::opt BasicBlockProfileDump( "performed with -basic-block-sections=labels. Enabling this " "flag during in-process ThinLTO is not supported.")); +// This is a replication of fields of object::PGOAnalysisMap::Features. It +// should match the order of the fields so that +// `object::PGOAnalysisMap::Features::decode(PgoAnalysisMapFeatures.getBits())` +// succeeds. 
+enum class PGOMapFeaturesEnum { + FuncEntryCount, + BBFreq, + BrProb, +}; +static cl::bits PgoAnalysisMapFeatures( + "pgo-analysis-map", cl::Hidden, cl::CommaSeparated, + cl::values(clEnumValN(PGOMapFeaturesEnum::FuncEntryCount, + "func-entry-count", "Function Entry Count"), + clEnumValN(PGOMapFeaturesEnum::BBFreq, "bb-freq", + "Basic Block Frequency"), + clEnumValN(PGOMapFeaturesEnum::BrProb, "br-prob", + "Branch Probability")), + cl::desc("Enable extended information within the BBAddrMap that is " + "extracted from PGO related analysis.")); + const char DWARFGroupName[] = "dwarf"; const char DWARFGroupDescription[] = "DWARF Emission"; const char DbgTimerName[] = "emit"; @@ -425,6 +446,7 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); AU.addRequired(); AU.addRequired(); + AU.addRequired(); } bool AsmPrinter::doInitialization(Module &M) { @@ -1356,7 +1378,8 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) { uint8_t BBAddrMapVersion = OutStreamer->getContext().getBBAddrMapVersion(); OutStreamer->emitInt8(BBAddrMapVersion); OutStreamer->AddComment("feature"); - OutStreamer->emitInt8(0); + auto FeaturesBits = static_cast(PgoAnalysisMapFeatures.getBits()); + OutStreamer->emitInt8(FeaturesBits); OutStreamer->AddComment("function address"); OutStreamer->emitSymbolValue(FunctionSymbol, getPointerSize()); OutStreamer->AddComment("number of basic blocks"); @@ -1386,6 +1409,51 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) { OutStreamer->emitULEB128IntValue(getBBAddrMapMetadata(MBB)); PrevMBBEndSymbol = MBB.getEndSymbol(); } + + if (FeaturesBits != 0) { + assert(BBAddrMapVersion >= 2 && + "PGOAnalysisMap only supports version 2 or later"); + + auto FeatEnable = + cantFail(object::PGOAnalysisMap::Features::decode(FeaturesBits)); + + if (FeatEnable.FuncEntryCount) { + OutStreamer->AddComment("function entry count"); + auto MaybeEntryCount = MF.getFunction().getEntryCount(); + 
OutStreamer->emitULEB128IntValue( + MaybeEntryCount ? MaybeEntryCount->getCount() : 0); + } + const MachineBlockFrequencyInfo *MBFI = + FeatEnable.BBFreq + ? &getAnalysis().getBFI() + : nullptr; + const MachineBranchProbabilityInfo *MBPI = + FeatEnable.BrProb ? &getAnalysis() + : nullptr; + + if (FeatEnable.BBFreq || FeatEnable.BrProb) { + for (const MachineBasicBlock &MBB : MF) { + if (FeatEnable.BBFreq) { + OutStreamer->AddComment("basic block frequency"); + OutStreamer->emitULEB128IntValue( + MBFI->getBlockFreq(&MBB).getFrequency()); + } + if (FeatEnable.BrProb) { + unsigned SuccCount = MBB.succ_size(); + OutStreamer->AddComment("basic block successor count"); + OutStreamer->emitULEB128IntValue(SuccCount); + for (const MachineBasicBlock *SuccMBB : MBB.successors()) { + OutStreamer->AddComment("successor BB ID"); + OutStreamer->emitULEB128IntValue(SuccMBB->getBBID()->BaseID); + OutStreamer->AddComment("successor branch probability"); + OutStreamer->emitULEB128IntValue( + MBPI->getEdgeProbability(&MBB, SuccMBB).getNumerator()); + } + } + } + } + } + OutStreamer->popSection(); } @@ -1906,8 +1974,14 @@ void AsmPrinter::emitFunctionBody() { // Emit section containing BB address offsets and their metadata, when // BB labels are requested for this function. Skip empty functions. - if (MF->hasBBLabels() && HasAnyRealCode) - emitBBAddrMapSection(*MF); + if (HasAnyRealCode) { + if (MF->hasBBLabels()) + emitBBAddrMapSection(*MF); + else if (PgoAnalysisMapFeatures.getBits() != 0) + MF->getContext().reportWarning( + SMLoc(), "pgo-analysis-map is enabled for function " + MF->getName() + + " but it does not have labels"); + } // Emit sections containing instruction and function PCs. 
emitPCSections(*MF); diff --git a/llvm/test/CodeGen/X86/basic-block-sections-labels-empty-function.ll b/llvm/test/CodeGen/X86/basic-block-sections-labels-empty-function.ll index 7b7bbb95fb4e..42d09212e669 100644 --- a/llvm/test/CodeGen/X86/basic-block-sections-labels-empty-function.ll +++ b/llvm/test/CodeGen/X86/basic-block-sections-labels-empty-function.ll @@ -1,5 +1,6 @@ ;; Verify that the BB address map is not emitted for empty functions. -; RUN: llc < %s -mtriple=x86_64 -basic-block-sections=labels | FileCheck %s +; RUN: llc < %s -mtriple=x86_64 -basic-block-sections=labels | FileCheck %s --check-prefixes=CHECK,BASIC +; RUN: llc < %s -mtriple=x86_64 -basic-block-sections=labels -pgo-analysis-map=func-entry-count,bb-freq | FileCheck %s --check-prefixes=CHECK,PGO define void @empty_func() { entry: @@ -19,5 +20,6 @@ entry: ; CHECK: .Lfunc_begin1: ; CHECK: .section .llvm_bb_addr_map,"o",@llvm_bb_addr_map,.text{{$}} ; CHECK-NEXT: .byte 2 # version -; CHECK-NEXT: .byte 0 # feature +; BASIC-NEXT: .byte 0 # feature +; PGO-NEXT: .byte 3 # feature ; CHECK-NEXT: .quad .Lfunc_begin1 # function address diff --git a/llvm/test/CodeGen/X86/basic-block-sections-labels-pgo-features.ll b/llvm/test/CodeGen/X86/basic-block-sections-labels-pgo-features.ll new file mode 100644 index 000000000000..ebfc003c50f5 --- /dev/null +++ b/llvm/test/CodeGen/X86/basic-block-sections-labels-pgo-features.ll @@ -0,0 +1,127 @@ +; Check the basic block sections labels option +; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-sections=labels | FileCheck %s --check-prefixes=CHECK,BASIC + +;; Also verify this holds for all PGO features enabled +; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-sections=labels -pgo-analysis-map=func-entry-count,bb-freq,br-prob | FileCheck %s --check-prefixes=CHECK,PGO-ALL,PGO-FEC,PGO-BBF,PGO-BRP + +;; Also verify that pgo extension only includes the enabled feature +; RUN: llc < %s 
-mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-sections=labels -pgo-analysis-map=func-entry-count | FileCheck %s --check-prefixes=CHECK,PGO-FEC,FEC-ONLY +; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-sections=labels -pgo-analysis-map=bb-freq | FileCheck %s --check-prefixes=CHECK,PGO-BBF,BBF-ONLY +; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-sections=labels -pgo-analysis-map=br-prob | FileCheck %s --check-prefixes=CHECK,PGO-BRP,BRP-ONLY + + +define void @_Z3bazb(i1 zeroext, i1 zeroext) personality ptr @__gxx_personality_v0 !prof !0 { + br i1 %0, label %3, label %8, !prof !1 + +3: + %4 = invoke i32 @_Z3barv() + to label %8 unwind label %6 + br label %10 + +6: + landingpad { ptr, i32 } + catch ptr null + br label %12 + +8: + %9 = call i32 @_Z3foov() + br i1 %1, label %12, label %10, !prof !2 + +10: + %11 = select i1 %1, ptr blockaddress(@_Z3bazb, %3), ptr blockaddress(@_Z3bazb, %12) ; [#uses=1] + indirectbr ptr %11, [label %3, label %12], !prof !3 + +12: + ret void +} + +declare i32 @_Z3barv() #1 + +declare i32 @_Z3foov() #1 + +declare i32 @__gxx_personality_v0(...) 
+ +!0 = !{!"function_entry_count", i64 100} +!1 = !{!"branch_weights", i32 80, i32 20} +!2 = !{!"branch_weights", i32 70, i32 10} +!3 = !{!"branch_weights", i32 15, i32 5} + +; CHECK: .section .text._Z3bazb,"ax",@progbits{{$}} +; CHECK-LABEL: _Z3bazb: +; CHECK-LABEL: .Lfunc_begin0: +; CHECK-LABEL: .LBB_END0_0: +; CHECK-LABEL: .LBB0_1: +; CHECK-LABEL: .LBB_END0_1: +; CHECK-LABEL: .LBB0_2: +; CHECK-LABEL: .LBB_END0_2: +; CHECK-LABEL: .LBB0_3: +; CHECK-LABEL: .LBB_END0_3: +; CHECK-LABEL: .Lfunc_end0: + +; CHECK: .section .llvm_bb_addr_map,"o",@llvm_bb_addr_map,.text._Z3bazb{{$}} +; CHECK-NEXT: .byte 2 # version +; BASIC-NEXT: .byte 0 # feature +; PGO-ALL-NEXT: .byte 7 # feature +; FEC-ONLY-NEXT:.byte 1 # feature +; BBF-ONLY-NEXT:.byte 2 # feature +; BRP-ONLY-NEXT:.byte 4 # feature +; CHECK-NEXT: .quad .Lfunc_begin0 # function address +; CHECK-NEXT: .byte 6 # number of basic blocks +; CHECK-NEXT: .byte 0 # BB id +; CHECK-NEXT: .uleb128 .Lfunc_begin0-.Lfunc_begin0 +; CHECK-NEXT: .uleb128 .LBB_END0_0-.Lfunc_begin0 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 1 # BB id +; CHECK-NEXT: .uleb128 .LBB0_1-.LBB_END0_0 +; CHECK-NEXT: .uleb128 .LBB_END0_1-.LBB0_1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 3 # BB id +; CHECK-NEXT: .uleb128 .LBB0_2-.LBB_END0_1 +; CHECK-NEXT: .uleb128 .LBB_END0_2-.LBB0_2 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 5 # BB id +; CHECK-NEXT: .uleb128 .LBB0_3-.LBB_END0_2 +; CHECK-NEXT: .uleb128 .LBB_END0_3-.LBB0_3 +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 4 # BB id +; CHECK-NEXT: .uleb128 .LBB0_4-.LBB_END0_3 +; CHECK-NEXT: .uleb128 .LBB_END0_4-.LBB0_4 +; CHECK-NEXT: .byte 16 +; CHECK-NEXT: .byte 2 # BB id +; CHECK-NEXT: .uleb128 .LBB0_5-.LBB_END0_4 +; CHECK-NEXT: .uleb128 .LBB_END0_5-.LBB0_5 +; CHECK-NEXT: .byte 4 + +;; PGO Analysis Map +; PGO-FEC-NEXT: .byte 100 # function entry count +; PGO-BBF-NEXT: .ascii "\260\374\317\003" # basic block frequency +; PGO-BRP-NEXT: .byte 2 # basic block successor count +; PGO-BRP-NEXT: .byte 1 # successor BB ID +; 
PGO-BRP-NEXT: .ascii "\346\314\231\263\006" # successor branch probability +; PGO-BRP-NEXT: .byte 3 # successor BB ID +; PGO-BRP-NEXT: .ascii "\232\263\346\314\001" # successor branch probability +; PGO-BBF-NEXT: .ascii "\207\342W" # basic block frequency +; PGO-BRP-NEXT: .byte 2 # basic block successor count +; PGO-BRP-NEXT: .byte 3 # successor BB ID +; PGO-BRP-NEXT: .ascii "\200\360\377\377\007" # successor branch probability +; PGO-BRP-NEXT: .byte 2 # successor BB ID +; PGO-BRP-NEXT: .ascii "\200\020" # successor branch probability +; PGO-BBF-NEXT: .ascii "\347\225\250\007" # basic block frequency +; PGO-BRP-NEXT: .byte 2 # basic block successor count +; PGO-BRP-NEXT: .byte 5 # successor BB ID +; PGO-BRP-NEXT: .ascii "\200\200\200\200\007" # successor branch probability +; PGO-BRP-NEXT: .byte 4 # successor BB ID +; PGO-BRP-NEXT: .ascii "\200\200\200\200\001" # successor branch probability +; PGO-BBF-NEXT: .ascii "\260\374\317\003" # basic block frequency +; PGO-BRP-NEXT: .byte 0 # basic block successor count +; PGO-BBF-NEXT: .ascii "\276\377?" # basic block frequency +; PGO-BRP-NEXT: .byte 2 # basic block successor count +; PGO-BRP-NEXT: .byte 1 # successor BB ID +; PGO-BRP-NEXT: .ascii "\200\200\200\200\006" # successor branch probability +; PGO-BRP-NEXT: .byte 5 # successor BB ID +; PGO-BRP-NEXT: .ascii "\200\200\200\200\002" # successor branch probability +; PGO-BBF-NEXT: .byte 8 # basic block frequency +; PGO-BRP-NEXT: .byte 1 # basic block successor count +; PGO-BRP-NEXT: .byte 5 # successor BB ID +; PGO-BRP-NEXT: .ascii "\200\200\200\200\b" # successor branch probability + -- Gitee From 976ee2afed62098df514f3cb3976f9491fff8fb1 Mon Sep 17 00:00:00 2001 From: Micah Weston Date: Fri, 5 Jan 2024 21:59:51 -0500 Subject: [PATCH 20/47] [SHT_LLVM_BB_ADDR_MAP] Fixes two bugs in decoding of PGOAnalyses in BBAddrMap. (#77139) We had specified that `readBBAddrMap` will always keep PGOAnalyses and BBAddrMaps the same length on success. 
https://github.com/llvm/llvm-project/blob/365fbbfbcfefb8766f7716109b9c3767b58e6058/llvm/include/llvm/Object/ELFObjectFile.h#L116-L117 It turns out that this is not currently the case when no analyses exist in a function. No test had caught it. We also should not append PGOBBEntries when there is no BBFreq or BrProb. This patch adds: * tests that PGOAnalyses and BBAddrMaps are same length even when no analyses are enabled * fixes decode so that PGOAnalyses and BBAddrMaps are same length * updates test to not emit unnecessary PGOBBEntries * fixes decode to not emit PGOBBEntries when unnecessary --- llvm/include/llvm/Object/ELFTypes.h | 3 ++ llvm/lib/Object/ELF.cpp | 7 ++-- llvm/unittests/Object/ELFObjectFileTest.cpp | 37 ++++++++++++++++----- 3 files changed, 35 insertions(+), 12 deletions(-) diff --git a/llvm/include/llvm/Object/ELFTypes.h b/llvm/include/llvm/Object/ELFTypes.h index ea0f8cec6b7b..84f026a926d6 100644 --- a/llvm/include/llvm/Object/ELFTypes.h +++ b/llvm/include/llvm/Object/ELFTypes.h @@ -886,6 +886,9 @@ struct PGOAnalysisMap { bool BBFreq : 1; bool BrProb : 1; + // True if at least one feature is enabled + bool anyEnabled() const { return FuncEntryCount || BBFreq || BrProb; } + // Encodes to minimum bit width representation. 
uint8_t encode() const { return (static_cast(FuncEntryCount) << 0) | diff --git a/llvm/lib/Object/ELF.cpp b/llvm/lib/Object/ELF.cpp index 17668da5a8c6..e9715998b532 100644 --- a/llvm/lib/Object/ELF.cpp +++ b/llvm/lib/Object/ELF.cpp @@ -773,7 +773,7 @@ decodeBBAddrMapImpl(const ELFFile &EF, } FunctionEntries.emplace_back(Address, std::move(BBEntries)); - if (FeatEnable.FuncEntryCount || FeatEnable.BBFreq || FeatEnable.BrProb) { + if (PGOAnalyses || FeatEnable.anyEnabled()) { // Function entry count uint64_t FuncEntryCount = FeatEnable.FuncEntryCount @@ -781,8 +781,9 @@ decodeBBAddrMapImpl(const ELFFile &EF, : 0; std::vector PGOBBEntries; - for (uint32_t BlockIndex = 0; !MetadataDecodeErr && !ULEBSizeErr && Cur && - (BlockIndex < NumBlocks); + for (uint32_t BlockIndex = 0; + (FeatEnable.BBFreq || FeatEnable.BrProb) && !MetadataDecodeErr && + !ULEBSizeErr && Cur && (BlockIndex < NumBlocks); ++BlockIndex) { // Block frequency uint64_t BBF = FeatEnable.BBFreq diff --git a/llvm/unittests/Object/ELFObjectFileTest.cpp b/llvm/unittests/Object/ELFObjectFileTest.cpp index 2878ca088cd7..502db0862ab8 100644 --- a/llvm/unittests/Object/ELFObjectFileTest.cpp +++ b/llvm/unittests/Object/ELFObjectFileTest.cpp @@ -984,10 +984,23 @@ Sections: BrProb: 0xffffffff - BBFreq: 1000 Successors: [] -)"); + - Name: .llvm_bb_addr_map_5 + Type: SHT_LLVM_BB_ADDR_MAP + # Link: 0 (by default, can be overriden) + Entries: + - Version: 2 + Address: 0x55555 + Feature: 0x0 + BBEntries: + - ID: 2 + AddressOffset: 0x0 + Size: 0x2 + Metadata: 0x4 + PGOAnalyses: [{}] + )"); BBAddrMap E1(0x11111, {{1, 0x0, 0x1, {false, true, false, false, false}}}); - PGOAnalysisMap P1 = {892, {{}}, {true, false, false}}; + PGOAnalysisMap P1 = {892, {}, {true, false, false}}; BBAddrMap E2(0x22222, {{2, 0x0, 0x2, {false, false, true, false, false}}}); PGOAnalysisMap P2 = {{}, {{BlockFrequency(343), {}}}, {false, true, false}}; BBAddrMap E3(0x33333, {{0, 0x0, 0x3, {false, true, true, false, false}}, @@ -1016,16 +1029,18 @@ 
Sections: {BlockFrequency(18), {{3, BranchProbability::getRaw(0xffff'ffff)}}}, {BlockFrequency(1000), {}}}, {true, true, true}}; + BBAddrMap E5(0x55555, {{2, 0x0, 0x2, {false, false, true, false, false}}}); + PGOAnalysisMap P5 = {{}, {}, {false, false, false}}; - std::vector Section0BBAddrMaps = {E4}; + std::vector Section0BBAddrMaps = {E4, E5}; std::vector Section1BBAddrMaps = {E3}; std::vector Section2BBAddrMaps = {E1, E2}; - std::vector AllBBAddrMaps = {E1, E2, E3, E4}; + std::vector AllBBAddrMaps = {E1, E2, E3, E4, E5}; - std::vector Section0PGOAnalysisMaps = {P4}; + std::vector Section0PGOAnalysisMaps = {P4, P5}; std::vector Section1PGOAnalysisMaps = {P3}; std::vector Section2PGOAnalysisMaps = {P1, P2}; - std::vector AllPGOAnalysisMaps = {P1, P2, P3, P4}; + std::vector AllPGOAnalysisMaps = {P1, P2, P3, P4, P5}; auto DoCheckSucceeds = [&](StringRef YamlString, std::optional TextSectionIndex, @@ -1048,6 +1063,10 @@ Sections: if (ExpectedPGO) { EXPECT_EQ(BBAddrMaps->size(), PGOAnalyses.size()); EXPECT_EQ(PGOAnalyses, *ExpectedPGO); + for (auto &&[BB, PGO] : llvm::zip(*BBAddrMaps, PGOAnalyses)) { + if (PGO.FeatEnable.BBFreq || PGO.FeatEnable.BrProb) + EXPECT_EQ(BB.getBBEntries().size(), PGO.BBEntries.size()); + } } }; @@ -1099,9 +1118,9 @@ Sections: Link: 10 )"; - DoCheckFails(InvalidLinkedYamlString, /*TextSectionIndex=*/4, + DoCheckFails(InvalidLinkedYamlString, /*TextSectionIndex=*/5, "unable to get the linked-to section for " - "SHT_LLVM_BB_ADDR_MAP section with index 4: invalid section " + "SHT_LLVM_BB_ADDR_MAP section with index 5: invalid section " "index: 10"); // Linked sections are not checked when we don't target a specific text // section. 
@@ -1117,7 +1136,7 @@ Sections: )"; DoCheckFails(TruncatedYamlString, /*TextSectionIndex=*/std::nullopt, - "unable to read SHT_LLVM_BB_ADDR_MAP section with index 4: " + "unable to read SHT_LLVM_BB_ADDR_MAP section with index 5: " "unable to decode LEB128 at offset 0x0000000a: malformed " "uleb128, extends past end"); // Check that we can read the other section's bb-address-maps which are -- Gitee From a0b0de45a005ec3e8221c018d1dbe7290dbeaa81 Mon Sep 17 00:00:00 2001 From: Nick Anderson Date: Mon, 8 Jan 2024 22:32:59 -0800 Subject: [PATCH 21/47] =?UTF-8?q?Port=20CodeGenPrepare=20to=20new=20pass?= =?UTF-8?q?=20manager=20(and=20BasicBlockSectionsProfil=E2=80=A6=20(#77182?= =?UTF-8?q?)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Port CodeGenPrepare to new pass manager and dependency BasicBlockSectionsProfileReader Fixes: #75380 Co-authored-by: Krishna-13-cyber <84722531+Krishna-13-cyber@users.noreply.github.com> --- .../CodeGen/BasicBlockSectionsProfileReader.h | 83 +++++++++--- llvm/include/llvm/CodeGen/CodeGenPrepare.h | 35 +++++ llvm/include/llvm/CodeGen/Passes.h | 4 +- llvm/include/llvm/InitializePasses.h | 4 +- llvm/include/llvm/LinkAllPasses.h | 2 +- llvm/lib/CodeGen/BasicBlockPathCloning.cpp | 11 +- llvm/lib/CodeGen/BasicBlockSections.cpp | 8 +- .../BasicBlockSectionsProfileReader.cpp | 52 ++++++-- llvm/lib/CodeGen/CodeGen.cpp | 2 +- llvm/lib/CodeGen/CodeGenPrepare.cpp | 125 ++++++++++++------ llvm/lib/CodeGen/TargetPassConfig.cpp | 4 +- llvm/lib/Passes/PassBuilder.cpp | 2 + llvm/lib/Passes/PassRegistry.def | 2 + .../AArch64/aarch64-codegen-prepare-atp.ll | 2 +- llvm/test/CodeGen/AArch64/and-sink.ll | 4 +- .../CodeGen/AArch64/arm64-bitfield-extract.ll | 2 +- .../AArch64/arm64-codegen-prepare-extload.ll | 6 +- .../test/CodeGen/AArch64/arm64_32-gep-sink.ll | 2 +- .../CodeGen/AArch64/cgp-trivial-phi-node.ll | 2 +- llvm/test/CodeGen/AArch64/convertphitype.ll | 2 +- .../AArch64/scalable-vector-promotion.ll | 2 +- 
llvm/test/CodeGen/AArch64/sve-vscale.ll | 2 +- .../AMDGPU/cgp-addressing-modes-flat.ll | 8 +- .../AMDGPU/cgp-addressing-modes-gfx1030.ll | 2 +- .../AMDGPU/cgp-addressing-modes-gfx908.ll | 2 +- .../CodeGen/AMDGPU/cgp-addressing-modes.ll | 8 +- llvm/test/CodeGen/ARM/vector-promotion.ll | 4 +- .../Generic/addr-sink-call-multi-arg.ll | 2 +- llvm/test/CodeGen/Generic/addr-use-count.ll | 2 +- .../test/CodeGen/X86/callbr-codegenprepare.ll | 2 +- .../X86/codegen-prepare-addrmode-sext.ll | 2 +- .../CodeGen/X86/codegen-prepare-extload.ll | 6 +- llvm/test/CodeGen/X86/convertphitype.ll | 2 +- llvm/test/CodeGen/X86/pr58538.ll | 4 +- llvm/test/CodeGen/X86/tailcall-cgp-dup.ll | 2 +- llvm/test/CodeGen/X86/tailcall-extract.ll | 2 +- llvm/test/DebugInfo/ARM/salvage-debug-info.ll | 2 +- llvm/test/DebugInfo/X86/zextload.ll | 2 +- llvm/test/Other/codegenprepare-and-debug.ll | 2 +- .../AArch64/combine-address-mode.ll | 2 +- .../CodeGenPrepare/AArch64/free-zext.ll | 2 +- .../gather-scatter-opt-inseltpoison.ll | 2 +- .../AArch64/gather-scatter-opt.ll | 2 +- .../AArch64/overflow-intrinsics.ll | 6 +- .../AArch64/trunc-weird-user.ll | 2 +- .../CodeGenPrepare/AArch64/zext-to-shuffle.ll | 2 +- .../CodeGenPrepare/AMDGPU/addressing-modes.ll | 2 +- .../AMDGPU/no-sink-addrspacecast.ll | 2 +- .../AMDGPU/sink-addrspacecast.ll | 2 +- .../CodeGenPrepare/ARM/branch-on-zero.ll | 2 +- .../Transforms/CodeGenPrepare/ARM/dead-gep.ll | 2 +- .../CodeGenPrepare/ARM/memory-intrinsics.ll | 2 +- .../CodeGenPrepare/ARM/overflow-intrinsics.ll | 2 +- .../CodeGenPrepare/ARM/sink-addrmode.ll | 2 +- .../Transforms/CodeGenPrepare/ARM/splitgep.ll | 2 +- .../CodeGenPrepare/ARM/tailcall-dup.ll | 2 +- .../bypass-slow-div-constant-numerator.ll | 2 +- .../NVPTX/bypass-slow-div-not-exact.ll | 2 +- .../NVPTX/bypass-slow-div-special-cases.ll | 2 +- .../CodeGenPrepare/NVPTX/bypass-slow-div.ll | 2 +- .../NVPTX/dont-introduce-addrspacecast.ll | 2 +- .../NVPTX/dont-sink-nop-addrspacecast.ll | 2 +- 
.../PowerPC/split-store-alignment.ll | 4 +- .../CodeGenPrepare/RISCV/and-mask-sink.ll | 8 +- .../CodeGenPrepare/RISCV/cttz-ctlz.ll | 2 +- .../SPARC/overflow-intrinsics.ll | 6 +- .../CodeGenPrepare/X86/catchpad-phi-cast.ll | 2 +- .../X86/cgp_shuffle_crash-inseltpoison.ll | 2 +- .../CodeGenPrepare/X86/cgp_shuffle_crash.ll | 2 +- .../CodeGenPrepare/X86/computedgoto.ll | 2 +- .../CodeGenPrepare/X86/cttz-ctlz.ll | 6 +- .../X86/delete-assume-dead-code.ll | 2 +- .../CodeGenPrepare/X86/extend-sink-hoist.ll | 2 +- .../CodeGenPrepare/X86/freeze-brcond.ll | 2 +- .../X86/gather-scatter-opt-inseltpoison.ll | 4 +- .../CodeGenPrepare/X86/gather-scatter-opt.ll | 2 +- .../CodeGenPrepare/X86/gep-unmerging.ll | 2 +- .../CodeGenPrepare/X86/invariant.group.ll | 2 +- .../X86/masked-gather-struct-gep.ll | 2 +- .../CodeGenPrepare/X86/nonintegral.ll | 4 +- .../CodeGenPrepare/X86/optimizeSelect-DT.ll | 2 +- .../CodeGenPrepare/X86/overflow-intrinsics.ll | 6 +- .../Transforms/CodeGenPrepare/X86/pr27536.ll | 2 +- .../Transforms/CodeGenPrepare/X86/pr35658.ll | 2 +- .../recursively-delete-dead-instructions.ll | 2 +- .../CodeGenPrepare/X86/remove-assume-block.ll | 2 +- .../Transforms/CodeGenPrepare/X86/select.ll | 4 +- .../CodeGenPrepare/X86/sink-addrmode-base.ll | 4 +- .../X86/sink-addrmode-inseltpoison.ll | 2 +- .../X86/sink-addrmode-select.ll | 2 +- .../X86/sink-addrmode-two-phi.ll | 2 +- .../CodeGenPrepare/X86/sink-addrmode.ll | 2 +- .../CodeGenPrepare/X86/sink-addrspacecast.ll | 2 +- .../CodeGenPrepare/X86/split-indirect-loop.ll | 2 +- .../X86/split-store-alignment.ll | 2 +- .../CodeGenPrepare/X86/statepoint-relocate.ll | 2 +- .../CodeGenPrepare/X86/tailcall-assume-xbb.ll | 2 +- .../X86/vec-shift-inseltpoison.ll | 14 +- .../CodeGenPrepare/X86/vec-shift.ll | 14 +- .../CodeGenPrepare/X86/widenable-condition.ll | 2 +- .../X86/x86-shuffle-sink-inseltpoison.ll | 8 +- .../CodeGenPrepare/X86/x86-shuffle-sink.ll | 8 +- .../CodeGenPrepare/dead-allocation.ll | 2 +- 
.../CodeGenPrepare/skip-merging-case-block.ll | 2 +- .../Transforms/HotColdSplit/coldentrycount.ll | 2 +- .../codegenprepare-produced-address-math.ll | 2 +- .../section-accurate-samplepgo.ll | 6 +- llvm/tools/opt/opt.cpp | 2 +- 108 files changed, 392 insertions(+), 230 deletions(-) create mode 100644 llvm/include/llvm/CodeGen/CodeGenPrepare.h diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h index dfb8d5d9f2f5..bba675f1d3eb 100644 --- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h +++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h @@ -21,11 +21,14 @@ #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Error.h" #include "llvm/Support/LineIterator.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Target/TargetMachine.h" + using namespace llvm; namespace llvm { @@ -72,25 +75,13 @@ template <> struct DenseMapInfo { } }; -class BasicBlockSectionsProfileReader : public ImmutablePass { +class BasicBlockSectionsProfileReader { public: - static char ID; - + friend class BasicBlockSectionsProfileReaderWrapperPass; BasicBlockSectionsProfileReader(const MemoryBuffer *Buf) - : ImmutablePass(ID), MBuf(Buf), - LineIt(*Buf, /*SkipBlanks=*/true, /*CommentMarker=*/'#') { - initializeBasicBlockSectionsProfileReaderPass( - *PassRegistry::getPassRegistry()); - }; + : MBuf(Buf), LineIt(*Buf, /*SkipBlanks=*/true, /*CommentMarker=*/'#'){}; - BasicBlockSectionsProfileReader() : ImmutablePass(ID) { - initializeBasicBlockSectionsProfileReaderPass( - *PassRegistry::getPassRegistry()); - } - - StringRef getPassName() const override { - return "Basic Block Sections Profile Reader"; - } + BasicBlockSectionsProfileReader(){}; // Returns true if basic block sections profile exist for function \p // FuncName. 
@@ -109,10 +100,6 @@ public: SmallVector> getClonePathsForFunction(StringRef FuncName) const; - // Initializes the FunctionNameToDIFilename map for the current module and - // then reads the profile for the matching functions. - bool doInitialization(Module &M) override; - private: StringRef getAliasName(StringRef FuncName) const { auto R = FuncAliasMap.find(FuncName); @@ -170,7 +157,61 @@ private: // sections profile. \p Buf is a memory buffer that contains the list of // functions and basic block ids to selectively enable basic block sections. ImmutablePass * -createBasicBlockSectionsProfileReaderPass(const MemoryBuffer *Buf); +createBasicBlockSectionsProfileReaderWrapperPass(const MemoryBuffer *Buf); + +/// Analysis pass providing the \c BasicBlockSectionsProfileReader. +/// +/// Note that this pass's result cannot be invalidated, it is immutable for the +/// life of the module. +class BasicBlockSectionsProfileReaderAnalysis + : public AnalysisInfoMixin { + +public: + static AnalysisKey Key; + typedef BasicBlockSectionsProfileReader Result; + BasicBlockSectionsProfileReaderAnalysis(const TargetMachine *TM) : TM(TM) {} + + Result run(Function &F, FunctionAnalysisManager &AM); + +private: + const TargetMachine *TM; +}; + +class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass { +public: + static char ID; + BasicBlockSectionsProfileReader BBSPR; + + BasicBlockSectionsProfileReaderWrapperPass(const MemoryBuffer *Buf) + : ImmutablePass(ID), BBSPR(BasicBlockSectionsProfileReader(Buf)) { + initializeBasicBlockSectionsProfileReaderWrapperPassPass( + *PassRegistry::getPassRegistry()); + }; + + BasicBlockSectionsProfileReaderWrapperPass() + : ImmutablePass(ID), BBSPR(BasicBlockSectionsProfileReader()) { + initializeBasicBlockSectionsProfileReaderWrapperPassPass( + *PassRegistry::getPassRegistry()); + } + + StringRef getPassName() const override { + return "Basic Block Sections Profile Reader"; + } + + bool isFunctionHot(StringRef FuncName) const; + + 
std::pair> + getClusterInfoForFunction(StringRef FuncName) const; + + SmallVector> + getClonePathsForFunction(StringRef FuncName) const; + + // Initializes the FunctionNameToDIFilename map for the current module and + // then reads the profile for the matching functions. + bool doInitialization(Module &M) override; + + BasicBlockSectionsProfileReader &getBBSPR(); +}; } // namespace llvm #endif // LLVM_CODEGEN_BASICBLOCKSECTIONSPROFILEREADER_H diff --git a/llvm/include/llvm/CodeGen/CodeGenPrepare.h b/llvm/include/llvm/CodeGen/CodeGenPrepare.h new file mode 100644 index 000000000000..dee3a9ee53d7 --- /dev/null +++ b/llvm/include/llvm/CodeGen/CodeGenPrepare.h @@ -0,0 +1,35 @@ +//===- CodeGenPrepare.h -----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// Defines an IR pass for CodeGen Prepare. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_PREPARE_H +#define LLVM_CODEGEN_PREPARE_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class Function; +class TargetMachine; + +class CodeGenPreparePass : public PassInfoMixin { +private: + const TargetMachine *TM; + +public: + CodeGenPreparePass(const TargetMachine *TM) : TM(TM) {} + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + +} // end namespace llvm + +#endif // LLVM_CODEGEN_PREPARE_H diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h index 90f43897905c..f14294c5f12a 100644 --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -93,9 +93,9 @@ namespace llvm { MachineFunctionPass *createResetMachineFunctionPass(bool EmitFallbackDiag, bool AbortOnFailedISel); - /// createCodeGenPreparePass - Transform the code to expose more pattern + /// createCodeGenPrepareLegacyPass - Transform the code to expose more pattern /// matching during instruction selection. 
- FunctionPass *createCodeGenPreparePass(); + FunctionPass *createCodeGenPrepareLegacyPass(); /// This pass implements generation of target-specific intrinsics to support /// handling of complex number arithmetic diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 1ce24ef860b7..d70370404ec5 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -56,7 +56,7 @@ void initializeAssumeBuilderPassLegacyPassPass(PassRegistry &); void initializeAssumptionCacheTrackerPass(PassRegistry&); void initializeAtomicExpandPass(PassRegistry&); void initializeBasicBlockPathCloningPass(PassRegistry &); -void initializeBasicBlockSectionsProfileReaderPass(PassRegistry &); +void initializeBasicBlockSectionsProfileReaderWrapperPassPass(PassRegistry &); void initializeBasicBlockSectionsPass(PassRegistry &); void initializeBarrierNoopPass(PassRegistry&); void initializeBasicAAWrapperPassPass(PassRegistry&); @@ -82,7 +82,7 @@ void initializeCallGraphPrinterLegacyPassPass(PassRegistry&); void initializeCallGraphViewerPass(PassRegistry&); void initializeCallGraphWrapperPassPass(PassRegistry&); void initializeCheckDebugMachineModulePass(PassRegistry &); -void initializeCodeGenPreparePass(PassRegistry&); +void initializeCodeGenPrepareLegacyPassPass(PassRegistry &); void initializeComplexDeinterleavingLegacyPassPass(PassRegistry&); void initializeConstantHoistingLegacyPassPass(PassRegistry&); void initializeCostModelAnalysisPass(PassRegistry&); diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h index 3a8ecb1399f1..c4206fdbb8a3 100644 --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -141,7 +141,7 @@ namespace { (void) llvm::createUnifyFunctionExitNodesPass(); (void) llvm::createInstCountPass(); (void) llvm::createConstantHoistingPass(); - (void) llvm::createCodeGenPreparePass(); + (void)llvm::createCodeGenPrepareLegacyPass(); (void) 
llvm::createEarlyCSEPass(); (void) llvm::createMergedLoadStoreMotionPass(); (void) llvm::createGVNPass(); diff --git a/llvm/lib/CodeGen/BasicBlockPathCloning.cpp b/llvm/lib/CodeGen/BasicBlockPathCloning.cpp index 5d5f3c3da481..901542e8507b 100644 --- a/llvm/lib/CodeGen/BasicBlockPathCloning.cpp +++ b/llvm/lib/CodeGen/BasicBlockPathCloning.cpp @@ -196,7 +196,7 @@ class BasicBlockPathCloning : public MachineFunctionPass { public: static char ID; - BasicBlockSectionsProfileReader *BBSectionsProfileReader = nullptr; + BasicBlockSectionsProfileReaderWrapperPass *BBSectionsProfileReader = nullptr; BasicBlockPathCloning() : MachineFunctionPass(ID) { initializeBasicBlockPathCloningPass(*PassRegistry::getPassRegistry()); @@ -218,7 +218,7 @@ INITIALIZE_PASS_BEGIN( BasicBlockPathCloning, "bb-path-cloning", "Applies path clonings for the -basic-block-sections=list option", false, false) -INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReader) +INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReaderWrapperPass) INITIALIZE_PASS_END( BasicBlockPathCloning, "bb-path-cloning", "Applies path clonings for the -basic-block-sections=list option", false, @@ -230,13 +230,14 @@ bool BasicBlockPathCloning::runOnMachineFunction(MachineFunction &MF) { if (hasInstrProfHashMismatch(MF)) return false; - return ApplyCloning(MF, getAnalysis() - .getClonePathsForFunction(MF.getName())); + return ApplyCloning(MF, + getAnalysis() + .getClonePathsForFunction(MF.getName())); } void BasicBlockPathCloning::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); - AU.addRequired(); + AU.addRequired(); MachineFunctionPass::getAnalysisUsage(AU); } diff --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp index 42997d2287d6..94b5a503fbd0 100644 --- a/llvm/lib/CodeGen/BasicBlockSections.cpp +++ b/llvm/lib/CodeGen/BasicBlockSections.cpp @@ -103,7 +103,7 @@ class BasicBlockSections : public MachineFunctionPass { public: static char ID; - 
BasicBlockSectionsProfileReader *BBSectionsProfileReader = nullptr; + BasicBlockSectionsProfileReaderWrapperPass *BBSectionsProfileReader = nullptr; BasicBlockSections() : MachineFunctionPass(ID) { initializeBasicBlockSectionsPass(*PassRegistry::getPassRegistry()); @@ -128,7 +128,7 @@ INITIALIZE_PASS_BEGIN( "Prepares for basic block sections, by splitting functions " "into clusters of basic blocks.", false, false) -INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReader) +INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReaderWrapperPass) INITIALIZE_PASS_END(BasicBlockSections, "bbsections-prepare", "Prepares for basic block sections, by splitting functions " "into clusters of basic blocks.", @@ -306,7 +306,7 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) { DenseMap FuncClusterInfo; if (BBSectionsType == BasicBlockSection::List) { auto [HasProfile, ClusterInfo] = - getAnalysis() + getAnalysis() .getClusterInfoForFunction(MF.getName()); if (!HasProfile) return false; @@ -362,7 +362,7 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) { void BasicBlockSections::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); - AU.addRequired(); + AU.addRequired(); MachineFunctionPass::getAnalysisUsage(AU); } diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp index 96662378a869..e4ee2ac63138 100644 --- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp +++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp @@ -30,8 +30,9 @@ using namespace llvm; -char BasicBlockSectionsProfileReader::ID = 0; -INITIALIZE_PASS(BasicBlockSectionsProfileReader, "bbsections-profile-reader", +char BasicBlockSectionsProfileReaderWrapperPass::ID = 0; +INITIALIZE_PASS(BasicBlockSectionsProfileReaderWrapperPass, + "bbsections-profile-reader", "Reads and parses a basic block sections profile.", false, false) @@ -395,11 +396,11 @@ Error 
BasicBlockSectionsProfileReader::ReadProfile() { } } -bool BasicBlockSectionsProfileReader::doInitialization(Module &M) { - if (!MBuf) +bool BasicBlockSectionsProfileReaderWrapperPass::doInitialization(Module &M) { + if (!BBSPR.MBuf) return false; // Get the function name to debug info filename mapping. - FunctionNameToDIFilename.clear(); + BBSPR.FunctionNameToDIFilename.clear(); for (const Function &F : M) { SmallString<128> DIFilename; if (F.isDeclaration()) @@ -411,15 +412,46 @@ bool BasicBlockSectionsProfileReader::doInitialization(Module &M) { DIFilename = sys::path::remove_leading_dotslash(CU->getFilename()); } [[maybe_unused]] bool inserted = - FunctionNameToDIFilename.try_emplace(F.getName(), DIFilename).second; + BBSPR.FunctionNameToDIFilename.try_emplace(F.getName(), DIFilename) + .second; assert(inserted); } - if (auto Err = ReadProfile()) + if (auto Err = BBSPR.ReadProfile()) report_fatal_error(std::move(Err)); return false; } -ImmutablePass * -llvm::createBasicBlockSectionsProfileReaderPass(const MemoryBuffer *Buf) { - return new BasicBlockSectionsProfileReader(Buf); +AnalysisKey BasicBlockSectionsProfileReaderAnalysis::Key; + +BasicBlockSectionsProfileReader +BasicBlockSectionsProfileReaderAnalysis::run(Function &F, + FunctionAnalysisManager &AM) { + return BasicBlockSectionsProfileReader(TM->getBBSectionsFuncListBuf()); +} + +bool BasicBlockSectionsProfileReaderWrapperPass::isFunctionHot( + StringRef FuncName) const { + return BBSPR.isFunctionHot(FuncName); +} + +std::pair> +BasicBlockSectionsProfileReaderWrapperPass::getClusterInfoForFunction( + StringRef FuncName) const { + return BBSPR.getClusterInfoForFunction(FuncName); +} + +SmallVector> +BasicBlockSectionsProfileReaderWrapperPass::getClonePathsForFunction( + StringRef FuncName) const { + return BBSPR.getClonePathsForFunction(FuncName); +} + +BasicBlockSectionsProfileReader & +BasicBlockSectionsProfileReaderWrapperPass::getBBSPR() { + return BBSPR; +} + +ImmutablePass 
*llvm::createBasicBlockSectionsProfileReaderWrapperPass( + const MemoryBuffer *Buf) { + return new BasicBlockSectionsProfileReaderWrapperPass(Buf); } diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp index 79a95ee0d747..d9400f6638ea 100644 --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -30,7 +30,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeCFIFixupPass(Registry); initializeCFIInstrInserterPass(Registry); initializeCheckDebugMachineModulePass(Registry); - initializeCodeGenPreparePass(Registry); + initializeCodeGenPrepareLegacyPassPass(Registry); initializeDeadMachineInstructionElimPass(Registry); initializeDebugifyMachineModulePass(Registry); initializeDetectDeadLanesPass(Registry); diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index b00df0b6c6cb..ad2c97c6a683 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -12,6 +12,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/CodeGenPrepare.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" @@ -296,7 +297,8 @@ using ValueToSExts = MapVector; class TypePromotionTransaction; -class CodeGenPrepare : public FunctionPass { +class CodeGenPrepare { + friend class CodeGenPrepareLegacyPass; const TargetMachine *TM = nullptr; const TargetSubtargetInfo *SubtargetInfo = nullptr; const TargetLowering *TLI = nullptr; @@ -360,6 +362,8 @@ class CodeGenPrepare : public FunctionPass { std::unique_ptr DT; public: + CodeGenPrepare(){}; + CodeGenPrepare(const TargetMachine *TM) : TM(TM){}; /// If encounter huge function, we need to limit the build time. bool IsHugeFunc = false; @@ -369,15 +373,7 @@ public: /// to insert such BB into FreshBBs for huge function. 
SmallSet FreshBBs; - static char ID; // Pass identification, replacement for typeid - - CodeGenPrepare() : FunctionPass(ID) { - initializeCodeGenPreparePass(*PassRegistry::getPassRegistry()); - } - - bool runOnFunction(Function &F) override; - - void releaseMemory() override { + void releaseMemory() { // Clear per function information. InsertedInsts.clear(); PromotedInsts.clear(); @@ -386,17 +382,7 @@ public: BFI.reset(); } - StringRef getPassName() const override { return "CodeGen Prepare"; } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - // FIXME: When we can selectively preserve passes, preserve the domtree. - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - AU.addUsedIfAvailable(); - } + bool run(Function &F, FunctionAnalysisManager &AM); private: template @@ -481,45 +467,108 @@ private: bool combineToUSubWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT); bool combineToUAddWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT); void verifyBFIUpdates(Function &F); + bool _run(Function &F); +}; + +class CodeGenPrepareLegacyPass : public FunctionPass { +public: + static char ID; // Pass identification, replacement for typeid + + CodeGenPrepareLegacyPass() : FunctionPass(ID) { + initializeCodeGenPrepareLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override; + + StringRef getPassName() const override { return "CodeGen Prepare"; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + // FIXME: When we can selectively preserve passes, preserve the domtree. 
+ AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addUsedIfAvailable(); + } }; } // end anonymous namespace -char CodeGenPrepare::ID = 0; +char CodeGenPrepareLegacyPass::ID = 0; -INITIALIZE_PASS_BEGIN(CodeGenPrepare, DEBUG_TYPE, +bool CodeGenPrepareLegacyPass::runOnFunction(Function &F) { + if (skipFunction(F)) + return false; + auto TM = &getAnalysis().getTM(); + CodeGenPrepare CGP(TM); + CGP.DL = &F.getParent()->getDataLayout(); + CGP.SubtargetInfo = TM->getSubtargetImpl(F); + CGP.TLI = CGP.SubtargetInfo->getTargetLowering(); + CGP.TRI = CGP.SubtargetInfo->getRegisterInfo(); + CGP.TLInfo = &getAnalysis().getTLI(F); + CGP.TTI = &getAnalysis().getTTI(F); + CGP.LI = &getAnalysis().getLoopInfo(); + CGP.BPI.reset(new BranchProbabilityInfo(F, *CGP.LI)); + CGP.BFI.reset(new BlockFrequencyInfo(F, *CGP.BPI, *CGP.LI)); + CGP.PSI = &getAnalysis().getPSI(); + auto BBSPRWP = + getAnalysisIfAvailable(); + CGP.BBSectionsProfileReader = BBSPRWP ? 
&BBSPRWP->getBBSPR() : nullptr; + + return CGP._run(F); +} + +INITIALIZE_PASS_BEGIN(CodeGenPrepareLegacyPass, DEBUG_TYPE, "Optimize for code generation", false, false) -INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReader) +INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReaderWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) -INITIALIZE_PASS_END(CodeGenPrepare, DEBUG_TYPE, "Optimize for code generation", - false, false) +INITIALIZE_PASS_END(CodeGenPrepareLegacyPass, DEBUG_TYPE, + "Optimize for code generation", false, false) -FunctionPass *llvm::createCodeGenPreparePass() { return new CodeGenPrepare(); } +FunctionPass *llvm::createCodeGenPrepareLegacyPass() { + return new CodeGenPrepareLegacyPass(); +} -bool CodeGenPrepare::runOnFunction(Function &F) { - if (skipFunction(F)) - return false; +PreservedAnalyses CodeGenPreparePass::run(Function &F, + FunctionAnalysisManager &AM) { + CodeGenPrepare CGP(TM); - DL = &F.getParent()->getDataLayout(); + bool Changed = CGP.run(F, AM); + if (!Changed) + return PreservedAnalyses::all(); - bool EverMadeChange = false; + PreservedAnalyses PA; + PA.preserve(); + PA.preserve(); + PA.preserve(); + return PA; +} - TM = &getAnalysis().getTM(); +bool CodeGenPrepare::run(Function &F, FunctionAnalysisManager &AM) { + DL = &F.getParent()->getDataLayout(); SubtargetInfo = TM->getSubtargetImpl(F); TLI = SubtargetInfo->getTargetLowering(); TRI = SubtargetInfo->getRegisterInfo(); - TLInfo = &getAnalysis().getTLI(F); - TTI = &getAnalysis().getTTI(F); - LI = &getAnalysis().getLoopInfo(); + TLInfo = &AM.getResult(F); + TTI = &AM.getResult(F); + LI = &AM.getResult(F); BPI.reset(new BranchProbabilityInfo(F, *LI)); BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI)); - PSI = &getAnalysis().getPSI(); + auto 
&MAMProxy = AM.getResult(F); + PSI = MAMProxy.getCachedResult(*F.getParent()); BBSectionsProfileReader = - getAnalysisIfAvailable(); + AM.getCachedResult(F); + return _run(F); +} + +bool CodeGenPrepare::_run(Function &F) { + bool EverMadeChange = false; + OptSize = F.hasOptSize(); // Use the basic-block-sections profile to promote hot functions to .text.hot // if requested. diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index 3396a9884e40..5133702c0a9b 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -969,7 +969,7 @@ void TargetPassConfig::addPassesToHandleExceptions() { /// before exception handling preparation passes. void TargetPassConfig::addCodeGenPrepare() { if (getOptLevel() != CodeGenOpt::None && !DisableCGP) - addPass(createCodeGenPreparePass()); + addPass(createCodeGenPrepareLegacyPass()); } /// Add common passes that perform LLVM IR to IR transforms in preparation for @@ -1263,7 +1263,7 @@ void TargetPassConfig::addMachinePasses() { // together. Update this check once we have addressed any issues. 
if (TM->getBBSectionsType() != llvm::BasicBlockSection::None) { if (TM->getBBSectionsType() == llvm::BasicBlockSection::List) { - addPass(llvm::createBasicBlockSectionsProfileReaderPass( + addPass(llvm::createBasicBlockSectionsProfileReaderWrapperPass( TM->getBBSectionsFuncListBuf())); addPass(llvm::createBasicBlockPathCloningPass()); } diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index e2fe3322aef4..74ef2510cc6b 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -71,6 +71,8 @@ #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/TypeBasedAliasAnalysis.h" #include "llvm/Analysis/UniformityAnalysis.h" +#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h" +#include "llvm/CodeGen/CodeGenPrepare.h" #include "llvm/CodeGen/HardwareLoops.h" #include "llvm/CodeGen/TypePromotion.h" #include "llvm/IR/DebugInfo.h" diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 6ef0d6791ff2..684c30e9dc22 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -255,6 +255,7 @@ CGSCC_PASS_WITH_PARAMS("function-attrs", #endif FUNCTION_ANALYSIS("aa", AAManager()) FUNCTION_ANALYSIS("assumptions", AssumptionAnalysis()) +FUNCTION_ANALYSIS("bb-sections-profile-reader", BasicBlockSectionsProfileReaderAnalysis(TM)) FUNCTION_ANALYSIS("block-freq", BlockFrequencyAnalysis()) FUNCTION_ANALYSIS("branch-prob", BranchProbabilityAnalysis()) FUNCTION_ANALYSIS("cycles", CycleAnalysis()) @@ -312,6 +313,7 @@ FUNCTION_PASS("bdce", BDCEPass()) FUNCTION_PASS("bounds-checking", BoundsCheckingPass()) FUNCTION_PASS("break-crit-edges", BreakCriticalEdgesPass()) FUNCTION_PASS("callsite-splitting", CallSiteSplittingPass()) +FUNCTION_PASS("codegenprepare", CodeGenPreparePass(TM)) FUNCTION_PASS("consthoist", ConstantHoistingPass()) FUNCTION_PASS("count-visits", CountVisitsPass()) FUNCTION_PASS("constraint-elimination", ConstraintEliminationPass()) diff --git 
a/llvm/test/CodeGen/AArch64/aarch64-codegen-prepare-atp.ll b/llvm/test/CodeGen/AArch64/aarch64-codegen-prepare-atp.ll index 92f29dac13fa..10b594978bee 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-codegen-prepare-atp.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-codegen-prepare-atp.ll @@ -1,4 +1,4 @@ -; RUN: opt -codegenprepare < %s -S | FileCheck %s +; RUN: opt -passes='require,function(codegenprepare)' < %s -S | FileCheck %s target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64--linux-gnu" diff --git a/llvm/test/CodeGen/AArch64/and-sink.ll b/llvm/test/CodeGen/AArch64/and-sink.ll index f4e9551259e4..4d085869de24 100644 --- a/llvm/test/CodeGen/AArch64/and-sink.ll +++ b/llvm/test/CodeGen/AArch64/and-sink.ll @@ -1,6 +1,6 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs < %s | FileCheck %s -; RUN: opt -S -codegenprepare -mtriple=aarch64-linux %s | FileCheck --check-prefix=CHECK-CGP %s -; RUN: opt -S -codegenprepare -cgpp-huge-func=0 -mtriple=aarch64-linux %s | FileCheck --check-prefix=CHECK-CGP %s +; RUN: opt -S -passes='require,function(codegenprepare)' -mtriple=aarch64-linux %s | FileCheck --check-prefix=CHECK-CGP %s +; RUN: opt -S -passes='require,function(codegenprepare)' -cgpp-huge-func=0 -mtriple=aarch64-linux %s | FileCheck --check-prefix=CHECK-CGP %s @A = dso_local global i32 zeroinitializer @B = dso_local global i32 zeroinitializer diff --git a/llvm/test/CodeGen/AArch64/arm64-bitfield-extract.ll b/llvm/test/CodeGen/AArch64/arm64-bitfield-extract.ll index caa5a7f9ead1..e46790d69c8a 100644 --- a/llvm/test/CodeGen/AArch64/arm64-bitfield-extract.ll +++ b/llvm/test/CodeGen/AArch64/arm64-bitfield-extract.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -codegenprepare -mtriple=arm64-apple=ios -S -o - %s | FileCheck --check-prefix=OPT %s +; RUN: opt -passes='require,function(codegenprepare)' -mtriple=arm64-apple=ios -S -o - %s | FileCheck 
--check-prefix=OPT %s ; RUN: llc < %s -mtriple=arm64-eabi | FileCheck --check-prefix=LLC %s %struct.X = type { i8, i8, [2 x i8] } diff --git a/llvm/test/CodeGen/AArch64/arm64-codegen-prepare-extload.ll b/llvm/test/CodeGen/AArch64/arm64-codegen-prepare-extload.ll index 889a76b37ebe..e44c54fe1ba2 100644 --- a/llvm/test/CodeGen/AArch64/arm64-codegen-prepare-extload.ll +++ b/llvm/test/CodeGen/AArch64/arm64-codegen-prepare-extload.ll @@ -1,6 +1,6 @@ -; RUN: opt -codegenprepare < %s -mtriple=aarch64-apple-ios -S | FileCheck -enable-var-scope %s --check-prefix=OPTALL --check-prefix=OPT --check-prefix=NONSTRESS -; RUN: opt -codegenprepare < %s -mtriple=aarch64-apple-ios -S -stress-cgp-ext-ld-promotion | FileCheck -enable-var-scope %s --check-prefix=OPTALL --check-prefix=OPT --check-prefix=STRESS -; RUN: opt -codegenprepare < %s -mtriple=aarch64-apple-ios -S -disable-cgp-ext-ld-promotion | FileCheck -enable-var-scope %s --check-prefix=OPTALL --check-prefix=DISABLE +; RUN: opt -passes='require,function(codegenprepare)' < %s -mtriple=aarch64-apple-ios -S | FileCheck -enable-var-scope %s --check-prefix=OPTALL --check-prefix=OPT --check-prefix=NONSTRESS +; RUN: opt -passes='require,function(codegenprepare)' < %s -mtriple=aarch64-apple-ios -S -stress-cgp-ext-ld-promotion | FileCheck -enable-var-scope %s --check-prefix=OPTALL --check-prefix=OPT --check-prefix=STRESS +; RUN: opt -passes='require,function(codegenprepare)' < %s -mtriple=aarch64-apple-ios -S -disable-cgp-ext-ld-promotion | FileCheck -enable-var-scope %s --check-prefix=OPTALL --check-prefix=DISABLE ; CodeGenPrepare should move the zext into the block with the load ; so that SelectionDAG can select it with the load. 
diff --git a/llvm/test/CodeGen/AArch64/arm64_32-gep-sink.ll b/llvm/test/CodeGen/AArch64/arm64_32-gep-sink.ll index 74fc7b317708..592d7eb5bd2b 100644 --- a/llvm/test/CodeGen/AArch64/arm64_32-gep-sink.ll +++ b/llvm/test/CodeGen/AArch64/arm64_32-gep-sink.ll @@ -1,4 +1,4 @@ -; RUN: opt -codegenprepare -mtriple=arm64_32-apple-ios %s -S -o - | FileCheck %s +; RUN: opt -passes='require<profile-summary>,function(codegenprepare)' -mtriple=arm64_32-apple-ios %s -S -o - | FileCheck %s define void @test_simple_sink(ptr %base, i64 %offset) { ; CHECK-LABEL: @test_simple_sink diff --git a/llvm/test/CodeGen/AArch64/cgp-trivial-phi-node.ll b/llvm/test/CodeGen/AArch64/cgp-trivial-phi-node.ll index 98b820709e82..dfcedc8dd984 100644 --- a/llvm/test/CodeGen/AArch64/cgp-trivial-phi-node.ll +++ b/llvm/test/CodeGen/AArch64/cgp-trivial-phi-node.ll @@ -1,5 +1,5 @@ ; Checks that case when GEP is bound to trivial PHI node is correctly handled. -; RUN: opt %s -mtriple=aarch64-linux-gnu -codegenprepare -S -o - | FileCheck %s +; RUN: opt %s -mtriple=aarch64-linux-gnu -passes='require<profile-summary>,function(codegenprepare)' -S -o - | FileCheck %s ; CHECK: define void @crash(ptr %s, i32 %n) { ; CHECK-NEXT: entry: diff --git a/llvm/test/CodeGen/AArch64/convertphitype.ll b/llvm/test/CodeGen/AArch64/convertphitype.ll index a5fc46d2abca..b723b470266a 100644 --- a/llvm/test/CodeGen/AArch64/convertphitype.ll +++ b/llvm/test/CodeGen/AArch64/convertphitype.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -codegenprepare -cgp-optimize-phi-types %s -S | FileCheck %s +; RUN: opt -passes='require<profile-summary>,function(codegenprepare)' -cgp-optimize-phi-types %s -S | FileCheck %s target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64--linux-gnu" diff --git a/llvm/test/CodeGen/AArch64/scalable-vector-promotion.ll b/llvm/test/CodeGen/AArch64/scalable-vector-promotion.ll index e6ab52dc9e61..a45f3733dbda 100644 --- 
a/llvm/test/CodeGen/AArch64/scalable-vector-promotion.ll +++ b/llvm/test/CodeGen/AArch64/scalable-vector-promotion.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -mtriple=aarch64 -codegenprepare -S < %s | FileCheck %s +; RUN: opt -mtriple=aarch64 -passes='require<profile-summary>,function(codegenprepare)' -S < %s | FileCheck %s ; This test intends to check vector promotion for scalable vector. Current target lowering ; rejects scalable vector before reaching getConstantVector() in CodeGenPrepare. This test diff --git a/llvm/test/CodeGen/AArch64/sve-vscale.ll b/llvm/test/CodeGen/AArch64/sve-vscale.ll index fa48808ff7f8..7214c98cc318 100644 --- a/llvm/test/CodeGen/AArch64/sve-vscale.ll +++ b/llvm/test/CodeGen/AArch64/sve-vscale.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple aarch64 -mattr=+sve -asm-verbose=0 < %s | FileCheck %s -; RUN: opt -mtriple=aarch64 -codegenprepare -S < %s | llc -mtriple=aarch64 -mattr=+sve -asm-verbose=0 | FileCheck %s +; RUN: opt -mtriple=aarch64 -passes='require<profile-summary>,function(codegenprepare)' -S < %s | llc -mtriple=aarch64 -mattr=+sve -asm-verbose=0 | FileCheck %s ; ; RDVL diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll index 3005b17e0524..88a8e7cc4220 100644 --- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll +++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=bonaire < %s | FileCheck --check-prefixes=OPT,OPT-GFX7 %s -; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tonga < %s | FileCheck --check-prefixes=OPT,OPT-GFX8 %s -; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=gfx900 < %s | FileCheck --check-prefixes=OPT,OPT-GFX9 %s -; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=gfx1030 < %s | FileCheck 
--check-prefixes=OPT,OPT-GFX10 %s +; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' -mtriple=amdgcn-unknown-unknown -mcpu=bonaire < %s | FileCheck --check-prefixes=OPT,OPT-GFX7 %s +; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' -mtriple=amdgcn-unknown-unknown -mcpu=tonga < %s | FileCheck --check-prefixes=OPT,OPT-GFX8 %s +; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' -mtriple=amdgcn-unknown-unknown -mcpu=gfx900 < %s | FileCheck --check-prefixes=OPT,OPT-GFX9 %s +; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' -mtriple=amdgcn-unknown-unknown -mcpu=gfx1030 < %s | FileCheck --check-prefixes=OPT,OPT-GFX10 %s ; RUN: llc -march=amdgcn -mcpu=bonaire < %s | FileCheck --check-prefix=GFX7 %s ; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck --check-prefix=GFX8 %s diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx1030.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx1030.ll index aee6f0e82d25..1588dde19cfb 100644 --- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx1030.ll +++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx1030.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: opt -S -codegenprepare -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefix=OPT %s +; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefix=OPT %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefix=GCN %s ; Make sure we match the addressing mode offset of csub intrinsics across blocks. 
diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll index 494b4b5c48ba..ac50fb86c96f 100644 --- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll +++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: opt -S -codegenprepare -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 < %s | FileCheck -check-prefix=OPT %s +; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 < %s | FileCheck -check-prefix=OPT %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 < %s | FileCheck -check-prefix=GCN %s ; Make sure we match the addressing mode offset of globla.atomic.fadd intrinsics across blocks. diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll index 13b4b4786b94..90c226d98e86 100644 --- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll +++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll @@ -1,7 +1,7 @@ -; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tahiti < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-SI -check-prefix=OPT-SICIVI %s -; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=bonaire < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-CI -check-prefix=OPT-SICIVI %s -; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-VI -check-prefix=OPT-SICIVI %s -; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=gfx900 < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-GFX9 %s +; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' -mtriple=amdgcn-unknown-unknown -mcpu=tahiti < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-SI -check-prefix=OPT-SICIVI %s +; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' 
-mtriple=amdgcn-unknown-unknown -mcpu=bonaire < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-CI -check-prefix=OPT-SICIVI %s +; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' -mtriple=amdgcn-unknown-unknown -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-VI -check-prefix=OPT-SICIVI %s +; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' -mtriple=amdgcn-unknown-unknown -mcpu=gfx900 < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-GFX9 %s ; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-promote-alloca -amdgpu-scalarize-global-loads=false -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SICIVI %s ; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca -amdgpu-scalarize-global-loads=false -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=SICIVI %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -amdgpu-scalarize-global-loads=false -mattr=-promote-alloca -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=SICIVI %s diff --git a/llvm/test/CodeGen/ARM/vector-promotion.ll b/llvm/test/CodeGen/ARM/vector-promotion.ll index 3e314306ff08..f4a2a4a4521e 100644 --- a/llvm/test/CodeGen/ARM/vector-promotion.ll +++ b/llvm/test/CodeGen/ARM/vector-promotion.ll @@ -1,5 +1,5 @@ -; RUN: opt -codegenprepare -mtriple=thumbv7-apple-ios %s -o - -mattr=+neon -S | FileCheck --check-prefix=IR-BOTH --check-prefix=IR-NORMAL %s -; RUN: opt -codegenprepare -mtriple=thumbv7-apple-ios %s -o - -mattr=+neon -S -stress-cgp-store-extract | FileCheck --check-prefix=IR-BOTH --check-prefix=IR-STRESS %s +; RUN: opt -passes='require<profile-summary>,function(codegenprepare)' -mtriple=thumbv7-apple-ios %s -o - -mattr=+neon -S | FileCheck --check-prefix=IR-BOTH --check-prefix=IR-NORMAL %s +; RUN: opt -passes='require<profile-summary>,function(codegenprepare)' -mtriple=thumbv7-apple-ios %s -o - -mattr=+neon -S -stress-cgp-store-extract | FileCheck --check-prefix=IR-BOTH 
--check-prefix=IR-STRESS %s ; RUN: llc -mtriple=thumbv7-apple-ios %s -o - -mattr=+neon | FileCheck --check-prefix=ASM %s ; IR-BOTH-LABEL: @simpleOneInstructionPromotion diff --git a/llvm/test/CodeGen/Generic/addr-sink-call-multi-arg.ll b/llvm/test/CodeGen/Generic/addr-sink-call-multi-arg.ll index b02bdc3b5724..2a8d05c4f26e 100644 --- a/llvm/test/CodeGen/Generic/addr-sink-call-multi-arg.ll +++ b/llvm/test/CodeGen/Generic/addr-sink-call-multi-arg.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -codegenprepare < %s | FileCheck %s +; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' < %s | FileCheck %s ; REQUIRES: aarch64-registered-target ; Check that we don't give up if unable to sink the first argument. diff --git a/llvm/test/CodeGen/Generic/addr-use-count.ll b/llvm/test/CodeGen/Generic/addr-use-count.ll index 00943b5a58e2..5c4c0c618794 100644 --- a/llvm/test/CodeGen/Generic/addr-use-count.ll +++ b/llvm/test/CodeGen/Generic/addr-use-count.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -codegenprepare < %s | FileCheck %s +; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' < %s | FileCheck %s ; REQUIRES: aarch64-registered-target ; Test that `%addr` is sunk, after we've increased limit on the number of the memory uses to scan. diff --git a/llvm/test/CodeGen/X86/callbr-codegenprepare.ll b/llvm/test/CodeGen/X86/callbr-codegenprepare.ll index 854cc4bf7a9c..d2380a730b5b 100644 --- a/llvm/test/CodeGen/X86/callbr-codegenprepare.ll +++ b/llvm/test/CodeGen/X86/callbr-codegenprepare.ll @@ -1,4 +1,4 @@ -;; RUN: opt -S -codegenprepare < %s | FileCheck %s +;; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' < %s | FileCheck %s ;; Ensure that codegenprepare (via InstSimplify) doesn't eliminate the ;; phi here (which would cause a module verification error). 
diff --git a/llvm/test/CodeGen/X86/codegen-prepare-addrmode-sext.ll b/llvm/test/CodeGen/X86/codegen-prepare-addrmode-sext.ll index 6e95c91e7398..c611e89f2786 100644 --- a/llvm/test/CodeGen/X86/codegen-prepare-addrmode-sext.ll +++ b/llvm/test/CodeGen/X86/codegen-prepare-addrmode-sext.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -codegenprepare %s -o - | FileCheck %s +; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' %s -o - | FileCheck %s ; This file tests the different cases what are involved when codegen prepare ; tries to get sign/zero extension out of the way of addressing mode. ; This tests require an actual target as addressing mode decisions depends diff --git a/llvm/test/CodeGen/X86/codegen-prepare-extload.ll b/llvm/test/CodeGen/X86/codegen-prepare-extload.ll index 6695576c557a..19d3e386ba62 100644 --- a/llvm/test/CodeGen/X86/codegen-prepare-extload.ll +++ b/llvm/test/CodeGen/X86/codegen-prepare-extload.ll @@ -1,8 +1,8 @@ ; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s ; RUN: llc < %s -mtriple=x86_64-win64 | FileCheck %s -; RUN: opt -codegenprepare < %s -mtriple=x86_64-apple-macosx -S | FileCheck %s --check-prefix=OPTALL --check-prefix=OPT --check-prefix=NONSTRESS -; RUN: opt -codegenprepare < %s -mtriple=x86_64-apple-macosx -S -stress-cgp-ext-ld-promotion | FileCheck %s --check-prefix=OPTALL --check-prefix=OPT --check-prefix=STRESS -; RUN: opt -codegenprepare < %s -mtriple=x86_64-apple-macosx -S -disable-cgp-ext-ld-promotion | FileCheck %s --check-prefix=OPTALL --check-prefix=DISABLE +; RUN: opt -passes='require<profile-summary>,function(codegenprepare)' < %s -mtriple=x86_64-apple-macosx -S | FileCheck %s --check-prefix=OPTALL --check-prefix=OPT --check-prefix=NONSTRESS +; RUN: opt -passes='require<profile-summary>,function(codegenprepare)' < %s -mtriple=x86_64-apple-macosx -S -stress-cgp-ext-ld-promotion | FileCheck %s --check-prefix=OPTALL --check-prefix=OPT --check-prefix=STRESS +; RUN: opt -passes='require<profile-summary>,function(codegenprepare)' < %s -mtriple=x86_64-apple-macosx -S 
-disable-cgp-ext-ld-promotion | FileCheck %s --check-prefix=OPTALL --check-prefix=DISABLE ; rdar://7304838 ; CodeGenPrepare should move the zext into the block with the load diff --git a/llvm/test/CodeGen/X86/convertphitype.ll b/llvm/test/CodeGen/X86/convertphitype.ll index df01612252bf..6c77236fc698 100644 --- a/llvm/test/CodeGen/X86/convertphitype.ll +++ b/llvm/test/CodeGen/X86/convertphitype.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -codegenprepare -cgp-optimize-phi-types=true %s -S | FileCheck %s +; RUN: opt -passes='require<profile-summary>,function(codegenprepare)' -cgp-optimize-phi-types=true %s -S | FileCheck %s target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128" target triple = "i386-unknown-linux-gnu" diff --git a/llvm/test/CodeGen/X86/pr58538.ll b/llvm/test/CodeGen/X86/pr58538.ll index 6c4103718950..7bae2594fc02 100644 --- a/llvm/test/CodeGen/X86/pr58538.ll +++ b/llvm/test/CodeGen/X86/pr58538.ll @@ -1,5 +1,5 @@ -; RUN: opt -codegenprepare -mtriple=x86_64 %s -S -o - | FileCheck %s -; RUN: opt -codegenprepare -mtriple=i386 %s -S -o - | FileCheck %s +; RUN: opt -passes='require<profile-summary>,function(codegenprepare)' -mtriple=x86_64 %s -S -o - | FileCheck %s +; RUN: opt -passes='require<profile-summary>,function(codegenprepare)' -mtriple=i386 %s -S -o - | FileCheck %s define i32 @f(i32 %0) { ; CHECK-LABEL: @f diff --git a/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll b/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll index d8fdce63fecd..c9c037f1fcdc 100644 --- a/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll +++ b/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s -; RUN: opt -S -codegenprepare %s -mtriple=x86_64-apple-darwin -o - | FileCheck %s --check-prefix OPT +; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' %s -mtriple=x86_64-apple-darwin -o - | FileCheck %s --check-prefix OPT ; Teach CGP to 
dup returns to enable tail call optimization. ; rdar://9147433 diff --git a/llvm/test/CodeGen/X86/tailcall-extract.ll b/llvm/test/CodeGen/X86/tailcall-extract.ll index c3597a8e5b99..7a6c75c44ca7 100644 --- a/llvm/test/CodeGen/X86/tailcall-extract.ll +++ b/llvm/test/CodeGen/X86/tailcall-extract.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=x86_64-linux < %s | FileCheck %s -; RUN: opt -codegenprepare -S -mtriple=x86_64-linux < %s | FileCheck %s --check-prefix OPT +; RUN: opt -passes='require<profile-summary>,function(codegenprepare)' -S -mtriple=x86_64-linux < %s | FileCheck %s --check-prefix OPT ; The exit block containing extractvalue can be duplicated into the BB diff --git a/llvm/test/DebugInfo/ARM/salvage-debug-info.ll b/llvm/test/DebugInfo/ARM/salvage-debug-info.ll index 3717abada42e..1564a80ded0e 100644 --- a/llvm/test/DebugInfo/ARM/salvage-debug-info.ll +++ b/llvm/test/DebugInfo/ARM/salvage-debug-info.ll @@ -1,4 +1,4 @@ -; RUN: opt -codegenprepare -S %s -o - | FileCheck %s +; RUN: opt -passes='require<profile-summary>,function(codegenprepare)' -S %s -o - | FileCheck %s ; typedef struct info { ; unsigned long long size; ; } info_t; diff --git a/llvm/test/DebugInfo/X86/zextload.ll b/llvm/test/DebugInfo/X86/zextload.ll index 888e230c258d..05d92b09c20c 100644 --- a/llvm/test/DebugInfo/X86/zextload.ll +++ b/llvm/test/DebugInfo/X86/zextload.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -codegenprepare < %s | FileCheck %s +; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' < %s | FileCheck %s ; ; This test case was generated from the following source code: ; diff --git a/llvm/test/Other/codegenprepare-and-debug.ll b/llvm/test/Other/codegenprepare-and-debug.ll index cc6344c1143f..51a846e27ff2 100644 --- a/llvm/test/Other/codegenprepare-and-debug.ll +++ b/llvm/test/Other/codegenprepare-and-debug.ll @@ -1,4 +1,4 @@ -; RUN: opt -codegenprepare -S < %s | FileCheck %s +; RUN: opt -passes='require<profile-summary>,function(codegenprepare)' -S < %s | FileCheck %s ; RUN: opt -strip-debug -codegenprepare -S < %s | FileCheck %s ; REQUIRES: 
x86-registered-target diff --git a/llvm/test/Transforms/CodeGenPrepare/AArch64/combine-address-mode.ll b/llvm/test/Transforms/CodeGenPrepare/AArch64/combine-address-mode.ll index 91194864c013..25d4492f4c16 100644 --- a/llvm/test/Transforms/CodeGenPrepare/AArch64/combine-address-mode.ll +++ b/llvm/test/Transforms/CodeGenPrepare/AArch64/combine-address-mode.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -codegenprepare -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s +; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s @_MergedGlobals = external dso_local global <{ i32, i32 }>, align 4 diff --git a/llvm/test/Transforms/CodeGenPrepare/AArch64/free-zext.ll b/llvm/test/Transforms/CodeGenPrepare/AArch64/free-zext.ll index adb13f1a4c9f..de7eeada6024 100644 --- a/llvm/test/Transforms/CodeGenPrepare/AArch64/free-zext.ll +++ b/llvm/test/Transforms/CodeGenPrepare/AArch64/free-zext.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -codegenprepare -mtriple=aarch64-linux %s | FileCheck -enable-var-scope %s +; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' -mtriple=aarch64-linux %s | FileCheck -enable-var-scope %s ; Test for CodeGenPrepare::optimizeLoadExt(): simple case: two loads ; feeding a phi that zext's each loaded value. 
diff --git a/llvm/test/Transforms/CodeGenPrepare/AArch64/gather-scatter-opt-inseltpoison.ll b/llvm/test/Transforms/CodeGenPrepare/AArch64/gather-scatter-opt-inseltpoison.ll index 0114d7f9f409..469d818af28f 100644 --- a/llvm/test/Transforms/CodeGenPrepare/AArch64/gather-scatter-opt-inseltpoison.ll +++ b/llvm/test/Transforms/CodeGenPrepare/AArch64/gather-scatter-opt-inseltpoison.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -codegenprepare < %s | FileCheck %s +; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' < %s | FileCheck %s target triple = "aarch64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/CodeGenPrepare/AArch64/gather-scatter-opt.ll b/llvm/test/Transforms/CodeGenPrepare/AArch64/gather-scatter-opt.ll index e4c5b4ceee54..6444f6adcdcc 100644 --- a/llvm/test/Transforms/CodeGenPrepare/AArch64/gather-scatter-opt.ll +++ b/llvm/test/Transforms/CodeGenPrepare/AArch64/gather-scatter-opt.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -codegenprepare < %s | FileCheck %s +; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' < %s | FileCheck %s target triple = "aarch64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/CodeGenPrepare/AArch64/overflow-intrinsics.ll b/llvm/test/Transforms/CodeGenPrepare/AArch64/overflow-intrinsics.ll index 4caf6d0dc893..f72679f55e11 100644 --- a/llvm/test/Transforms/CodeGenPrepare/AArch64/overflow-intrinsics.ll +++ b/llvm/test/Transforms/CodeGenPrepare/AArch64/overflow-intrinsics.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -codegenprepare -S < %s | FileCheck %s -; RUN: opt -enable-debugify -codegenprepare -S < %s 2>&1 | FileCheck %s -check-prefix=DEBUG +; RUN: opt -passes='require<profile-summary>,function(codegenprepare)' -S < %s | FileCheck %s +; RUN: opt -enable-debugify -passes='require<profile-summary>,function(codegenprepare)' -S < %s 2>&1 | FileCheck %s 
-check-prefix=DEBUG ; Subset of tests from llvm/tests/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll ; to test shouldFormOverflowOp on SPARC, where it is not profitable to create @@ -167,5 +167,5 @@ define i1 @usubo_ult_i64_math_overflow_used(i64 %x, i64 %y, ptr %p) { ret i1 %ov } -; Check that every instruction inserted by -codegenprepare has a debug location. +; Check that every instruction inserted by -passes='require<profile-summary>,function(codegenprepare)' has a debug location. ; DEBUG: CheckModuleDebugify: PASS diff --git a/llvm/test/Transforms/CodeGenPrepare/AArch64/trunc-weird-user.ll b/llvm/test/Transforms/CodeGenPrepare/AArch64/trunc-weird-user.ll index 6a8b5733889e..fa53d536fa38 100644 --- a/llvm/test/Transforms/CodeGenPrepare/AArch64/trunc-weird-user.ll +++ b/llvm/test/Transforms/CodeGenPrepare/AArch64/trunc-weird-user.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -codegenprepare -mtriple=arm64-apple-ios7.0 %s | FileCheck %s +; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' -mtriple=arm64-apple-ios7.0 %s | FileCheck %s %foo = type { i8 } diff --git a/llvm/test/Transforms/CodeGenPrepare/AArch64/zext-to-shuffle.ll b/llvm/test/Transforms/CodeGenPrepare/AArch64/zext-to-shuffle.ll index 60b1e81e3dcd..8999e8d901ad 100644 --- a/llvm/test/Transforms/CodeGenPrepare/AArch64/zext-to-shuffle.ll +++ b/llvm/test/Transforms/CodeGenPrepare/AArch64/zext-to-shuffle.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -codegenprepare -S %s | FileCheck %s +; RUN: opt -passes='require<profile-summary>,function(codegenprepare)' -S %s | FileCheck %s target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "arm64-apple-ios" diff --git a/llvm/test/Transforms/CodeGenPrepare/AMDGPU/addressing-modes.ll b/llvm/test/Transforms/CodeGenPrepare/AMDGPU/addressing-modes.ll index acd40d744f71..20a5a9ededac 100644 --- a/llvm/test/Transforms/CodeGenPrepare/AMDGPU/addressing-modes.ll +++ b/llvm/test/Transforms/CodeGenPrepare/AMDGPU/addressing-modes.ll @@ 
-1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -codegenprepare -mtriple=amdgcn--amdhsa < %s | FileCheck %s +; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' -mtriple=amdgcn--amdhsa < %s | FileCheck %s define amdgpu_kernel void @test_sink_as999_small_max_mubuf_offset(ptr addrspace(999) %out, ptr addrspace(999) %in) { ; CHECK-LABEL: @test_sink_as999_small_max_mubuf_offset( diff --git a/llvm/test/Transforms/CodeGenPrepare/AMDGPU/no-sink-addrspacecast.ll b/llvm/test/Transforms/CodeGenPrepare/AMDGPU/no-sink-addrspacecast.ll index 63098ab098a2..c64d5a357f3a 100644 --- a/llvm/test/Transforms/CodeGenPrepare/AMDGPU/no-sink-addrspacecast.ll +++ b/llvm/test/Transforms/CodeGenPrepare/AMDGPU/no-sink-addrspacecast.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown < %s | FileCheck -check-prefix=ASC -check-prefix=COMMON %s +; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' -mtriple=amdgcn-unknown-unknown < %s | FileCheck -check-prefix=ASC -check-prefix=COMMON %s ; COMMON-LABEL: @test_sink_ptrtoint_asc( ; ASC: addrspacecast diff --git a/llvm/test/Transforms/CodeGenPrepare/AMDGPU/sink-addrspacecast.ll b/llvm/test/Transforms/CodeGenPrepare/AMDGPU/sink-addrspacecast.ll index 51d9e5f1ba32..22242122833e 100644 --- a/llvm/test/Transforms/CodeGenPrepare/AMDGPU/sink-addrspacecast.ll +++ b/llvm/test/Transforms/CodeGenPrepare/AMDGPU/sink-addrspacecast.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -codegenprepare -mtriple=amdgcn--amdhsa < %s | FileCheck %s +; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' -mtriple=amdgcn--amdhsa < %s | FileCheck %s ; CHECK-LABEL: @no_sink_local_to_flat( ; CHECK: addrspacecast diff --git a/llvm/test/Transforms/CodeGenPrepare/ARM/branch-on-zero.ll b/llvm/test/Transforms/CodeGenPrepare/ARM/branch-on-zero.ll index 996cab1d1d2c..ff5cef7e781f 100644 --- a/llvm/test/Transforms/CodeGenPrepare/ARM/branch-on-zero.ll +++ 
b/llvm/test/Transforms/CodeGenPrepare/ARM/branch-on-zero.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -codegenprepare < %s | FileCheck %s +; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' < %s | FileCheck %s target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "thumbv8.1m.main-none-eabi" diff --git a/llvm/test/Transforms/CodeGenPrepare/ARM/dead-gep.ll b/llvm/test/Transforms/CodeGenPrepare/ARM/dead-gep.ll index 52c06fd52b7b..f84491933b25 100644 --- a/llvm/test/Transforms/CodeGenPrepare/ARM/dead-gep.ll +++ b/llvm/test/Transforms/CodeGenPrepare/ARM/dead-gep.ll @@ -1,4 +1,4 @@ -; RUN: opt -codegenprepare -S %s -o - | FileCheck %s +; RUN: opt -passes='require<profile-summary>,function(codegenprepare)' -S %s -o - | FileCheck %s target triple = "thumbv7-apple-ios7.0.0" diff --git a/llvm/test/Transforms/CodeGenPrepare/ARM/memory-intrinsics.ll b/llvm/test/Transforms/CodeGenPrepare/ARM/memory-intrinsics.ll index ae76dbda4aa1..9bec9b53ed0c 100644 --- a/llvm/test/Transforms/CodeGenPrepare/ARM/memory-intrinsics.ll +++ b/llvm/test/Transforms/CodeGenPrepare/ARM/memory-intrinsics.ll @@ -1,4 +1,4 @@ -; RUN: opt -codegenprepare -mtriple=arm7-unknown-unknown -S < %s | FileCheck %s +; RUN: opt -passes='require<profile-summary>,function(codegenprepare)' -mtriple=arm7-unknown-unknown -S < %s | FileCheck %s declare void @llvm.memcpy.p0.p0.i32(ptr, ptr, i32, i1) nounwind declare void @llvm.memmove.p0.p0.i32(ptr, ptr, i32, i1) nounwind diff --git a/llvm/test/Transforms/CodeGenPrepare/ARM/overflow-intrinsics.ll b/llvm/test/Transforms/CodeGenPrepare/ARM/overflow-intrinsics.ll index 3fbc21331410..2f32ef94f82c 100644 --- a/llvm/test/Transforms/CodeGenPrepare/ARM/overflow-intrinsics.ll +++ b/llvm/test/Transforms/CodeGenPrepare/ARM/overflow-intrinsics.ll @@ -1,4 +1,4 @@ -; RUN: opt -codegenprepare -S < %s | FileCheck %s +; RUN: opt -passes='require<profile-summary>,function(codegenprepare)' -S < %s | FileCheck %s target datalayout = 
"e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "thumbv8m.main-arm-none-eabi" diff --git a/llvm/test/Transforms/CodeGenPrepare/ARM/sink-addrmode.ll b/llvm/test/Transforms/CodeGenPrepare/ARM/sink-addrmode.ll index 838486aa2486..49cffe286a54 100644 --- a/llvm/test/Transforms/CodeGenPrepare/ARM/sink-addrmode.ll +++ b/llvm/test/Transforms/CodeGenPrepare/ARM/sink-addrmode.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -codegenprepare -mtriple=thumbv7m -disable-complex-addr-modes=false -addr-sink-new-select=true -addr-sink-new-phis=true < %s | FileCheck %s +; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' -mtriple=thumbv7m -disable-complex-addr-modes=false -addr-sink-new-select=true -addr-sink-new-phis=true < %s | FileCheck %s target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" diff --git a/llvm/test/Transforms/CodeGenPrepare/ARM/splitgep.ll b/llvm/test/Transforms/CodeGenPrepare/ARM/splitgep.ll index cd2087d94149..69c8b92a3558 100644 --- a/llvm/test/Transforms/CodeGenPrepare/ARM/splitgep.ll +++ b/llvm/test/Transforms/CodeGenPrepare/ARM/splitgep.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -codegenprepare %s | FileCheck %s +; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' %s | FileCheck %s target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "thumbv6m-arm-none-eabi" diff --git a/llvm/test/Transforms/CodeGenPrepare/ARM/tailcall-dup.ll b/llvm/test/Transforms/CodeGenPrepare/ARM/tailcall-dup.ll index 76b119fe36aa..3f113e6ea163 100644 --- a/llvm/test/Transforms/CodeGenPrepare/ARM/tailcall-dup.ll +++ b/llvm/test/Transforms/CodeGenPrepare/ARM/tailcall-dup.ll @@ -1,4 +1,4 @@ -; RUN: opt -codegenprepare -S < %s | FileCheck %s +; RUN: opt -passes='require<profile-summary>,function(codegenprepare)' -S < %s | FileCheck %s target triple = "armv8m.main-none-eabi" diff --git a/llvm/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div-constant-numerator.ll b/llvm/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div-constant-numerator.ll index 
94adf1970938..b8a45e847afc 100644 --- a/llvm/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div-constant-numerator.ll +++ b/llvm/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div-constant-numerator.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -codegenprepare < %s | FileCheck %s +; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' < %s | FileCheck %s target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" target triple = "nvptx64-nvidia-cuda" diff --git a/llvm/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div-not-exact.ll b/llvm/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div-not-exact.ll index c571da4411e7..d46aaf51a59c 100644 --- a/llvm/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div-not-exact.ll +++ b/llvm/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div-not-exact.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -codegenprepare < %s | FileCheck %s +; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' < %s | FileCheck %s target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" target triple = "nvptx64-nvidia-cuda" diff --git a/llvm/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div-special-cases.ll b/llvm/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div-special-cases.ll index 21e47c614ad0..9ec533c2e653 100644 --- a/llvm/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div-special-cases.ll +++ b/llvm/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div-special-cases.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -codegenprepare < %s | FileCheck %s +; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' < %s | FileCheck %s target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" target triple = "nvptx64-nvidia-cuda" diff --git a/llvm/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div.ll b/llvm/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div.ll index 424f7c3b0271..463fa0d487a7 100644 --- a/llvm/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div.ll +++ 
b/llvm/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -codegenprepare < %s | FileCheck %s +; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' < %s | FileCheck %s target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" target triple = "nvptx64-nvidia-cuda" diff --git a/llvm/test/Transforms/CodeGenPrepare/NVPTX/dont-introduce-addrspacecast.ll b/llvm/test/Transforms/CodeGenPrepare/NVPTX/dont-introduce-addrspacecast.ll index 17b0dbf81ac2..af9b5ccd7287 100644 --- a/llvm/test/Transforms/CodeGenPrepare/NVPTX/dont-introduce-addrspacecast.ll +++ b/llvm/test/Transforms/CodeGenPrepare/NVPTX/dont-introduce-addrspacecast.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -codegenprepare < %s | FileCheck %s +; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' < %s | FileCheck %s target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" target triple = "nvptx64-nvidia-cuda" diff --git a/llvm/test/Transforms/CodeGenPrepare/NVPTX/dont-sink-nop-addrspacecast.ll b/llvm/test/Transforms/CodeGenPrepare/NVPTX/dont-sink-nop-addrspacecast.ll index 374f30dba508..7ee7f0550318 100644 --- a/llvm/test/Transforms/CodeGenPrepare/NVPTX/dont-sink-nop-addrspacecast.ll +++ b/llvm/test/Transforms/CodeGenPrepare/NVPTX/dont-sink-nop-addrspacecast.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -codegenprepare < %s | FileCheck %s +; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' < %s | FileCheck %s target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" target triple = "nvptx64-nvidia-cuda" diff --git a/llvm/test/Transforms/CodeGenPrepare/PowerPC/split-store-alignment.ll b/llvm/test/Transforms/CodeGenPrepare/PowerPC/split-store-alignment.ll index 79aebb9c247a..65177d5ae3d7 100644 --- a/llvm/test/Transforms/CodeGenPrepare/PowerPC/split-store-alignment.ll +++ b/llvm/test/Transforms/CodeGenPrepare/PowerPC/split-store-alignment.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -codegenprepare -mtriple=powerpc64-unknown-linux-gnu 
-data-layout="E-m:e-i64:64-n32:64" -force-split-store < %s | FileCheck --check-prefix=BE %s -; RUN: opt -S -codegenprepare -mtriple=powerpc64le-unknown-linux-gnu -data-layout="e-m:e-i64:64-n32:64" -force-split-store < %s | FileCheck --check-prefix=LE %s +; RUN: opt -S -passes='require,function(codegenprepare)' -mtriple=powerpc64-unknown-linux-gnu -data-layout="E-m:e-i64:64-n32:64" -force-split-store < %s | FileCheck --check-prefix=BE %s +; RUN: opt -S -passes='require,function(codegenprepare)' -mtriple=powerpc64le-unknown-linux-gnu -data-layout="e-m:e-i64:64-n32:64" -force-split-store < %s | FileCheck --check-prefix=LE %s define void @split_store_align1(float %x, ptr %p) { ; BE-LABEL: @split_store_align1( diff --git a/llvm/test/Transforms/CodeGenPrepare/RISCV/and-mask-sink.ll b/llvm/test/Transforms/CodeGenPrepare/RISCV/and-mask-sink.ll index 130401d78171..863b0b4ad26f 100644 --- a/llvm/test/Transforms/CodeGenPrepare/RISCV/and-mask-sink.ll +++ b/llvm/test/Transforms/CodeGenPrepare/RISCV/and-mask-sink.ll @@ -1,11 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -codegenprepare -mtriple=riscv32 %s \ +; RUN: opt -S -passes='require,function(codegenprepare)' -mtriple=riscv32 %s \ ; RUN: | FileCheck --check-prefixes=CHECK,NOZBS %s -; RUN: opt -S -codegenprepare -mtriple=riscv32 -mattr=+zbs %s \ +; RUN: opt -S -passes='require,function(codegenprepare)' -mtriple=riscv32 -mattr=+zbs %s \ ; RUN: | FileCheck --check-prefixes=CHECK,ZBS %s -; RUN: opt -S -codegenprepare -mtriple=riscv64 %s \ +; RUN: opt -S -passes='require,function(codegenprepare)' -mtriple=riscv64 %s \ ; RUN: | FileCheck --check-prefixes=CHECK,NOZBS %s -; RUN: opt -S -codegenprepare -mtriple=riscv64 -mattr=zbs %s \ +; RUN: opt -S -passes='require,function(codegenprepare)' -mtriple=riscv64 -mattr=zbs %s \ ; RUN: | FileCheck --check-prefixes=CHECK,ZBS %s @A = global i32 zeroinitializer diff --git a/llvm/test/Transforms/CodeGenPrepare/RISCV/cttz-ctlz.ll 
b/llvm/test/Transforms/CodeGenPrepare/RISCV/cttz-ctlz.ll index c70112e91ebd..00ad32e96748 100644 --- a/llvm/test/Transforms/CodeGenPrepare/RISCV/cttz-ctlz.ll +++ b/llvm/test/Transforms/CodeGenPrepare/RISCV/cttz-ctlz.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -codegenprepare < %s | FileCheck %s +; RUN: opt -S -passes='require,function(codegenprepare)' < %s | FileCheck %s target triple = "riscv64-unknown-unknown" diff --git a/llvm/test/Transforms/CodeGenPrepare/SPARC/overflow-intrinsics.ll b/llvm/test/Transforms/CodeGenPrepare/SPARC/overflow-intrinsics.ll index 7525ae14fa35..ec60238cbf92 100644 --- a/llvm/test/Transforms/CodeGenPrepare/SPARC/overflow-intrinsics.ll +++ b/llvm/test/Transforms/CodeGenPrepare/SPARC/overflow-intrinsics.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -codegenprepare -S < %s | FileCheck %s -; RUN: opt -enable-debugify -codegenprepare -S < %s 2>&1 | FileCheck %s -check-prefix=DEBUG +; RUN: opt -passes='require,function(codegenprepare)' -S < %s | FileCheck %s +; RUN: opt -enable-debugify -passes='require,function(codegenprepare)' -S < %s 2>&1 | FileCheck %s -check-prefix=DEBUG ; Subset of tests from llvm/tests/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll ; to test shouldFormOverflowOp on SPARC, where it is not profitable to create @@ -119,5 +119,5 @@ define i1 @usubo_ult_i64_math_overflow_used(i64 %x, i64 %y, ptr %p) { ret i1 %ov } -; Check that every instruction inserted by -codegenprepare has a debug location. +; Check that every instruction inserted by -passes='require,function(codegenprepare)' has a debug location. 
; DEBUG: CheckModuleDebugify: PASS diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/catchpad-phi-cast.ll b/llvm/test/Transforms/CodeGenPrepare/X86/catchpad-phi-cast.ll index d23dd8c792e0..ce79e9480bb3 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/catchpad-phi-cast.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/catchpad-phi-cast.ll @@ -1,4 +1,4 @@ -; RUN: opt -codegenprepare -S < %s | FileCheck %s +; RUN: opt -passes='require,function(codegenprepare)' -S < %s | FileCheck %s ; The following target lines are needed for the test to exercise what it should. ; Without these lines, CodeGenPrepare does not try to sink the bitcasts. diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/cgp_shuffle_crash-inseltpoison.ll b/llvm/test/Transforms/CodeGenPrepare/X86/cgp_shuffle_crash-inseltpoison.ll index 9eede8cf361b..bcb7edd6e91b 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/cgp_shuffle_crash-inseltpoison.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/cgp_shuffle_crash-inseltpoison.ll @@ -1,4 +1,4 @@ -; RUN: opt -codegenprepare -S %s | FileCheck %s +; RUN: opt -passes='require,function(codegenprepare)' -S %s | FileCheck %s target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/cgp_shuffle_crash.ll b/llvm/test/Transforms/CodeGenPrepare/X86/cgp_shuffle_crash.ll index 7433ff74bab5..9ce830c39477 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/cgp_shuffle_crash.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/cgp_shuffle_crash.ll @@ -1,4 +1,4 @@ -; RUN: opt -codegenprepare -S %s | FileCheck %s +; RUN: opt -passes='require,function(codegenprepare)' -S %s | FileCheck %s target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/computedgoto.ll b/llvm/test/Transforms/CodeGenPrepare/X86/computedgoto.ll index 6be4881e43bc..83abe336c9e8 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/computedgoto.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/computedgoto.ll @@ -1,5 +1,5 @@ ; 
NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -codegenprepare -S < %s | FileCheck %s +; RUN: opt -passes='require,function(codegenprepare)' -S < %s | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/cttz-ctlz.ll b/llvm/test/Transforms/CodeGenPrepare/X86/cttz-ctlz.ll index 440afdeff10a..b456f551c8c8 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/cttz-ctlz.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/cttz-ctlz.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -codegenprepare < %s | FileCheck %s --check-prefix=SLOW -; RUN: opt -S -codegenprepare -mattr=+bmi < %s | FileCheck %s --check-prefix=FAST_TZ -; RUN: opt -S -codegenprepare -mattr=+lzcnt < %s | FileCheck %s --check-prefix=FAST_LZ +; RUN: opt -S -passes='require,function(codegenprepare)' < %s | FileCheck %s --check-prefix=SLOW +; RUN: opt -S -passes='require,function(codegenprepare)' -mattr=+bmi < %s | FileCheck %s --check-prefix=FAST_TZ +; RUN: opt -S -passes='require,function(codegenprepare)' -mattr=+lzcnt < %s | FileCheck %s --check-prefix=FAST_LZ target triple = "x86_64-unknown-unknown" target datalayout = "e-n32:64" diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/delete-assume-dead-code.ll b/llvm/test/Transforms/CodeGenPrepare/X86/delete-assume-dead-code.ll index ef7bb22c6d14..f6d18ab51f17 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/delete-assume-dead-code.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/delete-assume-dead-code.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -codegenprepare -S -mtriple=x86_64-linux < %s | FileCheck %s +; RUN: opt -passes='require,function(codegenprepare)' -S -mtriple=x86_64-linux < %s | FileCheck %s define i32 @test1(ptr %d) nounwind { ; CHECK-LABEL: @test1( diff --git 
a/llvm/test/Transforms/CodeGenPrepare/X86/extend-sink-hoist.ll b/llvm/test/Transforms/CodeGenPrepare/X86/extend-sink-hoist.ll index a0814e0a5f20..5349afc18d84 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/extend-sink-hoist.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/extend-sink-hoist.ll @@ -1,4 +1,4 @@ -; RUN: opt -codegenprepare -disable-cgp-branch-opts -S < %s | FileCheck %s +; RUN: opt -passes='require,function(codegenprepare)' -disable-cgp-branch-opts -S < %s | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/freeze-brcond.ll b/llvm/test/Transforms/CodeGenPrepare/X86/freeze-brcond.ll index c37227f5fa82..e9ecfd1615d4 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/freeze-brcond.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/freeze-brcond.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -codegenprepare < %s | FileCheck %s +; RUN: opt -S -passes='require,function(codegenprepare)' < %s | FileCheck %s target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/gather-scatter-opt-inseltpoison.ll b/llvm/test/Transforms/CodeGenPrepare/X86/gather-scatter-opt-inseltpoison.ll index 41b1ac2c05fc..e62ba5d5a7f5 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/gather-scatter-opt-inseltpoison.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/gather-scatter-opt-inseltpoison.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -codegenprepare < %s | FileCheck %s -; RUN: opt -S -codegenprepare -cgpp-huge-func=0 < %s | FileCheck %s +; RUN: opt -S -passes='require,function(codegenprepare)' < %s | FileCheck %s +; RUN: opt -S -passes='require,function(codegenprepare)' -cgpp-huge-func=0 < %s | FileCheck %s target datalayout = 
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/gather-scatter-opt.ll b/llvm/test/Transforms/CodeGenPrepare/X86/gather-scatter-opt.ll index 2cf98491acb9..7899477afdb2 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/gather-scatter-opt.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/gather-scatter-opt.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -codegenprepare < %s | FileCheck %s +; RUN: opt -S -passes='require,function(codegenprepare)' < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/gep-unmerging.ll b/llvm/test/Transforms/CodeGenPrepare/X86/gep-unmerging.ll index d2eae6954c5d..a2ea3f7e36a0 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/gep-unmerging.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/gep-unmerging.ll @@ -1,4 +1,4 @@ -; RUN: opt -codegenprepare -S -mtriple=x86_64 < %s | FileCheck %s +; RUN: opt -passes='require,function(codegenprepare)' -S -mtriple=x86_64 < %s | FileCheck %s @exit_addr = constant ptr blockaddress(@gep_unmerging, %exit) @op1_addr = constant ptr blockaddress(@gep_unmerging, %op1) diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/invariant.group.ll b/llvm/test/Transforms/CodeGenPrepare/X86/invariant.group.ll index 3c81a4beb834..a0bac01d1165 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/invariant.group.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/invariant.group.ll @@ -1,4 +1,4 @@ -; RUN: opt -codegenprepare -S -mtriple=x86_64 < %s | FileCheck %s +; RUN: opt -passes='require,function(codegenprepare)' -S -mtriple=x86_64 < %s | FileCheck %s @tmp = global i8 0 diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/masked-gather-struct-gep.ll 
b/llvm/test/Transforms/CodeGenPrepare/X86/masked-gather-struct-gep.ll index ea07a5fe9bc5..dbd5e87f2c28 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/masked-gather-struct-gep.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/masked-gather-struct-gep.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 -; RUN: opt -S -codegenprepare < %s | FileCheck %s +; RUN: opt -S -passes='require,function(codegenprepare)' < %s | FileCheck %s ; REQUIRES: x86-registered-target target triple = "x86_64-pc-linux" diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/nonintegral.ll b/llvm/test/Transforms/CodeGenPrepare/X86/nonintegral.ll index 2f42ad889b42..9d53855ada79 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/nonintegral.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/nonintegral.ll @@ -1,5 +1,5 @@ -; RUN: opt -S -codegenprepare < %s | FileCheck %s -; RUN: opt -S -codegenprepare -addr-sink-using-gep=false < %s | FileCheck %s +; RUN: opt -S -passes='require,function(codegenprepare)' < %s | FileCheck %s +; RUN: opt -S -passes='require,function(codegenprepare)' -addr-sink-using-gep=false < %s | FileCheck %s ; This target data layout is modified to have a non-integral addrspace(1), ; in order to verify that codegenprepare does not try to introduce illegal diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/optimizeSelect-DT.ll b/llvm/test/Transforms/CodeGenPrepare/X86/optimizeSelect-DT.ll index be651d7eb004..aaf3df093468 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/optimizeSelect-DT.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/optimizeSelect-DT.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -codegenprepare < %s | FileCheck %s +; RUN: opt -S -passes='require,function(codegenprepare)' < %s | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git 
a/llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll b/llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll index a324f6f44e5c..653f34635648 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -codegenprepare -S < %s | FileCheck %s -; RUN: opt -enable-debugify -codegenprepare -S < %s 2>&1 | FileCheck %s -check-prefix=DEBUG +; RUN: opt -passes='require,function(codegenprepare)' -S < %s | FileCheck %s +; RUN: opt -enable-debugify -passes='require,function(codegenprepare)' -S < %s 2>&1 | FileCheck %s -check-prefix=DEBUG target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-apple-darwin10.0.0" @@ -636,6 +636,6 @@ exit: ret void } -; Check that every instruction inserted by -codegenprepare has a debug location. +; Check that every instruction inserted by -passes='require,function(codegenprepare)' has a debug location. 
; DEBUG: CheckModuleDebugify: PASS diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/pr27536.ll b/llvm/test/Transforms/CodeGenPrepare/X86/pr27536.ll index 3ef27c7c950f..51fba2229f3c 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/pr27536.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/pr27536.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -codegenprepare < %s | FileCheck %s +; RUN: opt -S -passes='require,function(codegenprepare)' < %s | FileCheck %s target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-pc-windows-msvc" diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/pr35658.ll b/llvm/test/Transforms/CodeGenPrepare/X86/pr35658.ll index eec9475a1c48..e9d0806235c9 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/pr35658.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/pr35658.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -codegenprepare -disable-complex-addr-modes=false -addr-sink-new-phis=true -addr-sink-new-select=true %s | FileCheck %s +; RUN: opt -S -passes='require,function(codegenprepare)' -disable-complex-addr-modes=false -addr-sink-new-phis=true -addr-sink-new-select=true %s | FileCheck %s target triple = "x86_64-unknown-linux-gnu" target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/recursively-delete-dead-instructions.ll b/llvm/test/Transforms/CodeGenPrepare/X86/recursively-delete-dead-instructions.ll index 0366b7d7e6d2..eff88bba3773 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/recursively-delete-dead-instructions.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/recursively-delete-dead-instructions.ll @@ -1,4 +1,4 @@ -; RUN: opt -codegenprepare -S -mtriple=x86_64-linux < %s | FileCheck %s +; RUN: opt -passes='require,function(codegenprepare)' -S -mtriple=x86_64-linux < %s | FileCheck %s declare void @llvm.assume(i1 noundef) nounwind willreturn diff --git 
a/llvm/test/Transforms/CodeGenPrepare/X86/remove-assume-block.ll b/llvm/test/Transforms/CodeGenPrepare/X86/remove-assume-block.ll index 1d5e6ea0978a..6b7a122b3e26 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/remove-assume-block.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/remove-assume-block.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -codegenprepare -mtriple=x86_64-linux < %s | FileCheck %s +; RUN: opt -S -passes='require,function(codegenprepare)' -mtriple=x86_64-linux < %s | FileCheck %s ; ; Ensure that blocks that only contain @llvm.assume are removed completely ; during CodeGenPrepare. diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/select.ll b/llvm/test/Transforms/CodeGenPrepare/X86/select.ll index d5eda2065144..e4bb9a54cd3c 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/select.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/select.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -codegenprepare -S < %s | FileCheck %s -; RUN: opt -debugify -codegenprepare -S < %s | FileCheck %s -check-prefix=DEBUG +; RUN: opt -passes='require,function(codegenprepare)' -S < %s | FileCheck %s +; RUN: opt -enable-debugify -passes='require,function(codegenprepare)' -S < %s | FileCheck %s -check-prefix=DEBUG target triple = "x86_64-unknown-unknown" diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-base.ll b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-base.ll index cac4697fbc64..1085d2235ca6 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-base.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-base.ll @@ -1,5 +1,5 @@ -; RUN: opt -S -codegenprepare -disable-complex-addr-modes=false -addr-sink-new-phis=true -addr-sink-new-select=true %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-YES -; RUN: opt -S -codegenprepare -disable-complex-addr-modes=false -addr-sink-new-phis=false -addr-sink-new-select=true %s | FileCheck %s --check-prefix=CHECK 
--check-prefix=CHECK-NO +; RUN: opt -S -passes='require,function(codegenprepare)' -disable-complex-addr-modes=false -addr-sink-new-phis=true -addr-sink-new-select=true %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-YES +; RUN: opt -S -passes='require,function(codegenprepare)' -disable-complex-addr-modes=false -addr-sink-new-phis=false -addr-sink-new-select=true %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NO target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-inseltpoison.ll b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-inseltpoison.ll index d5e69b9d802e..7660ee47fdbd 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-inseltpoison.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-inseltpoison.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -codegenprepare < %s | FileCheck %s +; RUN: opt -S -passes='require,function(codegenprepare)' < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-select.ll b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-select.ll index 336421e4c500..076915028aef 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-select.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-select.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -codegenprepare -disable-complex-addr-modes=false -addr-sink-new-select=true %s | FileCheck %s --check-prefix=CHECK +; RUN: opt -S -passes='require,function(codegenprepare)' -disable-complex-addr-modes=false -addr-sink-new-select=true %s | FileCheck %s --check-prefix=CHECK target datalayout = 
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-two-phi.ll b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-two-phi.ll index c4f85d1570d9..2a2a504a2c76 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-two-phi.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-two-phi.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -codegenprepare -disable-complex-addr-modes=false %s | FileCheck %s --check-prefix=CHECK +; RUN: opt -S -passes='require,function(codegenprepare)' -disable-complex-addr-modes=false %s | FileCheck %s --check-prefix=CHECK target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode.ll b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode.ll index 97b11a2e1f1c..f75af606eff0 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -codegenprepare < %s | FileCheck %s +; RUN: opt -S -passes='require,function(codegenprepare)' < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrspacecast.ll b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrspacecast.ll index f2e82212d0fa..a760f56b151f 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrspacecast.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrspacecast.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -codegenprepare < %s | FileCheck %s -check-prefix=CHECK -check-prefix=GEP +; RUN: opt -S 
-passes='require,function(codegenprepare)' < %s | FileCheck %s -check-prefix=CHECK -check-prefix=GEP target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/split-indirect-loop.ll b/llvm/test/Transforms/CodeGenPrepare/X86/split-indirect-loop.ll index c5d18ff50309..7f7f80910b48 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/split-indirect-loop.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/split-indirect-loop.ll @@ -1,4 +1,4 @@ -; RUN: opt -codegenprepare -S -mtriple=x86_64 < %s | FileCheck %s +; RUN: opt -passes='require,function(codegenprepare)' -S -mtriple=x86_64 < %s | FileCheck %s ; Test that an invalid CFG is not created by splitIndirectCriticalEdges ; transformation when the 'target' block is a loop to itself. diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/split-store-alignment.ll b/llvm/test/Transforms/CodeGenPrepare/X86/split-store-alignment.ll index 3bced480f31a..0335da94ea50 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/split-store-alignment.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/split-store-alignment.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -codegenprepare -mtriple=x86_64-unknown-unknown -force-split-store -S < %s | FileCheck %s +; RUN: opt -passes='require,function(codegenprepare)' -mtriple=x86_64-unknown-unknown -force-split-store -S < %s | FileCheck %s target datalayout = "e-m:x-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:32-n8:16:32-a:0:32-S32" target triple = "i686-w64-windows-gnu" diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/statepoint-relocate.ll b/llvm/test/Transforms/CodeGenPrepare/X86/statepoint-relocate.ll index a8a6f7baf9b4..babaa08a959b 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/statepoint-relocate.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/statepoint-relocate.ll @@ -1,4 
+1,4 @@ -; RUN: opt -codegenprepare -S < %s | FileCheck %s +; RUN: opt -passes='require,function(codegenprepare)' -S < %s | FileCheck %s target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-pc-linux-gnu" diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/tailcall-assume-xbb.ll b/llvm/test/Transforms/CodeGenPrepare/X86/tailcall-assume-xbb.ll index 9dc88a100daa..dd47d5eb6cc4 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/tailcall-assume-xbb.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/tailcall-assume-xbb.ll @@ -1,4 +1,4 @@ -; RUN: opt -codegenprepare -S -mtriple=x86_64-linux < %s | FileCheck %s +; RUN: opt -passes='require,function(codegenprepare)' -S -mtriple=x86_64-linux < %s | FileCheck %s ; The ret instruction can be duplicated into BB case2 even though there is an ; intermediate BB exit1 and call to llvm.assume. diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/vec-shift-inseltpoison.ll b/llvm/test/Transforms/CodeGenPrepare/X86/vec-shift-inseltpoison.ll index 557974fcfe54..db7d960899ca 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/vec-shift-inseltpoison.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/vec-shift-inseltpoison.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx -S < %s | FileCheck %s --check-prefixes=AVX1 -; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx2 -S < %s | FileCheck %s --check-prefixes=AVX2 -; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx512bw -S < %s | FileCheck %s --check-prefixes=AVX512BW -; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx,+xop -S < %s | FileCheck %s --check-prefixes=XOP -; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx2,+xop -S < %s | FileCheck %s --check-prefixes=XOP -; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx -S -enable-debugify < %s 2>&1 | FileCheck %s -check-prefix=DEBUG +; RUN: opt 
-passes='require,function(codegenprepare)' -mtriple=x86_64-- -mattr=+avx -S < %s | FileCheck %s --check-prefixes=AVX1 +; RUN: opt -passes='require,function(codegenprepare)' -mtriple=x86_64-- -mattr=+avx2 -S < %s | FileCheck %s --check-prefixes=AVX2 +; RUN: opt -passes='require,function(codegenprepare)' -mtriple=x86_64-- -mattr=+avx512bw -S < %s | FileCheck %s --check-prefixes=AVX512BW +; RUN: opt -passes='require,function(codegenprepare)' -mtriple=x86_64-- -mattr=+avx,+xop -S < %s | FileCheck %s --check-prefixes=XOP +; RUN: opt -passes='require,function(codegenprepare)' -mtriple=x86_64-- -mattr=+avx2,+xop -S < %s | FileCheck %s --check-prefixes=XOP +; RUN: opt -passes='require,function(codegenprepare)' -mtriple=x86_64-- -mattr=+avx -S -enable-debugify < %s 2>&1 | FileCheck %s -check-prefix=DEBUG define <4 x i32> @vector_variable_shift_right_v4i32(<4 x i1> %cond, <4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; AVX1-LABEL: @vector_variable_shift_right_v4i32( @@ -409,5 +409,5 @@ exit: declare <8 x i32> @llvm.fshl.v8i32(<8 x i32>, <8 x i32>, <8 x i32>) #1 -; Check that every instruction inserted by -codegenprepare has a debug location. +; Check that every instruction inserted by -passes='require,function(codegenprepare)' has a debug location. 
; DEBUG: CheckModuleDebugify: PASS diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/vec-shift.ll b/llvm/test/Transforms/CodeGenPrepare/X86/vec-shift.ll index 482e822ea3d8..e0f04f77efa0 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/vec-shift.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/vec-shift.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx -S < %s | FileCheck %s --check-prefixes=AVX1 -; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx2 -S < %s | FileCheck %s --check-prefixes=AVX2 -; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx512bw -S < %s | FileCheck %s --check-prefixes=AVX512BW -; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx,+xop -S < %s | FileCheck %s --check-prefixes=XOP -; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx2,+xop -S < %s | FileCheck %s --check-prefixes=XOP -; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx -S -enable-debugify < %s 2>&1 | FileCheck %s -check-prefix=DEBUG +; RUN: opt -passes='require,function(codegenprepare)' -mtriple=x86_64-- -mattr=+avx -S < %s | FileCheck %s --check-prefixes=AVX1 +; RUN: opt -passes='require,function(codegenprepare)' -mtriple=x86_64-- -mattr=+avx2 -S < %s | FileCheck %s --check-prefixes=AVX2 +; RUN: opt -passes='require,function(codegenprepare)' -mtriple=x86_64-- -mattr=+avx512bw -S < %s | FileCheck %s --check-prefixes=AVX512BW +; RUN: opt -passes='require,function(codegenprepare)' -mtriple=x86_64-- -mattr=+avx,+xop -S < %s | FileCheck %s --check-prefixes=XOP +; RUN: opt -passes='require,function(codegenprepare)' -mtriple=x86_64-- -mattr=+avx2,+xop -S < %s | FileCheck %s --check-prefixes=XOP +; RUN: opt -passes='require,function(codegenprepare)' -mtriple=x86_64-- -mattr=+avx -S -enable-debugify < %s 2>&1 | FileCheck %s -check-prefix=DEBUG define <4 x i32> @vector_variable_shift_right_v4i32(<4 x i1> %cond, <4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; 
AVX1-LABEL: @vector_variable_shift_right_v4i32( @@ -409,5 +409,5 @@ exit: declare <8 x i32> @llvm.fshl.v8i32(<8 x i32>, <8 x i32>, <8 x i32>) #1 -; Check that every instruction inserted by -codegenprepare has a debug location. +; Check that every instruction inserted by -passes='require,function(codegenprepare)' has a debug location. ; DEBUG: CheckModuleDebugify: PASS diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/widenable-condition.ll b/llvm/test/Transforms/CodeGenPrepare/X86/widenable-condition.ll index b26876e0e1e2..12230ec689cf 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/widenable-condition.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/widenable-condition.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -codegenprepare -S -mtriple=x86_64 < %s | FileCheck %s +; RUN: opt -passes='require,function(codegenprepare)' -S -mtriple=x86_64 < %s | FileCheck %s ; Check the idiomatic guard pattern to ensure it's lowered correctly. 
define void @test_guard(i1 %cond_0) { diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink-inseltpoison.ll b/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink-inseltpoison.ll index 72d1672eb4f7..ce1b6bd5ae63 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink-inseltpoison.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink-inseltpoison.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -codegenprepare -mcpu=corei7 %s | FileCheck %s --check-prefixes=CHECK,CHECK-SSE2 -; RUN: opt -S -codegenprepare -mcpu=bdver2 %s | FileCheck %s --check-prefixes=CHECK,CHECK-XOP -; RUN: opt -S -codegenprepare -mcpu=core-avx2 %s | FileCheck %s --check-prefixes=CHECK,CHECK-AVX,CHECK-AVX2 -; RUN: opt -S -codegenprepare -mcpu=skylake-avx512 %s | FileCheck %s --check-prefixes=CHECK,CHECK-AVX,CHECK-AVX512BW +; RUN: opt -S -passes='require,function(codegenprepare)' -mcpu=corei7 %s | FileCheck %s --check-prefixes=CHECK,CHECK-SSE2 +; RUN: opt -S -passes='require,function(codegenprepare)' -mcpu=bdver2 %s | FileCheck %s --check-prefixes=CHECK,CHECK-XOP +; RUN: opt -S -passes='require,function(codegenprepare)' -mcpu=core-avx2 %s | FileCheck %s --check-prefixes=CHECK,CHECK-AVX,CHECK-AVX2 +; RUN: opt -S -passes='require,function(codegenprepare)' -mcpu=skylake-avx512 %s | FileCheck %s --check-prefixes=CHECK,CHECK-AVX,CHECK-AVX512BW target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-darwin10.9.0" diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink.ll b/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink.ll index c14918a6956f..9e82844dfc2f 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been 
autogenerated by utils/update_test_checks.py -; RUN: opt -S -codegenprepare -mcpu=corei7 %s | FileCheck %s --check-prefixes=CHECK,CHECK-SSE2 -; RUN: opt -S -codegenprepare -mcpu=bdver2 %s | FileCheck %s --check-prefixes=CHECK,CHECK-XOP -; RUN: opt -S -codegenprepare -mcpu=core-avx2 %s | FileCheck %s --check-prefixes=CHECK,CHECK-AVX,CHECK-AVX2 -; RUN: opt -S -codegenprepare -mcpu=skylake-avx512 %s | FileCheck %s --check-prefixes=CHECK,CHECK-AVX,CHECK-AVX512BW +; RUN: opt -S -passes='require,function(codegenprepare)' -mcpu=corei7 %s | FileCheck %s --check-prefixes=CHECK,CHECK-SSE2 +; RUN: opt -S -passes='require,function(codegenprepare)' -mcpu=bdver2 %s | FileCheck %s --check-prefixes=CHECK,CHECK-XOP +; RUN: opt -S -passes='require,function(codegenprepare)' -mcpu=core-avx2 %s | FileCheck %s --check-prefixes=CHECK,CHECK-AVX,CHECK-AVX2 +; RUN: opt -S -passes='require,function(codegenprepare)' -mcpu=skylake-avx512 %s | FileCheck %s --check-prefixes=CHECK,CHECK-AVX,CHECK-AVX512BW target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-darwin10.9.0" diff --git a/llvm/test/Transforms/CodeGenPrepare/dead-allocation.ll b/llvm/test/Transforms/CodeGenPrepare/dead-allocation.ll index 637040a0d56d..9550e748da6d 100644 --- a/llvm/test/Transforms/CodeGenPrepare/dead-allocation.ll +++ b/llvm/test/Transforms/CodeGenPrepare/dead-allocation.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; Eliminate the dead allocation instruction ; REQUIRES: arm-registered-target -; RUN: opt -codegenprepare < %s -S | FileCheck %s +; RUN: opt -passes='require,function(codegenprepare)' < %s -S | FileCheck %s target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "armv7--linux-gnueabihf" diff --git a/llvm/test/Transforms/CodeGenPrepare/skip-merging-case-block.ll 
b/llvm/test/Transforms/CodeGenPrepare/skip-merging-case-block.ll index 608ad4c0a32f..d25b9c91aff6 100644 --- a/llvm/test/Transforms/CodeGenPrepare/skip-merging-case-block.ll +++ b/llvm/test/Transforms/CodeGenPrepare/skip-merging-case-block.ll @@ -1,5 +1,5 @@ ; REQUIRES: aarch64-registered-target -; RUN: opt -codegenprepare < %s -mtriple=aarch64-none-linux-gnu -S | FileCheck %s +; RUN: opt -passes='require,function(codegenprepare)' < %s -mtriple=aarch64-none-linux-gnu -S | FileCheck %s target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64--linux-gnu" diff --git a/llvm/test/Transforms/HotColdSplit/coldentrycount.ll b/llvm/test/Transforms/HotColdSplit/coldentrycount.ll index 1a113ff16188..6e5ef1aa2539 100644 --- a/llvm/test/Transforms/HotColdSplit/coldentrycount.ll +++ b/llvm/test/Transforms/HotColdSplit/coldentrycount.ll @@ -1,5 +1,5 @@ ; REQUIRES: x86-registered-target -; RUN: opt -passes=hotcoldsplit -hotcoldsplit-threshold=0 < %s | opt -codegenprepare -S | FileCheck %s +; RUN: opt -passes=hotcoldsplit -hotcoldsplit-threshold=0 < %s | opt -passes='require,function(codegenprepare)' -S | FileCheck %s ; Test to ensure that split cold function gets 0 entry count profile ; metadata when compiling with pgo. 
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/X86/codegenprepare-produced-address-math.ll b/llvm/test/Transforms/LoadStoreVectorizer/X86/codegenprepare-produced-address-math.ll index ff8a804beeb5..a56efe8dd3f3 100644 --- a/llvm/test/Transforms/LoadStoreVectorizer/X86/codegenprepare-produced-address-math.ll +++ b/llvm/test/Transforms/LoadStoreVectorizer/X86/codegenprepare-produced-address-math.ll @@ -1,4 +1,4 @@ -; RUN: opt -codegenprepare -load-store-vectorizer %s -S -o - | FileCheck %s +; RUN: opt -passes='require,function(codegenprepare)' -passes=load-store-vectorizer %s -S -o - | FileCheck %s ; RUN: opt -passes=load-store-vectorizer %s -S -o - | FileCheck %s ; RUN: opt -aa-pipeline=basic-aa -passes='function(load-store-vectorizer)' %s -S -o - | FileCheck %s diff --git a/llvm/test/Transforms/SampleProfile/section-accurate-samplepgo.ll b/llvm/test/Transforms/SampleProfile/section-accurate-samplepgo.ll index a404220056c8..ef2ddbc33cee 100644 --- a/llvm/test/Transforms/SampleProfile/section-accurate-samplepgo.ll +++ b/llvm/test/Transforms/SampleProfile/section-accurate-samplepgo.ll @@ -1,7 +1,7 @@ ; REQUIRES: x86-registered-target -; RUN: opt -S %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline.prof | opt -S -codegenprepare | FileCheck %s -; RUN: opt -S %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline.prof | opt -S -codegenprepare -profile-unknown-in-special-section -partial-profile | FileCheck %s --check-prefix=UNKNOWN -; RUN: opt -S %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline.prof -profile-sample-accurate -S | opt -S -codegenprepare | FileCheck %s --check-prefix=ACCURATE +; RUN: opt -S %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline.prof | opt -S -passes='require,function(codegenprepare)' | FileCheck %s +; RUN: opt -S %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline.prof | opt -S -passes='require,function(codegenprepare)' -profile-unknown-in-special-section -partial-profile | 
FileCheck %s --check-prefix=UNKNOWN +; RUN: opt -S %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline.prof -profile-sample-accurate -S | opt -S -passes='require,function(codegenprepare)' | FileCheck %s --check-prefix=ACCURATE target triple = "x86_64-pc-linux-gnu" diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp index 671a33309a1b..ecbf2173aa1d 100644 --- a/llvm/tools/opt/opt.cpp +++ b/llvm/tools/opt/opt.cpp @@ -441,7 +441,7 @@ int main(int argc, char **argv) { initializeScalarizeMaskedMemIntrinLegacyPassPass(Registry); initializeSelectOptimizePass(Registry); initializeCallBrPreparePass(Registry); - initializeCodeGenPreparePass(Registry); + initializeCodeGenPrepareLegacyPassPass(Registry); initializeAtomicExpandPass(Registry); initializeWinEHPreparePass(Registry); initializeDwarfEHPrepareLegacyPassPass(Registry); -- Gitee From 45750126bd426825e0fe4838be825bc339d9eca3 Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Tue, 16 Jan 2024 14:15:33 -0800 Subject: [PATCH 22/47] [BasicBlockSections] Always keep the entry block in the beginning of the function. (#74696) BasicBlockSections must enforce placing the entry block at the beginning of the function regardless of the basic block sections profile. 
--- llvm/include/llvm/CodeGen/MachineBasicBlock.h | 4 +- llvm/lib/CodeGen/BasicBlockSections.cpp | 10 +++-- .../BasicBlockSectionsProfileReader.cpp | 7 --- .../basic-block-sections-clusters-error.ll | 6 +-- .../X86/basic-block-sections-entryblock.ll | 43 +++++++++++++++++++ 5 files changed, 53 insertions(+), 17 deletions(-) create mode 100644 llvm/test/CodeGen/X86/basic-block-sections-entryblock.ll diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h index 69b9a95b1018..1ee571f5e5a6 100644 --- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h +++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h @@ -77,10 +77,10 @@ private: MBBSectionID(SectionType T) : Type(T), Number(0) {} }; -// This structure represents the information for a basic block. +// This structure represents the information for a basic block pertaining to +// the basic block sections profile. struct UniqueBBID { unsigned BaseID; - // sections profile). unsigned CloneID; }; diff --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp index 94b5a503fbd0..dbb6ebb3d7eb 100644 --- a/llvm/lib/CodeGen/BasicBlockSections.cpp +++ b/llvm/lib/CodeGen/BasicBlockSections.cpp @@ -318,9 +318,8 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) { MF.setBBSectionsType(BBSectionsType); assignSections(MF, FuncClusterInfo); - // We make sure that the cluster including the entry basic block precedes all - // other clusters. - auto EntryBBSectionID = MF.front().getSectionID(); + const MachineBasicBlock &EntryBB = MF.front(); + auto EntryBBSectionID = EntryBB.getSectionID(); // Helper function for ordering BB sections as follows: // * Entry section (section including the entry block). @@ -341,12 +340,17 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) { // contiguous and ordered accordingly. 
Furthermore, clusters are ordered in // increasing order of their section IDs, with the exception and the // cold section placed at the end of the function. + // Also, we force the entry block of the function to be placed at the + // beginning of the function, regardless of the requested order. auto Comparator = [&](const MachineBasicBlock &X, const MachineBasicBlock &Y) { auto XSectionID = X.getSectionID(); auto YSectionID = Y.getSectionID(); if (XSectionID != YSectionID) return MBBSectionOrder(XSectionID, YSectionID); + // Make sure that the entry block is placed at the beginning. + if (&X == &EntryBB || &Y == &EntryBB) + return &X == &EntryBB; // If the two basic block are in the same section, the order is decided by // their position within the section. if (XSectionID.Type == MBBSectionID::SectionType::Default) diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp index e4ee2ac63138..39bca55b4788 100644 --- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp +++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp @@ -214,10 +214,6 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() { Twine("duplicate basic block id found '") + BasicBlockIDStr + "'"); - if (!BasicBlockID->BaseID && CurrentPosition) - return createProfileParseError( - "entry BB (0) does not begin a cluster."); - FI->second.ClusterInfo.emplace_back(BBClusterInfo{ *std::move(BasicBlockID), CurrentCluster, CurrentPosition++}); } @@ -288,9 +284,6 @@ Error BasicBlockSectionsProfileReader::ReadV0Profile() { if (!FuncBBIDs.insert(BBID).second) return createProfileParseError( Twine("duplicate basic block id found '") + BBIDStr + "'"); - if (BBID == 0 && CurrentPosition) - return createProfileParseError( - "entry BB (0) does not begin a cluster"); FI->second.ClusterInfo.emplace_back( BBClusterInfo({{static_cast(BBID), 0}, diff --git a/llvm/test/CodeGen/X86/basic-block-sections-clusters-error.ll 
b/llvm/test/CodeGen/X86/basic-block-sections-clusters-error.ll index 597d8f6707ec..d6f3d5010b55 100644 --- a/llvm/test/CodeGen/X86/basic-block-sections-clusters-error.ll +++ b/llvm/test/CodeGen/X86/basic-block-sections-clusters-error.ll @@ -5,10 +5,6 @@ ; RUN: echo '!!1' >> %t1 ; RUN: not --crash llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t1 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR1 ; CHECK-ERROR1: LLVM ERROR: invalid profile {{.*}} at line 3: duplicate basic block id found '1' -; RUN: echo '!dummy1' > %t2 -; RUN: echo '!!4 0' >> %t2 -; RUN: not --crash llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t2 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR2 -; CHECK-ERROR2: LLVM ERROR: invalid profile {{.*}} at line 2: entry BB (0) does not begin a cluster ; RUN: echo '!dummy1' > %t3 ; RUN: echo '!!-1' >> %t3 ; RUN: not --crash llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t3 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR3 @@ -48,7 +44,7 @@ ; RUN: echo 'f dummy1' >> %t11 ; RUN: echo 'c 0 1.a' >> %t11 ; RUN: not --crash llc < %s -O0 -mtriple=x86_64 -function-sections -basic-block-sections=%t11 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR11 -; CHECK-ERROR11: LLVM ERROR: invalid profile {{.*}} at line 3: unable to parse clone id: 'a' +; CHECK-ERROR11: LLVM ERROR: invalid profile {{.*}} at line 3: unable to parse clone id: 'a' ; RUN: echo 'v1' > %t12 ; RUN: echo 'f dummy1' >> %t12 ; RUN: echo 'c 0 1' >> %t12 diff --git a/llvm/test/CodeGen/X86/basic-block-sections-entryblock.ll b/llvm/test/CodeGen/X86/basic-block-sections-entryblock.ll new file mode 100644 index 000000000000..349015e1403d --- /dev/null +++ b/llvm/test/CodeGen/X86/basic-block-sections-entryblock.ll @@ -0,0 +1,43 @@ +; COM: Tests to verify that the entry basic block is always placed at the beginning of its section. 
+; RUN: echo 'v1' > %t1 +; RUN: echo 'f foo' >> %t1 +; RUN: echo 'c2 0' >> %t1 +; RUN: llc < %s -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t1 -O0 | FileCheck %s -check-prefix=LINUX-SECTIONS1 + +; RUN: echo 'v1' > %t2 +; RUN: echo 'f foo' >> %t2 +; RUN: echo 'c2' >> %t2 +; RUN: llc < %s -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t2 -O0 | FileCheck %s -check-prefix=LINUX-SECTIONS2 + + +define void @foo(i1 %a, i1 %b) { +b0: + br i1 %a, label %b1, label %b2 + +b1: ; preds = %b0 + ret void + +b2: ; preds = %b0 + ret void +} + +;; Check that %b0 is emitted at the beginning of the function. +; LINUX-SECTIONS1: .section .text.foo,"ax",@progbits +; LINUX-SECTIONS1: foo: +; LINUX-SECTIONS1: # %bb.0: # %b0 +; LINUX-SECTIONS1: jne foo.cold +; LINUX-SECTIONS1: # %bb.2: # %b2 +; LINUX-SECTIONS1: retq +; LINUX-SECTIONS1: .section .text.split.foo,"ax",@progbits +; LINUX-SECTIONS1: foo.cold: # %b1 +; LINUX-SECTIONS1: retq + +; LINUX-SECTIONS2: .section .text.foo,"ax",@progbits +; LINUX-SECTIONS2: foo: +; LINUX-SECTIONS2: # %bb.0: # %b0 +; LINUX-SECTIONS2: je foo.__part.0 +; LINUX-SECTIONS2: # %bb.1: # %b1 +; LINUX-SECTIONS2: retq +; LINUX-SECTIONS2: .section .text.foo,"ax",@progbits +; LINUX-SECTIONS2: foo.__part.0: # %b2 +; LINUX-SECTIONS2: retq -- Gitee From 18879d4cab3a3d359cde29a55299ba4b862311b2 Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Tue, 16 Jan 2024 14:17:10 -0800 Subject: [PATCH 23/47] [SHT_LLVM_BB_ADDR_MAP,NFC] Add SCOPED_TRACE for convenient mapping of failures to test cases. (#78335) Although parameterized gtests are preferred for this, our tests are not very straightforward. So I decided to add SCOPED_TRACE for different test cases and the lambda checks as well. 
Typical test failure message now looks like: ``` ...llvm-project/llvm/unittests/Object/ELFObjectFileTest.cpp:737 Expected equality of these values: *BBAddrMaps Which is: { 32-byte object <11-11 01-00 00-00 00-00 C0-A9 FB-3E E4-55 00-00 D0-A9 FB-3E E4-55 00-00 D0-A9 FB-3E E4-55 00-00>, 32-byte object <22-22 02-00 00-00 00-00 F0-8E FB-3E E4-55 00-00 00-8F FB-3E E4-55 00-00 00-8F FB-3E E4-55 00-00> } ExpectedResult Which is: { 32-byte object <33-33 03-00 00-00 00-00 50-A7 FB-3E E4-55 00-00 60-A7 FB-3E E4-55 00-00 60-A7 FB-3E E4-55 00-00> } Google Test trace: ...llvm-project/llvm/unittests/Object/ELFObjectFileTest.cpp:726: for TextSectionIndex: 1 and object yaml: --- !ELF FileHeader: Class: ELFCLASS64 Data: ELFDATA2LSB Type: ET_EXEC Sections: - Name: .llvm_bb_addr_map_1 Type: SHT_LLVM_BB_ADDR_MAP Link: 1 Entries: - Version: 2 Address: 0x11111 BBEntries: - ID: 1 AddressOffset: 0x0 Size: 0x1 Metadata: 0x2 - Name: .llvm_bb_addr_map_2 Type: SHT_LLVM_BB_ADDR_MAP Link: 1 Entries: - Version: 2 Address: 0x22222 BBEntries: - ID: 2 AddressOffset: 0x0 Size: 0x2 Metadata: 0x4 - Name: .llvm_bb_addr_map_3 Type: SHT_LLVM_BB_ADDR_MAP Link: 2 Entries: - Version: 1 Address: 0x33333 BBEntries: - ID: 0 AddressOffset: 0x0 Size: 0x3 Metadata: 0x6 - Name: .llvm_bb_addr_map_4 Type: SHT_LLVM_BB_ADDR_MAP_V0 # Link: 0 (by default, can be overriden) Entries: - Version: 0 Address: 0x44444 BBEntries: - ID: 0 AddressOffset: 0x0 Size: 0x4 Metadata: 0x18 ...llvm-project/llvm/unittests/Object/ELFObjectFileTest.cpp:757: normal sections ``` --- llvm/unittests/Object/ELFObjectFileTest.cpp | 250 ++++++++++++-------- 1 file changed, 157 insertions(+), 93 deletions(-) diff --git a/llvm/unittests/Object/ELFObjectFileTest.cpp b/llvm/unittests/Object/ELFObjectFileTest.cpp index 502db0862ab8..ad85371d58ee 100644 --- a/llvm/unittests/Object/ELFObjectFileTest.cpp +++ b/llvm/unittests/Object/ELFObjectFileTest.cpp @@ -514,8 +514,11 @@ Sections: Metadata: 0x2 )"; - DoCheck(UnsupportedVersionYamlString, - "unsupported 
SHT_LLVM_BB_ADDR_MAP version: 3"); + { + SCOPED_TRACE("unsupported version"); + DoCheck(UnsupportedVersionYamlString, + "unsupported SHT_LLVM_BB_ADDR_MAP version: 3"); + } SmallString<128> CommonVersionedYamlString(CommonYamlString); CommonVersionedYamlString += R"( @@ -533,8 +536,12 @@ Sections: TruncatedYamlString += R"( ShSize: 0xb )"; - DoCheck(TruncatedYamlString, "unable to decode LEB128 at offset 0x0000000b: " - "malformed uleb128, extends past end"); + { + SCOPED_TRACE("truncated section"); + DoCheck(TruncatedYamlString, + "unable to decode LEB128 at offset 0x0000000b: " + "malformed uleb128, extends past end"); + } // Check that we can detect when the encoded BB entry fields exceed the UINT32 // limit. @@ -561,12 +568,15 @@ Sections: Metadata: 0x100000000 )"; - DoCheck(OverInt32LimitYamlStrings[0], - "ULEB128 value at offset 0x10 exceeds UINT32_MAX (0x100000000)"); - DoCheck(OverInt32LimitYamlStrings[1], - "ULEB128 value at offset 0x15 exceeds UINT32_MAX (0x100000000)"); - DoCheck(OverInt32LimitYamlStrings[2], - "ULEB128 value at offset 0x1a exceeds UINT32_MAX (0x100000000)"); + { + SCOPED_TRACE("overlimit fields"); + DoCheck(OverInt32LimitYamlStrings[0], + "ULEB128 value at offset 0x10 exceeds UINT32_MAX (0x100000000)"); + DoCheck(OverInt32LimitYamlStrings[1], + "ULEB128 value at offset 0x15 exceeds UINT32_MAX (0x100000000)"); + DoCheck(OverInt32LimitYamlStrings[2], + "ULEB128 value at offset 0x1a exceeds UINT32_MAX (0x100000000)"); + } // Check the proper error handling when the section has fields exceeding // UINT32 and is also truncated. 
This is for checking that we don't generate @@ -586,13 +596,16 @@ Sections: ShSize: 0x1b )"; - DoCheck(OverInt32LimitAndTruncated[0], - "unable to decode LEB128 at offset 0x00000015: malformed uleb128, " - "extends past end"); - DoCheck(OverInt32LimitAndTruncated[1], - "ULEB128 value at offset 0x15 exceeds UINT32_MAX (0x100000000)"); - DoCheck(OverInt32LimitAndTruncated[2], - "ULEB128 value at offset 0x15 exceeds UINT32_MAX (0x100000000)"); + { + SCOPED_TRACE("overlimit fields, truncated section"); + DoCheck(OverInt32LimitAndTruncated[0], + "unable to decode LEB128 at offset 0x00000015: malformed uleb128, " + "extends past end"); + DoCheck(OverInt32LimitAndTruncated[1], + "ULEB128 value at offset 0x15 exceeds UINT32_MAX (0x100000000)"); + DoCheck(OverInt32LimitAndTruncated[2], + "ULEB128 value at offset 0x15 exceeds UINT32_MAX (0x100000000)"); + } // Check for proper error handling when the 'NumBlocks' field is overridden // with an out-of-range value. @@ -601,8 +614,11 @@ Sections: NumBlocks: 0x100000000 )"; - DoCheck(OverLimitNumBlocks, - "ULEB128 value at offset 0xa exceeds UINT32_MAX (0x100000000)"); + { + SCOPED_TRACE("overlimit 'NumBlocks' field"); + DoCheck(OverLimitNumBlocks, + "ULEB128 value at offset 0xa exceeds UINT32_MAX (0x100000000)"); + } } // Test for the ELFObjectFile::readBBAddrMap API. @@ -675,6 +691,9 @@ Sections: auto DoCheckSucceeds = [&](StringRef YamlString, std::optional TextSectionIndex, std::vector ExpectedResult) { + SCOPED_TRACE("for TextSectionIndex: " + + (TextSectionIndex ? llvm::Twine(*TextSectionIndex) : "{}") + + " and object yaml:\n" + YamlString); SmallString<0> Storage; Expected> ElfOrErr = toBinary(Storage, YamlString); @@ -691,6 +710,9 @@ Sections: auto DoCheckFails = [&](StringRef YamlString, std::optional TextSectionIndex, const char *ErrMsg) { + SCOPED_TRACE("for TextSectionIndex: " + + (TextSectionIndex ? 
llvm::Twine(*TextSectionIndex) : "{}") + + " and object yaml:\n" + YamlString); SmallString<0> Storage; Expected> ElfOrErr = toBinary(Storage, YamlString); @@ -703,15 +725,21 @@ Sections: FailedWithMessage(ErrMsg)); }; - // Check that we can retrieve the data in the normal case. - DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/std::nullopt, - AllBBAddrMaps); - DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/0, Section0BBAddrMaps); - DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/2, Section1BBAddrMaps); - DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/1, Section2BBAddrMaps); - // Check that when no bb-address-map section is found for a text section, - // we return an empty result. - DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/3, {}); + { + SCOPED_TRACE("normal sections"); + // Check that we can retrieve the data in the normal case. + DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/std::nullopt, + AllBBAddrMaps); + DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/0, + Section0BBAddrMaps); + DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/2, + Section1BBAddrMaps); + DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/1, + Section2BBAddrMaps); + // Check that when no bb-address-map section is found for a text section, + // we return an empty result. + DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/3, {}); + } // Check that we detect when a bb-addr-map section is linked to an invalid // (not present) section. @@ -724,10 +752,13 @@ Sections: "unable to get the linked-to section for " "SHT_LLVM_BB_ADDR_MAP_V0 section with index 4: invalid section " "index: 10"); - // Linked sections are not checked when we don't target a specific text - // section. - DoCheckSucceeds(InvalidLinkedYamlString, /*TextSectionIndex=*/std::nullopt, - AllBBAddrMaps); + { + SCOPED_TRACE("invalid linked section"); + // Linked sections are not checked when we don't target a specific text + // section. 
+ DoCheckSucceeds(InvalidLinkedYamlString, /*TextSectionIndex=*/std::nullopt, + AllBBAddrMaps); + } // Check that we can detect when bb-address-map decoding fails. SmallString<128> TruncatedYamlString(CommonYamlString); @@ -735,14 +766,18 @@ Sections: ShSize: 0x8 )"; - DoCheckFails(TruncatedYamlString, /*TextSectionIndex=*/std::nullopt, - "unable to read SHT_LLVM_BB_ADDR_MAP_V0 section with index 4: " - "unable to decode LEB128 at offset 0x00000008: malformed " - "uleb128, extends past end"); - // Check that we can read the other section's bb-address-maps which are - // valid. - DoCheckSucceeds(TruncatedYamlString, /*TextSectionIndex=*/2, - Section1BBAddrMaps); + { + SCOPED_TRACE("truncated section"); + DoCheckFails(TruncatedYamlString, /*TextSectionIndex=*/std::nullopt, + "unable to read SHT_LLVM_BB_ADDR_MAP_V0 section with index 4: " + "unable to decode LEB128 at offset 0x00000008: malformed " + "uleb128, extends past end"); + + // Check that we can read the other section's bb-address-maps which are + // valid. 
+ DoCheckSucceeds(TruncatedYamlString, /*TextSectionIndex=*/2, + Section1BBAddrMaps); + } } // Tests for error paths of the ELFFile::decodeBBAddrMap with PGOAnalysisMap @@ -792,9 +827,12 @@ Sections: Metadata: 0x2 )"; - DoCheck(UnsupportedLowVersionYamlString, - "version should be >= 2 for SHT_LLVM_BB_ADDR_MAP when PGO features " - "are enabled: version = 1 feature = 4"); + { + SCOPED_TRACE("unsupported version"); + DoCheck(UnsupportedLowVersionYamlString, + "version should be >= 2 for SHT_LLVM_BB_ADDR_MAP when PGO features " + "are enabled: version = 1 feature = 4"); + } SmallString<128> CommonVersionedYamlString(CommonYamlString); CommonVersionedYamlString += R"( @@ -813,9 +851,12 @@ Sections: Feature: 0x01 )"; - DoCheck(MissingFuncEntryCount, - "unable to decode LEB128 at offset 0x0000000b: malformed uleb128, " - "extends past end"); + { + SCOPED_TRACE("missing function entry count"); + DoCheck(MissingFuncEntryCount, + "unable to decode LEB128 at offset 0x0000000b: malformed uleb128, " + "extends past end"); + } // Check that we fail when basic block frequency is enabled but not provided. SmallString<128> MissingBBFreq(CommonYamlString); @@ -829,8 +870,11 @@ Sections: Metadata: 0x2 )"; - DoCheck(MissingBBFreq, "unable to decode LEB128 at offset 0x0000000f: " - "malformed uleb128, extends past end"); + { + SCOPED_TRACE("missing bb frequency"); + DoCheck(MissingBBFreq, "unable to decode LEB128 at offset 0x0000000f: " + "malformed uleb128, extends past end"); + } // Check that we fail when branch probability is enabled but not provided. 
SmallString<128> MissingBrProb(CommonYamlString); @@ -862,8 +906,11 @@ Sections: BrProb: 0xF0000000 )"; - DoCheck(MissingBrProb, "unable to decode LEB128 at offset 0x00000017: " - "malformed uleb128, extends past end"); + { + SCOPED_TRACE("missing branch probability"); + DoCheck(MissingBrProb, "unable to decode LEB128 at offset 0x00000017: " + "malformed uleb128, extends past end"); + } } // Test for the ELFObjectFile::readBBAddrMap API with PGOAnalysisMap. @@ -1046,6 +1093,10 @@ Sections: [&](StringRef YamlString, std::optional TextSectionIndex, std::vector ExpectedResult, std::optional> ExpectedPGO) { + SCOPED_TRACE( + "for TextSectionIndex: " + + (TextSectionIndex ? llvm::Twine(*TextSectionIndex) : "{}") + + " and object yaml:\n" + YamlString); SmallString<0> Storage; Expected> ElfOrErr = toBinary(Storage, YamlString); @@ -1073,6 +1124,9 @@ Sections: auto DoCheckFails = [&](StringRef YamlString, std::optional TextSectionIndex, const char *ErrMsg) { + SCOPED_TRACE("for TextSectionIndex: " + + (TextSectionIndex ? llvm::Twine(*TextSectionIndex) : "{}") + + " and object yaml:\n" + YamlString); SmallString<0> Storage; Expected> ElfOrErr = toBinary(Storage, YamlString); @@ -1087,29 +1141,32 @@ Sections: FailedWithMessage(ErrMsg)); }; - // Check that we can retrieve the data in the normal case. 
- DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/std::nullopt, - AllBBAddrMaps, std::nullopt); - DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/0, Section0BBAddrMaps, - std::nullopt); - DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/2, Section1BBAddrMaps, - std::nullopt); - DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/1, Section2BBAddrMaps, - std::nullopt); - - DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/std::nullopt, - AllBBAddrMaps, AllPGOAnalysisMaps); - DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/0, Section0BBAddrMaps, - Section0PGOAnalysisMaps); - DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/2, Section1BBAddrMaps, - Section1PGOAnalysisMaps); - DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/1, Section2BBAddrMaps, - Section2PGOAnalysisMaps); - // Check that when no bb-address-map section is found for a text section, - // we return an empty result. - DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/3, {}, std::nullopt); - DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/3, {}, - std::vector{}); + { + SCOPED_TRACE("normal sections"); + // Check that we can retrieve the data in the normal case. 
+ DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/std::nullopt, + AllBBAddrMaps, std::nullopt); + DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/0, + Section0BBAddrMaps, std::nullopt); + DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/2, + Section1BBAddrMaps, std::nullopt); + DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/1, + Section2BBAddrMaps, std::nullopt); + + DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/std::nullopt, + AllBBAddrMaps, AllPGOAnalysisMaps); + DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/0, + Section0BBAddrMaps, Section0PGOAnalysisMaps); + DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/2, + Section1BBAddrMaps, Section1PGOAnalysisMaps); + DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/1, + Section2BBAddrMaps, Section2PGOAnalysisMaps); + // Check that when no bb-address-map section is found for a text section, + // we return an empty result. + DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/3, {}, std::nullopt); + DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/3, {}, + std::vector{}); + } // Check that we detect when a bb-addr-map section is linked to an invalid // (not present) section. @@ -1118,16 +1175,20 @@ Sections: Link: 10 )"; - DoCheckFails(InvalidLinkedYamlString, /*TextSectionIndex=*/5, - "unable to get the linked-to section for " - "SHT_LLVM_BB_ADDR_MAP section with index 5: invalid section " - "index: 10"); - // Linked sections are not checked when we don't target a specific text - // section. 
- DoCheckSucceeds(InvalidLinkedYamlString, /*TextSectionIndex=*/std::nullopt, - AllBBAddrMaps, std::nullopt); - DoCheckSucceeds(InvalidLinkedYamlString, /*TextSectionIndex=*/std::nullopt, - AllBBAddrMaps, AllPGOAnalysisMaps); + { + SCOPED_TRACE("invalid linked section"); + DoCheckFails(InvalidLinkedYamlString, /*TextSectionIndex=*/5, + "unable to get the linked-to section for " + "SHT_LLVM_BB_ADDR_MAP section with index 5: invalid section " + "index: 10"); + + // Linked sections are not checked when we don't target a specific text + // section. + DoCheckSucceeds(InvalidLinkedYamlString, /*TextSectionIndex=*/std::nullopt, + AllBBAddrMaps, std::nullopt); + DoCheckSucceeds(InvalidLinkedYamlString, /*TextSectionIndex=*/std::nullopt, + AllBBAddrMaps, AllPGOAnalysisMaps); + } // Check that we can detect when bb-address-map decoding fails. SmallString<128> TruncatedYamlString(CommonYamlString); @@ -1135,16 +1196,19 @@ Sections: ShSize: 0xa )"; - DoCheckFails(TruncatedYamlString, /*TextSectionIndex=*/std::nullopt, - "unable to read SHT_LLVM_BB_ADDR_MAP section with index 5: " - "unable to decode LEB128 at offset 0x0000000a: malformed " - "uleb128, extends past end"); - // Check that we can read the other section's bb-address-maps which are - // valid. - DoCheckSucceeds(TruncatedYamlString, /*TextSectionIndex=*/2, - Section1BBAddrMaps, std::nullopt); - DoCheckSucceeds(TruncatedYamlString, /*TextSectionIndex=*/2, - Section1BBAddrMaps, Section1PGOAnalysisMaps); + { + SCOPED_TRACE("truncated section"); + DoCheckFails(TruncatedYamlString, /*TextSectionIndex=*/std::nullopt, + "unable to read SHT_LLVM_BB_ADDR_MAP section with index 5: " + "unable to decode LEB128 at offset 0x0000000a: malformed " + "uleb128, extends past end"); + // Check that we can read the other section's bb-address-maps which are + // valid. 
+ DoCheckSucceeds(TruncatedYamlString, /*TextSectionIndex=*/2, + Section1BBAddrMaps, std::nullopt); + DoCheckSucceeds(TruncatedYamlString, /*TextSectionIndex=*/2, + Section1BBAddrMaps, Section1PGOAnalysisMaps); + } } // Test for ObjectFile::getRelocatedSection: check that it returns a relocated -- Gitee From 058cfc3a678bd4ebccdcdd78114f9f3a60e5033f Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Fri, 19 Jan 2024 11:34:00 -0800 Subject: [PATCH 24/47] [SHT_LLVM_BB_ADDR_MAP] Add assertion and clarify docstring (#77374) This patch adds an assertion to readBBAddrMapImpl to confirm that PGOAnalyses and BBAddrMaps are of the same size when PGO information is requested (part of the API contract). This patch also updates the docstring for readBBAddrMap to better clarify what is guaranteed. --- llvm/include/llvm/Object/ELFObjectFile.h | 11 ++++++----- llvm/lib/Object/ELFObjectFile.cpp | 4 ++++ 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h index 5997a35a3905..a41573a67d4b 100644 --- a/llvm/include/llvm/Object/ELFObjectFile.h +++ b/llvm/include/llvm/Object/ELFObjectFile.h @@ -110,11 +110,12 @@ public: Expected> readDynsymVersions() const; /// Returns a vector of all BB address maps in the object file. When - // `TextSectionIndex` is specified, only returns the BB address maps - // corresponding to the section with that index. When `PGOAnalyses`is - // specified, the vector is cleared then filled with extra PGO data. - // `PGOAnalyses` will always be the same length as the return value on - // success, otherwise it is empty. + /// `TextSectionIndex` is specified, only returns the BB address maps + /// corresponding to the section with that index. When `PGOAnalyses` is + /// specified (PGOAnalyses is not nullptr), the vector is cleared then filled + /// with extra PGO data. 
`PGOAnalyses` will always be the same length as the + /// return value when it is requested assuming no error occurs. Upon failure, + /// `PGOAnalyses` will be emptied. Expected> readBBAddrMap(std::optional TextSectionIndex = std::nullopt, std::vector *PGOAnalyses = nullptr) const; diff --git a/llvm/lib/Object/ELFObjectFile.cpp b/llvm/lib/Object/ELFObjectFile.cpp index 462cef1c6d4c..7503e2efdafc 100644 --- a/llvm/lib/Object/ELFObjectFile.cpp +++ b/llvm/lib/Object/ELFObjectFile.cpp @@ -755,6 +755,10 @@ Expected> static readBBAddrMapImpl( std::move(BBAddrMapOrErr->begin(), BBAddrMapOrErr->end(), std::back_inserter(BBAddrMaps)); } + if (PGOAnalyses) + assert(PGOAnalyses->size() == BBAddrMaps.size() && + "The same number of BBAddrMaps and PGOAnalysisMaps should be " + "returned when PGO information is requested"); return BBAddrMaps; } -- Gitee From 2c27383a637293964cbf4751f5f0c44d543214d8 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Fri, 19 Jan 2024 14:28:31 -0800 Subject: [PATCH 25/47] [llvm-objdump] Add support for symbolizing PGOBBAddrMap Info (#76386) This patch adds in support for symbolizing PGO information contained within the SHT_LLVM_BB_ADDR_MAP section in llvm-objdump. The outputs are simply the raw values contained within the section. --- .../llvm-objdump/X86/elf-pgoanalysismap.yaml | 180 ++++++++++++++++++ llvm/tools/llvm-objdump/llvm-objdump.cpp | 100 ++++++++-- 2 files changed, 263 insertions(+), 17 deletions(-) create mode 100644 llvm/test/tools/llvm-objdump/X86/elf-pgoanalysismap.yaml diff --git a/llvm/test/tools/llvm-objdump/X86/elf-pgoanalysismap.yaml b/llvm/test/tools/llvm-objdump/X86/elf-pgoanalysismap.yaml new file mode 100644 index 000000000000..c4bf443f920a --- /dev/null +++ b/llvm/test/tools/llvm-objdump/X86/elf-pgoanalysismap.yaml @@ -0,0 +1,180 @@ +## Test that in the presence of SHT_LLVM_BB_ADDR_MAP sections which also +## contain PGO data, --symbolize-operands is able to label the basic blocks +## correctly. 
+ +## Check the case where we only have entry counts. + +# RUN: yaml2obj --docnum=1 %s -o %t1 +# RUN: llvm-objdump %t1 -d --symbolize-operands --no-show-raw-insn --no-leading-addr | \ +# RUN: FileCheck %s --check-prefix=ENTRYCOUNT + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +Sections: + - Name: .text.foo + Type: SHT_PROGBITS + Address: 0x0 + Flags: [SHF_ALLOC, SHF_EXECINSTR] + Content: '50' + - Name: .llvm_bb_addr_map.foo + Type: SHT_LLVM_BB_ADDR_MAP + Link: .text.foo + Entries: + - Version: 2 + Address: 0x0 + Feature: 0x1 + BBEntries: + - ID: 3 + AddressOffset: 0x0 + Size: 0x1 + Metadata: 0x1 + PGOAnalyses: + - FuncEntryCount: 1000 +Symbols: + - Name: foo + Section: .text.foo + Value: 0x0 + +# ENTRYCOUNT: : +# ENTRYCOUNT: (Entry count: 1000): + +## Check the case where we have entry points and block frequency information + +# RUN: yaml2obj %s --docnum=2 -o %t2 +# RUN: llvm-objdump %t2 -d --symbolize-operands --no-show-raw-insn --no-leading-addr | \ +# RUN: FileCheck %s --check-prefix=ENTRYCOUNT-BLOCKFREQ + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +Sections: + - Name: .text.foo + Type: SHT_PROGBITS + Address: 0x0 + Flags: [SHF_ALLOC, SHF_EXECINSTR] + Content: '503b0505200000907d02ebf5c3' + - Name: .llvm_bb_addr_map.foo + Type: SHT_LLVM_BB_ADDR_MAP + Link: .text.foo + Entries: + - Version: 2 + Address: 0x0 + Feature: 0x3 + BBEntries: + - ID: 3 + AddressOffset: 0x0 + Size: 0x1 + Metadata: 0x1 + - ID: 1 + AddressOffset: 0x0 + Size: 0x6 + Metadata: 0x0 + - ID: 2 + AddressOffset: 0x1 + Size: 0x4 + Metadata: 0x0 + - ID: 5 + AddressOffset: 0x0 + Size: 0x1 + Metadata: 0x2 + PGOAnalyses: + - FuncEntryCount: 1000 + PGOBBEntries: + - BBFreq: 1000 + - BBFreq: 133 + - BBFreq: 18 + - BBFreq: 1000 +Symbols: + - Name: foo + Section: .text.foo + Value: 0x0 + +# ENTRYCOUNT-BLOCKFREQ: : +# ENTRYCOUNT-BLOCKFREQ: (Entry count: 1000, Frequency: 1000): +# 
ENTRYCOUNT-BLOCKFREQ: (Frequency: 133): +# ENTRYCOUNT-BLOCKFREQ: (Frequency: 18): +# ENTRYCOUNT-BLOCKFREQ: (Frequency: 1000): + +## Check the case where we have entry points, block frequency, and branch +## proabability information. + +# RUN: yaml2obj %s --docnum=3 -o %t3 +# RUN: llvm-objdump %t3 -d --symbolize-operands --no-show-raw-insn --no-leading-addr | \ +# RUN: FileCheck %s --check-prefix=ENTRY-FREQ-PROB + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +Sections: + - Name: .text.foo + Type: SHT_PROGBITS + Address: 0x0 + Flags: [SHF_ALLOC, SHF_EXECINSTR] + Content: '503b0505200000907d02ebf5c3' + - Name: .llvm_bb_addr_map.foo + Type: SHT_LLVM_BB_ADDR_MAP + Link: .text.foo + Entries: + - Version: 2 + Address: 0x0 + Feature: 0x7 + BBEntries: + - ID: 3 + AddressOffset: 0x0 + Size: 0x1 + Metadata: 0x1 + - ID: 1 + AddressOffset: 0x0 + Size: 0x6 + Metadata: 0x0 + - ID: 2 + AddressOffset: 0x1 + Size: 0x4 + Metadata: 0x0 + - ID: 5 + AddressOffset: 0x0 + Size: 0x1 + Metadata: 0x2 + PGOAnalyses: + - FuncEntryCount: 1000 + PGOBBEntries: + - BBFreq: 1000 + Successors: + - ID: 1 + BrProb: 0x22222222 + - ID: 2 + BrProb: 0x33333333 + - ID: 3 + BrProb: 0xaaaaaaaa + - BBFreq: 133 + Successors: + - ID: 2 + BrProb: 0x11111111 + - ID: 3 + BrProb: 0xeeeeeeee + - BBFreq: 18 + Successors: + - ID: 3 + BrProb: 0xffffffff + - BBFreq: 1000 + Successors: [] +Symbols: + - Name: foo + Section: .text.foo + Value: 0x0 + +# ENTRY-FREQ-PROB: : +# ENTRY-FREQ-PROB: (Entry count: 1000, Frequency: 1000, Successors: BB1:22222222, BB2:33333333, BB3:aaaaaaaa): +# ENTRY-FREQ-PROB: (Frequency: 133, Successors: BB2:11111111, BB3:eeeeeeee): +# ENTRY-FREQ-PROB: (Frequency: 18, Successors: BB3:ffffffff): +# ENTRY-FREQ-PROB: (Frequency: 1000): diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp index bee76d9c869a..f9f3f0a749a5 100644 --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ 
b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -1144,10 +1144,57 @@ static SymbolInfoTy createDummySymbolInfo(const ObjectFile &Obj, return SymbolInfoTy(Addr, Name, Type); } -static void -collectBBAddrMapLabels(const std::unordered_map &AddrToBBAddrMap, - uint64_t SectionAddr, uint64_t Start, uint64_t End, - std::unordered_map> &Labels) { +struct BBAddrMapLabel { + std::string BlockLabel; + std::string PGOAnalysis; +}; + +static std::string constructPGOLabelString(const PGOAnalysisMap &PGOMap, + size_t BBEntryIndex) { + std::string PGOString; + raw_string_ostream PGOSS(PGOString); + + PGOSS << " ("; + if (PGOMap.FeatEnable.FuncEntryCount && BBEntryIndex == 0) { + PGOSS << "Entry count: " << Twine(PGOMap.FuncEntryCount); + if (PGOMap.FeatEnable.BBFreq || PGOMap.FeatEnable.BrProb) { + PGOSS << ", "; + } + } + + if (PGOMap.FeatEnable.BBFreq || PGOMap.FeatEnable.BrProb) { + assert(BBEntryIndex < PGOMap.BBEntries.size() && + "Expected PGOAnalysisMap and BBAddrMap to have the same entires"); + const PGOAnalysisMap::PGOBBEntry &PGOBBEntry = + PGOMap.BBEntries[BBEntryIndex]; + + if (PGOMap.FeatEnable.BBFreq) { + PGOSS << "Frequency: " << Twine(PGOBBEntry.BlockFreq.getFrequency()); + if (PGOMap.FeatEnable.BrProb && PGOBBEntry.Successors.size() > 0) { + PGOSS << ", "; + } + } + if (PGOMap.FeatEnable.BrProb && PGOBBEntry.Successors.size() > 0) { + PGOSS << "Successors: "; + interleaveComma( + PGOBBEntry.Successors, PGOSS, + [&PGOSS](const PGOAnalysisMap::PGOBBEntry::SuccessorEntry &SE) { + PGOSS << "BB" << SE.ID << ":"; + PGOSS.write_hex(SE.Prob.getNumerator()); + }); + } + } + PGOSS << ")"; + + return PGOString; +} + +static void collectBBAddrMapLabels( + const std::unordered_map &AddrToBBAddrMap, + const std::unordered_map &AddrToPGOAnalysisMap, + uint64_t SectionAddr, uint64_t Start, uint64_t End, + std::unordered_map> &Labels, + const StringRef FileName) { if (AddrToBBAddrMap.empty()) return; Labels.clear(); @@ -1156,11 +1203,21 @@ collectBBAddrMapLabels(const 
std::unordered_map &AddrToBBAd auto Iter = AddrToBBAddrMap.find(StartAddress); if (Iter == AddrToBBAddrMap.end()) return; - for (const BBAddrMap::BBEntry &BBEntry : Iter->second.getBBEntries()) { + auto PGOIter = AddrToPGOAnalysisMap.find(StartAddress); + + for (size_t I = 0; I < Iter->second.getBBEntries().size(); ++I) { + const BBAddrMap::BBEntry &BBEntry = Iter->second.getBBEntries()[I]; uint64_t BBAddress = BBEntry.Offset + Iter->second.getFunctionAddress(); if (BBAddress >= EndAddress) continue; - Labels[BBAddress].push_back(("BB" + Twine(BBEntry.ID)).str()); + + std::string LabelString = ("BB" + Twine(BBEntry.ID)).str(); + std::string PGOString; + + if (PGOIter != AddrToPGOAnalysisMap.end()) + PGOString = constructPGOLabelString(PGOIter->second, I); + + Labels[BBAddress].push_back({LabelString, PGOString}); } } @@ -1461,18 +1518,24 @@ static void disassembleObject(const Target *TheTarget, ObjectFile &Obj, LLVM_DEBUG(LVP.dump()); std::unordered_map AddrToBBAddrMap; + std::unordered_map AddrToPGOAnalysisMap; auto ReadBBAddrMap = [&](std::optional SectionIndex = std::nullopt) { AddrToBBAddrMap.clear(); if (const auto *Elf = dyn_cast(&Obj)) { - auto BBAddrMapsOrErr = Elf->readBBAddrMap(SectionIndex); + std::vector PGOAnalyses; + auto BBAddrMapsOrErr = Elf->readBBAddrMap(SectionIndex, &PGOAnalyses); if (!BBAddrMapsOrErr) { reportWarning(toString(BBAddrMapsOrErr.takeError()), Obj.getFileName()); return; } - for (auto &FunctionBBAddrMap : *BBAddrMapsOrErr) - AddrToBBAddrMap.emplace(FunctionBBAddrMap.Addr, - std::move(FunctionBBAddrMap)); + for (const auto &[FunctionBBAddrMap, FunctionPGOAnalysis] : + zip_equal(*std::move(BBAddrMapsOrErr), std::move(PGOAnalyses))) { + uint64_t Addr = FunctionBBAddrMap.Addr; + AddrToBBAddrMap.emplace(Addr, std::move(FunctionBBAddrMap)); + if (FunctionPGOAnalysis.FeatEnable.anyEnabled()) + AddrToPGOAnalysisMap.emplace(Addr, std::move(FunctionPGOAnalysis)); + } } }; @@ -1774,12 +1837,13 @@ static void disassembleObject(const Target 
*TheTarget, ObjectFile &Obj, formatted_raw_ostream FOS(outs()); std::unordered_map AllLabels; - std::unordered_map> BBAddrMapLabels; + std::unordered_map> BBAddrMapLabels; if (SymbolizeOperands) { collectLocalBranchTargets(Bytes, MIA, DisAsm, IP, PrimarySTI, SectionAddr, Index, End, AllLabels); - collectBBAddrMapLabels(AddrToBBAddrMap, SectionAddr, Index, End, - BBAddrMapLabels); + collectBBAddrMapLabels(AddrToBBAddrMap, AddrToPGOAnalysisMap, + SectionAddr, Index, End, BBAddrMapLabels, + FileName); } while (Index < End) { @@ -1836,8 +1900,9 @@ static void disassembleObject(const Target *TheTarget, ObjectFile &Obj, // Print local label if there's any. auto Iter1 = BBAddrMapLabels.find(SectionAddr + Index); if (Iter1 != BBAddrMapLabels.end()) { - for (StringRef Label : Iter1->second) - FOS << "<" << Label << ">:\n"; + for (const auto &BBLabel : Iter1->second) + FOS << "<" << BBLabel.BlockLabel << ">" << BBLabel.PGOAnalysis + << ":\n"; } else { auto Iter2 = AllLabels.find(SectionAddr + Index); if (Iter2 != AllLabels.end()) @@ -1961,7 +2026,7 @@ static void disassembleObject(const Target *TheTarget, ObjectFile &Obj, // the target address. 
*TargetOS << TargetName; } else if (BBAddrMapLabelAvailable) { - *TargetOS << BBAddrMapLabels[Target].front(); + *TargetOS << BBAddrMapLabels[Target].front().BlockLabel; } else if (LabelAvailable) { *TargetOS << AllLabels[Target]; } else { @@ -1971,7 +2036,8 @@ static void disassembleObject(const Target *TheTarget, ObjectFile &Obj, } *TargetOS << ">"; } else if (BBAddrMapLabelAvailable) { - *TargetOS << " <" << BBAddrMapLabels[Target].front() << ">"; + *TargetOS << " <" << BBAddrMapLabels[Target].front().BlockLabel + << ">"; } else if (LabelAvailable) { *TargetOS << " <" << AllLabels[Target] << ">"; } -- Gitee From 54439b99f7799ecacbe7b019e2cb57eecf1a8bf8 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Tue, 23 Jan 2024 16:48:10 -0800 Subject: [PATCH 26/47] [AsmPrinter] Remove mbb-profile-dump flag (#76595) Now that the work embedding PGO information in SHT_LLVM_BB_ADDR_MAP ELF sections has landed, there is no longer a need to keep around the mbb-profile-dump flag. --- llvm/include/llvm/CodeGen/AsmPrinter.h | 4 -- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 51 --------------- .../CodeGen/MLRegalloc/bb-profile-dump.ll | 62 ------------------- 3 files changed, 117 deletions(-) delete mode 100644 llvm/test/CodeGen/MLRegalloc/bb-profile-dump.ll diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h index 0ac497c5f8ef..571bc0118c69 100644 --- a/llvm/include/llvm/CodeGen/AsmPrinter.h +++ b/llvm/include/llvm/CodeGen/AsmPrinter.h @@ -236,10 +236,6 @@ private: /// split stack prologue. 
bool HasNoSplitStack = false; - /// Raw FDOstream for outputting machine basic block frequncies if the - /// --mbb-profile-dump flag is set for downstream cost modelling applications - std::unique_ptr MBBProfileDumpFileOutput; - protected: explicit AsmPrinter(TargetMachine &TM, std::unique_ptr Streamer); diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 59ba672d08e6..4eeebe5fc734 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -132,13 +132,6 @@ using namespace llvm; #define DEBUG_TYPE "asm-printer" -static cl::opt BasicBlockProfileDump( - "mbb-profile-dump", cl::Hidden, - cl::desc("Basic block profile dump for external cost modelling. If " - "matching up BBs with afterwards, the compilation must be " - "performed with -basic-block-sections=labels. Enabling this " - "flag during in-process ThinLTO is not supported.")); - // This is a replication of fields of object::PGOAnalysisMap::Features. 
It // should match the order of the fields so that // `object::PGOAnalysisMap::Features::decode(PgoAnalysisMapFeatures.getBits())` @@ -623,16 +616,6 @@ bool AsmPrinter::doInitialization(Module &M) { HI.Handler->beginModule(&M); } - if (!BasicBlockProfileDump.empty()) { - std::error_code PossibleFileError; - MBBProfileDumpFileOutput = std::make_unique( - BasicBlockProfileDump, PossibleFileError); - if (PossibleFileError) { - M.getContext().emitError("Failed to open file for MBB Profile Dump: " + - PossibleFileError.message() + "\n"); - } - } - return false; } @@ -1998,40 +1981,6 @@ void AsmPrinter::emitFunctionBody() { OutStreamer->getCommentOS() << "-- End function\n"; OutStreamer->addBlankLine(); - - // Output MBB ids, function names, and frequencies if the flag to dump - // MBB profile information has been set - if (MBBProfileDumpFileOutput && !MF->empty() && - MF->getFunction().getEntryCount()) { - if (!MF->hasBBLabels()) { - MF->getContext().reportError( - SMLoc(), - "Unable to find BB labels for MBB profile dump. -mbb-profile-dump " - "must be called with -basic-block-sections=labels"); - } else { - MachineBlockFrequencyInfo &MBFI = - getAnalysis().getBFI(); - // The entry count and the entry basic block frequency aren't the same. We - // want to capture "absolute" frequencies, i.e. the frequency with which a - // MBB is executed when the program is executed. From there, we can derive - // Function-relative frequencies (divide by the value for the first MBB). - // We also have the information about frequency with which functions - // were called. This helps, for example, in a type of integration tests - // where we want to cross-validate the compiler's profile with a real - // profile. - // Using double precision because uint64 values used to encode mbb - // "frequencies" may be quite large. 
- const double EntryCount = - static_cast(MF->getFunction().getEntryCount()->getCount()); - for (const auto &MBB : *MF) { - const double MBBRelFreq = MBFI.getBlockFreqRelativeToEntryBlock(&MBB); - const double AbsMBBFreq = MBBRelFreq * EntryCount; - *MBBProfileDumpFileOutput.get() - << MF->getName() << "," << MBB.getBBID()->BaseID << "," - << AbsMBBFreq << "\n"; - } - } - } } /// Compute the number of Global Variables that uses a Constant. diff --git a/llvm/test/CodeGen/MLRegalloc/bb-profile-dump.ll b/llvm/test/CodeGen/MLRegalloc/bb-profile-dump.ll deleted file mode 100644 index cc6332422af5..000000000000 --- a/llvm/test/CodeGen/MLRegalloc/bb-profile-dump.ll +++ /dev/null @@ -1,62 +0,0 @@ -; REQUIRES: x86-registered-target -; -; Check that the basic block profile dump outputs data and in the correct -; format. -; -; RUN: llc -mtriple=x86_64-linux-unknown -o /dev/null -basic-block-sections=labels -mbb-profile-dump=- %s | FileCheck %s - -; bb profile dump is not supported on NVPTX -; UNSUPPORTED: target=nvptx{{.*}} - -; Check that given a simple case, we can return the default MBFI - -define i64 @f2(i64 %a, i64 %b) !prof !1{ - %sum = add i64 %a, %b - ret i64 %sum -} - -; CHECK: f2,0,1.000000e+03 - -define i64 @f1() !prof !2{ - %sum = call i64 @f2(i64 2, i64 2) - %isEqual = icmp eq i64 %sum, 4 - br i1 %isEqual, label %ifEqual, label %ifNotEqual, !prof !3 -ifEqual: - ret i64 0 -ifNotEqual: - ret i64 %sum -} - -; CHECK-NEXT: f1,0,1.000000e+01 -; CHECK-NEXT: f1,2,6.000000e+00 -; CHECK-NEXT: f1,1,4.000000e+00 - -define void @f3(i32 %iter) !prof !4 { -entry: - br label %loop -loop: - %i = phi i32 [0, %entry], [%i_next, %loop] - %i_next = add i32 %i, 1 - %exit_cond = icmp slt i32 %i_next, %iter - br i1 %exit_cond, label %loop, label %exit, !prof !5 -exit: - ret void -} - -; CHECK-NEXT: f3,0,2.000000e+00 -; CHECK-NEXT: f3,1,2.002000e+03 -; CHECK-NEXT: f3,2,2.000000e+00 - -!1 = !{!"function_entry_count", i64 1000} -!2 = !{!"function_entry_count", i64 10} -!3 = 
!{!"branch_weights", i32 2, i32 3} -!4 = !{!"function_entry_count", i64 2} -!5 = !{!"branch_weights", i32 1000, i32 1} - -; Check that if we pass -mbb-profile-dump but don't set -basic-block-sections, -; we get an appropriate error message - -; RUN: not llc -mtriple=x86_64-linux-unknown -o /dev/null -mbb-profile-dump=- %s 2>&1 | FileCheck --check-prefix=NO-SECTIONS %s - -; NO-SECTIONS: :0: error: Unable to find BB labels for MBB profile dump. -mbb-profile-dump must be called with -basic-block-sections=labels - -- Gitee From 812ceb741aa9e9547cb739efe0494c35ca9f3b7c Mon Sep 17 00:00:00 2001 From: Micah Weston Date: Wed, 24 Jan 2024 17:26:48 -0500 Subject: [PATCH 27/47] [SHT_LLVM_BB_ADDR_MAP] Avoids side-effects in addition since order is unspecified. (#79168) Turns out the problem with https://github.com/llvm/llvm-project/issues/60013 is due to the fact that order of operation is unspecified in C++: https://en.cppreference.com/w/cpp/language/eval_order. A small example of where this manifests with MSVC can be seen here https://ooo.godbolt.org/z/bxqKeqzqn. This patch does the following: * Removes the addition operations where we sequence more than one side-effect based expression. 
* Removes test guards to now run on Windows --- llvm/lib/ObjectYAML/ELFEmitter.cpp | 12 +++++++----- ...-bbaddrmap-disassemble-symbolize-operands.yaml | 3 --- .../X86/elf-bbaddrmap-symbolize-relocatable.yaml | 3 --- .../llvm-readobj/ELF/bb-addr-map-relocatable.test | 3 --- llvm/test/tools/llvm-readobj/ELF/bb-addr-map.test | 3 --- llvm/test/tools/obj2yaml/ELF/bb-addr-map.yaml | 3 --- llvm/test/tools/yaml2obj/ELF/bb-addr-map.yaml | 3 --- llvm/unittests/Object/ELFObjectFileTest.cpp | 15 --------------- 8 files changed, 7 insertions(+), 38 deletions(-) diff --git a/llvm/lib/ObjectYAML/ELFEmitter.cpp b/llvm/lib/ObjectYAML/ELFEmitter.cpp index 917eb2a60428..949e3b5ead29 100644 --- a/llvm/lib/ObjectYAML/ELFEmitter.cpp +++ b/llvm/lib/ObjectYAML/ELFEmitter.cpp @@ -1439,9 +1439,9 @@ void ELFState::writeSectionContent( for (const ELFYAML::BBAddrMapEntry::BBEntry &BBE : *E.BBEntries) { if (Section.Type == llvm::ELF::SHT_LLVM_BB_ADDR_MAP && E.Version > 1) SHeader.sh_size += CBA.writeULEB128(BBE.ID); - SHeader.sh_size += CBA.writeULEB128(BBE.AddressOffset) + - CBA.writeULEB128(BBE.Size) + - CBA.writeULEB128(BBE.Metadata); + SHeader.sh_size += CBA.writeULEB128(BBE.AddressOffset); + SHeader.sh_size += CBA.writeULEB128(BBE.Size); + SHeader.sh_size += CBA.writeULEB128(BBE.Metadata); } } @@ -1469,8 +1469,10 @@ void ELFState::writeSectionContent( SHeader.sh_size += CBA.writeULEB128(*PGOBBE.BBFreq); if (PGOBBE.Successors) { SHeader.sh_size += CBA.writeULEB128(PGOBBE.Successors->size()); - for (const auto &[ID, BrProb] : *PGOBBE.Successors) - SHeader.sh_size += CBA.writeULEB128(ID) + CBA.writeULEB128(BrProb); + for (const auto &[ID, BrProb] : *PGOBBE.Successors) { + SHeader.sh_size += CBA.writeULEB128(ID); + SHeader.sh_size += CBA.writeULEB128(BrProb); + } } } } diff --git a/llvm/test/tools/llvm-objdump/X86/elf-bbaddrmap-disassemble-symbolize-operands.yaml b/llvm/test/tools/llvm-objdump/X86/elf-bbaddrmap-disassemble-symbolize-operands.yaml index 2e9eb9af3561..6864932411ae 100644 --- 
a/llvm/test/tools/llvm-objdump/X86/elf-bbaddrmap-disassemble-symbolize-operands.yaml +++ b/llvm/test/tools/llvm-objdump/X86/elf-bbaddrmap-disassemble-symbolize-operands.yaml @@ -1,9 +1,6 @@ ## Test that in the presence of SHT_LLVM_BB_ADDR_MAP sections, ## --symbolize-operands can display labels. -## Fails on windows (https://github.com/llvm/llvm-project/issues/60013). -# UNSUPPORTED: system-windows - ## Executable object file. # RUN: yaml2obj --docnum=1 -DFOO_ADDR=0x4000 -DBAR_ADDR=0x5000 %s -o %t1 # RUN: llvm-objdump %t1 -d --symbolize-operands -M intel --no-show-raw-insn --no-leading-addr | \ diff --git a/llvm/test/tools/llvm-objdump/X86/elf-bbaddrmap-symbolize-relocatable.yaml b/llvm/test/tools/llvm-objdump/X86/elf-bbaddrmap-symbolize-relocatable.yaml index d5a618467bd2..f796963481cd 100644 --- a/llvm/test/tools/llvm-objdump/X86/elf-bbaddrmap-symbolize-relocatable.yaml +++ b/llvm/test/tools/llvm-objdump/X86/elf-bbaddrmap-symbolize-relocatable.yaml @@ -2,9 +2,6 @@ ## --symbolize-operands can display labels properly in a relocatable ## object file. -## Fails on windows (https://github.com/llvm/llvm-project/issues/60013). -# UNSUPPORTED: system-windows - ## Relocatable Object file. # RUN: yaml2obj %s -o %t1 # RUN: llvm-objdump %t1 -d --symbolize-operands -M att --no-show-raw-insn --no-leading-addr | \ diff --git a/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-relocatable.test b/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-relocatable.test index 6ac27941853f..e6b6cc344a8e 100644 --- a/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-relocatable.test +++ b/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-relocatable.test @@ -1,9 +1,6 @@ ## This test checks how we handle the --bb-addr-map option on relocatable ## object files. -## Fails on windows (https://github.com/llvm/llvm-project/issues/60013). 
-# UNSUPPORTED: system-windows - # RUN: yaml2obj %s -o %t1.o # RUN: llvm-readobj %t1.o --bb-addr-map | FileCheck %s diff --git a/llvm/test/tools/llvm-readobj/ELF/bb-addr-map.test b/llvm/test/tools/llvm-readobj/ELF/bb-addr-map.test index 928057017510..0593f04d6e30 100644 --- a/llvm/test/tools/llvm-readobj/ELF/bb-addr-map.test +++ b/llvm/test/tools/llvm-readobj/ELF/bb-addr-map.test @@ -1,8 +1,5 @@ ## This test checks how we handle the --bb-addr-map option. -## Fails on windows (https://github.com/llvm/llvm-project/issues/60013). -# UNSUPPORTED: system-windows - ## Check 64-bit: # RUN: yaml2obj --docnum=1 %s -DBITS=64 -DADDR=0x999999999 -o %t1.x64.o # RUN: llvm-readobj %t1.x64.o --bb-addr-map 2>&1 | FileCheck %s -DADDR=0x999999999 -DFILE=%t1.x64.o --check-prefix=CHECK diff --git a/llvm/test/tools/obj2yaml/ELF/bb-addr-map.yaml b/llvm/test/tools/obj2yaml/ELF/bb-addr-map.yaml index eceb42f6598f..629c29e202ae 100644 --- a/llvm/test/tools/obj2yaml/ELF/bb-addr-map.yaml +++ b/llvm/test/tools/obj2yaml/ELF/bb-addr-map.yaml @@ -1,8 +1,5 @@ ## Check how obj2yaml produces YAML .llvm_bb_addr_map descriptions. -## Fails on windows (https://github.com/llvm/llvm-project/issues/60013). -# UNSUPPORTED: system-windows - ## Check that obj2yaml uses the "Entries" tag to describe an .llvm_bb_addr_map section. # RUN: yaml2obj --docnum=1 %s -o %t1 diff --git a/llvm/test/tools/yaml2obj/ELF/bb-addr-map.yaml b/llvm/test/tools/yaml2obj/ELF/bb-addr-map.yaml index 73c916808417..2086dc53208b 100644 --- a/llvm/test/tools/yaml2obj/ELF/bb-addr-map.yaml +++ b/llvm/test/tools/yaml2obj/ELF/bb-addr-map.yaml @@ -1,8 +1,5 @@ ## Check how yaml2obj produces .llvm_bb_addr_map sections. -## Fails on windows (https://github.com/llvm/llvm-project/issues/60013). 
-# UNSUPPORTED: system-windows - # RUN: yaml2obj --docnum=1 %s -o %t1 # RUN: llvm-readobj --sections --section-data %t1 | FileCheck %s diff --git a/llvm/unittests/Object/ELFObjectFileTest.cpp b/llvm/unittests/Object/ELFObjectFileTest.cpp index ad85371d58ee..91f5d9356140 100644 --- a/llvm/unittests/Object/ELFObjectFileTest.cpp +++ b/llvm/unittests/Object/ELFObjectFileTest.cpp @@ -21,13 +21,6 @@ using namespace llvm; using namespace llvm::object; -// Used to skip LLVM_BB_ADDR_MAP tests on windows platforms due to -// https://github.com/llvm/llvm-project/issues/60013. -bool IsHostWindows() { - Triple Host(Triple::normalize(sys::getProcessTriple())); - return Host.isOSWindows(); -} - namespace { // A struct to initialize a buffer to represent an ELF object file. @@ -475,8 +468,6 @@ Sections: // Tests for error paths of the ELFFile::decodeBBAddrMap API. TEST(ELFObjectFileTest, InvalidDecodeBBAddrMap) { - if (IsHostWindows()) - GTEST_SKIP(); StringRef CommonYamlString(R"( --- !ELF FileHeader: @@ -623,8 +614,6 @@ Sections: // Test for the ELFObjectFile::readBBAddrMap API. TEST(ELFObjectFileTest, ReadBBAddrMap) { - if (IsHostWindows()) - GTEST_SKIP(); StringRef CommonYamlString(R"( --- !ELF FileHeader: @@ -783,8 +772,6 @@ Sections: // Tests for error paths of the ELFFile::decodeBBAddrMap with PGOAnalysisMap // API. TEST(ELFObjectFileTest, InvalidDecodePGOAnalysisMap) { - if (IsHostWindows()) - GTEST_SKIP(); StringRef CommonYamlString(R"( --- !ELF FileHeader: @@ -915,8 +902,6 @@ Sections: // Test for the ELFObjectFile::readBBAddrMap API with PGOAnalysisMap. 
TEST(ELFObjectFileTest, ReadPGOAnalysisMap) { - if (IsHostWindows()) - GTEST_SKIP(); StringRef CommonYamlString(R"( --- !ELF FileHeader: -- Gitee From 984da41962eefe817a5be373d508aa1ab8f96658 Mon Sep 17 00:00:00 2001 From: Alexandre Ganea Date: Wed, 24 Jan 2024 21:12:02 -0500 Subject: [PATCH 28/47] [llvm] Silence warning when building with Clang ToT This fixes: ``` [1343/7452] Building CXX object lib\Object\CMakeFiles\LLVMObject.dir\ELFObjectFile.cpp.obj C:\git\llvm-project\llvm\lib\Object\ELFObjectFile.cpp(808,27): warning: comparison of integers of different signs: 'unsigned int' and '_Iter_diff_t> *>' (aka 'int') [-Wsign-compare] 808 | if (*TextSectionIndex != std::distance(Sections.begin(), *TextSecOrErr)) | ~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ C:\git\llvm-project\llvm\lib\Object\ELFObjectFile.cpp(913,12): note: in instantiation of function template specialization 'readBBAddrMapImpl>' requested here 913 | return readBBAddrMapImpl(Obj->getELFFile(), TextSectionIndex, PGOAnalyses); | ^ ``` --- llvm/lib/Object/ELFObjectFile.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Object/ELFObjectFile.cpp b/llvm/lib/Object/ELFObjectFile.cpp index 7503e2efdafc..d84592bc40fe 100644 --- a/llvm/lib/Object/ELFObjectFile.cpp +++ b/llvm/lib/Object/ELFObjectFile.cpp @@ -730,7 +730,10 @@ Expected> static readBBAddrMapImpl( return createError("unable to get the linked-to section for " + describe(EF, Sec) + ": " + toString(TextSecOrErr.takeError())); - if (*TextSectionIndex != std::distance(Sections.begin(), *TextSecOrErr)) + assert(*TextSecOrErr >= Sections.begin() && + "Text section pointer outside of bounds"); + if (*TextSectionIndex != + (unsigned)std::distance(Sections.begin(), *TextSecOrErr)) return false; return true; }; -- Gitee From 3acea926ee14ca1db3f2169b9b51f1510479ecf4 Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Thu, 25 Jan 2024 13:03:18 -0800 Subject: [PATCH 29/47] 
[llvm-objdump,SHT_LLVM_BB_ADDR_MAP,NFC] Use auto && instead of const auto & to allow moving from BBAddrMap objects. (#79456) std::move on `const auto &` references is essentially a noop. Changing to `auto &&` to actually allow moving. --- llvm/tools/llvm-objdump/llvm-objdump.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp index f9f3f0a749a5..ff71d91e07fb 100644 --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -1529,7 +1529,7 @@ static void disassembleObject(const Target *TheTarget, ObjectFile &Obj, reportWarning(toString(BBAddrMapsOrErr.takeError()), Obj.getFileName()); return; } - for (const auto &[FunctionBBAddrMap, FunctionPGOAnalysis] : + for (auto &&[FunctionBBAddrMap, FunctionPGOAnalysis] : zip_equal(*std::move(BBAddrMapsOrErr), std::move(PGOAnalyses))) { uint64_t Addr = FunctionBBAddrMap.Addr; AddrToBBAddrMap.emplace(Addr, std::move(FunctionBBAddrMap)); -- Gitee From f9a27ad0475287d29cefff6abcf9a4356e42025e Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Mon, 21 Aug 2023 15:36:45 +0000 Subject: [PATCH 30/47] Define BBEntry::hasIndirectBranch. 
NFC Differential Revision: https://reviews.llvm.org/D158429 --- llvm/include/llvm/Object/ELFTypes.h | 1 + llvm/tools/llvm-readobj/ELFDumper.cpp | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/include/llvm/Object/ELFTypes.h b/llvm/include/llvm/Object/ELFTypes.h index 84f026a926d6..cdab7ae13704 100644 --- a/llvm/include/llvm/Object/ELFTypes.h +++ b/llvm/include/llvm/Object/ELFTypes.h @@ -857,6 +857,7 @@ struct BBAddrMap { bool hasTailCall() const { return MD.HasTailCall; } bool isEHPad() const { return MD.IsEHPad; } bool canFallThrough() const { return MD.CanFallThrough; } + bool hasIndirectBranch() const { return MD.HasIndirectBranch; } }; BBAddrMap(uint64_t Addr, std::vector BBEntries) diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index bc8e0413b339..902f6c4cbc72 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -7471,7 +7471,7 @@ template void LLVMELFDumper::printBBAddrMaps() { W.printBoolean("HasTailCall", BBE.hasTailCall()); W.printBoolean("IsEHPad", BBE.isEHPad()); W.printBoolean("CanFallThrough", BBE.canFallThrough()); - W.printBoolean("HasIndirectBranch", BBE.MD.HasIndirectBranch); + W.printBoolean("HasIndirectBranch", BBE.hasIndirectBranch()); } } } -- Gitee From ec8007f88eda1cbfffbc20b97115084398ccd9d3 Mon Sep 17 00:00:00 2001 From: Micah Weston Date: Thu, 1 Feb 2024 16:02:14 -0500 Subject: [PATCH 31/47] [SHT_LLVM_BB_ADDR_MAP][llvm-readobj] Implements llvm-readobj handling for PGOAnalysisMap. (#79520) Adds raw printing of PGOAnalysisMap in llvm-readobj. I'm leaving the fixme's for a later patch that will provide a 'pretty' printing for BBFreq and BrProb (i.e. relative frequencies and probabilities) that will apply to both llvm-readobj and llvm-objdump. 
--- .../ELF/bb-addr-map-pgo-analysis-map.test | 214 ++++++++++++++++++ llvm/tools/llvm-readobj/ELFDumper.cpp | 61 +++-- 2 files changed, 262 insertions(+), 13 deletions(-) create mode 100644 llvm/test/tools/llvm-readobj/ELF/bb-addr-map-pgo-analysis-map.test diff --git a/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-pgo-analysis-map.test b/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-pgo-analysis-map.test new file mode 100644 index 000000000000..91b3d7e3902e --- /dev/null +++ b/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-pgo-analysis-map.test @@ -0,0 +1,214 @@ +## This test checks how llvm-readobj prints the PGO Analysis Map with the +## --bb-addr-map option. + +## Check 64-bit: +# RUN: yaml2obj %s -DBITS=64 -DADDR=0x999999999 -o %t1.x64.o +# RUN: llvm-readobj %t1.x64.o --bb-addr-map 2>&1 | FileCheck %s -DADDR=0x999999999 -DFILE=%t1.x64.o --check-prefix=CHECK +# RUN: llvm-readelf %t1.x64.o --bb-addr-map | FileCheck %s --check-prefix=GNU + +## Check 32-bit: +# RUN: yaml2obj %s -DBITS=32 -o %t1.x32.o +# RUN: llvm-readobj %t1.x32.o --bb-addr-map 2>&1 | FileCheck -DADDR=0x11111 %s -DFILE=%t1.x32.o --check-prefix=CHECK +# RUN: llvm-readelf %t1.x32.o --bb-addr-map | FileCheck %s --check-prefix=GNU + +## Check that a malformed section can be handled. +# RUN: yaml2obj %s -DBITS=32 -DSIZE=24 -o %t2.o +# RUN: llvm-readobj %t2.o --bb-addr-map 2>&1 | FileCheck %s -DOFFSET=0x00000018 -DFILE=%t2.o --check-prefix=TRUNCATED + +## Check that missing features can be handled. 
+# RUN: yaml2obj %s -DBITS=32 -DFEATURE=0x2 -o %t3.o +# RUN: llvm-readobj %t3.o --bb-addr-map 2>&1 | FileCheck %s -DFILE=%t3.o --check-prefix=INVALIDFT + +# CHECK: BBAddrMap [ +# CHECK-NEXT: Function { +# CHECK-NEXT: At: [[ADDR]] +# CHECK-NEXT: warning: '[[FILE]]': could not identify function symbol for address ([[ADDR]]) in SHT_LLVM_BB_ADDR_MAP section with index 3 +# CHECK-NEXT: Name: +# CHECK-NEXT: BB entries [ +# CHECK-NEXT: { +# CHECK-NEXT: ID: 0 +# CHECK-NEXT: Offset: 0x0 +# CHECK-NEXT: Size: 0x1 +# CHECK-NEXT: HasReturn: No +# CHECK-NEXT: HasTailCall: Yes +# CHECK-NEXT: IsEHPad: No +# CHECK-NEXT: CanFallThrough: No +# CHECK-NEXT: HasIndirectBranch: No +# CHECK-NEXT: } +# CHECK-NEXT: { +# CHECK-NEXT: ID: 2 +# CHECK-NEXT: Offset: 0x4 +# CHECK-NEXT: Size: 0x4 +# CHECK-NEXT: HasReturn: Yes +# CHECK-NEXT: HasTailCall: No +# CHECK-NEXT: IsEHPad: Yes +# CHECK-NEXT: CanFallThrough: No +# CHECK-NEXT: HasIndirectBranch: Yes +# CHECK-NEXT: } +# CHECK-NEXT: ] +# CHECK-NEXT: PGO analyses { +# CHECK-NEXT: FuncEntryCount: 100 +# CHECK-NEXT: PGO BB entries [ +# CHECK-NEXT: { +# CHECK-NEXT: Frequency: 100 +# CHECK-NEXT: Successors [ +# CHECK-NEXT: { +# CHECK-NEXT: ID: 2 +# CHECK-NEXT: Probability: 0xFFFFFFFF +# CHECK-NEXT: } +# CHECK-NEXT: ] +# CHECK-NEXT: } +# CHECK-NEXT: { +# CHECK-NEXT: Frequency: 100 +# CHECK-NEXT: Successors [ +# CHECK-NEXT: ] +# CHECK-NEXT: } +# CHECK-NEXT: ] +# CHECK-NEXT: } +# CHECK-NEXT: } +# CHECK-NEXT: Function { +# CHECK-NEXT: At: 0x22222 +# CHECK-NEXT: Name: foo +# CHECK-NEXT: BB entries [ +# CHECK-NEXT: { +# CHECK-NEXT: ID: 4 +# CHECK-NEXT: Offset: 0x6 +# CHECK-NEXT: Size: 0x7 +# CHECK-NEXT: HasReturn: No +# CHECK-NEXT: HasTailCall: No +# CHECK-NEXT: IsEHPad: No +# CHECK-NEXT: CanFallThrough: Yes +# CHECK-NEXT: HasIndirectBranch: No +# CHECK-NEXT: } +# CHECK-NEXT: ] +# CHECK-NEXT: PGO analyses { +# CHECK-NEXT: FuncEntryCount: 8888 +# CHECK-NEXT: PGO BB entries [ +# CHECK-NEXT: { +# CHECK-NEXT: Frequency: 9000 +# CHECK-NEXT: } +# CHECK-NEXT: ] 
+# CHECK-NEXT: } +# CHECK-NEXT: } +# CHECK-NEXT: ] + +# GNU: GNUStyle::printBBAddrMaps not implemented + +# TRUNCATED: BBAddrMap [ +# TRUNCATED-NEXT: warning: '[[FILE]]': unable to dump SHT_LLVM_BB_ADDR_MAP section with index 3: unable to decode LEB128 at offset [[OFFSET]]: malformed uleb128, extends past end +# TRUNCATED-NEXT: ] +## Check that the other valid section is properly dumped. +# TRUNCATED-NEXT: BBAddrMap [ +# TRUNCATED-NEXT: Function { +# TRUNCATED-NEXT: At: 0x33333 +# TRUNCATED-NEXT: Name: bar +# TRUNCATED-NEXT: BB entries [ +# TRUNCATED-NEXT: { +# TRUNCATED-NEXT: ID: 6 +# TRUNCATED-NEXT: Offset: 0x9 +# TRUNCATED-NEXT: Size: 0xA +# TRUNCATED-NEXT: HasReturn: Yes +# TRUNCATED-NEXT: HasTailCall: Yes +# TRUNCATED-NEXT: IsEHPad: No +# TRUNCATED-NEXT: CanFallThrough: Yes +# TRUNCATED-NEXT: HasIndirectBranch: Yes +# TRUNCATED-NEXT: } +# TRUNCATED-NEXT: { +# TRUNCATED-NEXT: ID: 7 +# TRUNCATED-NEXT: Offset: 0x1F +# TRUNCATED-NEXT: Size: 0xD +# TRUNCATED-NEXT: HasReturn: No +# TRUNCATED-NEXT: HasTailCall: Yes +# TRUNCATED-NEXT: IsEHPad: Yes +# TRUNCATED-NEXT: CanFallThrough: Yes +# TRUNCATED-NEXT: HasIndirectBranch: No +# TRUNCATED-NEXT: } +# TRUNCATED-NEXT: ] +# TRUNCATED-NEXT: PGO analyses { +# TRUNCATED-NEXT: FuncEntryCount: 89 +# TRUNCATED-NEXT: } +# TRUNCATED-NEXT: } +# TRUNCATED-NEXT: ] + +# INVALIDFT: warning: '[[FILE]]': unable to dump SHT_LLVM_BB_ADDR_MAP section with index 5: unable to decode LEB128 at offset 0x00000010: malformed uleb128, extends past end + +--- !ELF +FileHeader: + Class: ELFCLASS[[BITS]] + Data: ELFDATA2LSB + Type: ET_EXEC +Sections: + - Name: .text + Type: SHT_PROGBITS + Flags: [SHF_ALLOC] + - Name: .text.bar + Type: SHT_PROGBITS + Flags: [SHF_ALLOC] + - Name: .llvm_bb_addr_map + Type: SHT_LLVM_BB_ADDR_MAP + ShSize: [[SIZE=]] + Link: .text + Entries: + - Version: 2 + Feature: 0x7 + Address: [[ADDR=0x11111]] + BBEntries: + - ID: 0 + AddressOffset: 0x0 + Size: 0x1 + Metadata: 0x2 + - ID: 2 + AddressOffset: 0x3 + Size: 0x4 + Metadata: 
0x15 + - Version: 2 + Feature: 0x3 + Address: 0x22222 + BBEntries: + - ID: 4 + AddressOffset: 0x6 + Size: 0x7 + Metadata: 0x8 + PGOAnalyses: + - FuncEntryCount: 100 + PGOBBEntries: + - BBFreq: 100 + Successors: + - ID: 2 + BrProb: 0xFFFFFFFF + - BBFreq: 100 + Successors: [] + - FuncEntryCount: 8888 + PGOBBEntries: + - BBFreq: 9000 + - Name: dummy_section + Type: SHT_PROGBITS + Size: 16 + - Name: '.llvm_bb_addr_map (1)' + Type: SHT_LLVM_BB_ADDR_MAP + Link: .text.bar + Entries: + - Version: 2 + Feature: [[FEATURE=0x1]] + Address: 0x33333 + BBEntries: + - ID: 6 + AddressOffset: 0x9 + Size: 0xa + Metadata: 0x1b + - ID: 7 + AddressOffset: 0xc + Size: 0xd + Metadata: 0xe + PGOAnalyses: + - FuncEntryCount: 89 +Symbols: + - Name: foo + Section: .text + Type: STT_FUNC + Value: 0x22222 + - Name: bar + Section: .text.bar + Type: STT_FUNC + Value: 0x33333 + diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index 902f6c4cbc72..815fe31d4a82 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -7440,14 +7440,15 @@ template void LLVMELFDumper::printBBAddrMaps() { this->describe(*Sec)); continue; } + std::vector PGOAnalyses; Expected> BBAddrMapOrErr = - this->Obj.decodeBBAddrMap(*Sec, RelocSec); + this->Obj.decodeBBAddrMap(*Sec, RelocSec, &PGOAnalyses); if (!BBAddrMapOrErr) { this->reportUniqueWarning("unable to dump " + this->describe(*Sec) + ": " + toString(BBAddrMapOrErr.takeError())); continue; } - for (const BBAddrMap &AM : *BBAddrMapOrErr) { + for (const auto &[AM, PAM] : zip_equal(*BBAddrMapOrErr, PGOAnalyses)) { DictScope D(W, "Function"); W.printHex("At", AM.Addr); SmallVector FuncSymIndex = @@ -7461,17 +7462,51 @@ template void LLVMELFDumper::printBBAddrMaps() { FuncName = this->getStaticSymbolName(FuncSymIndex.front()); W.printString("Name", FuncName); - ListScope L(W, "BB entries"); - for (const BBAddrMap::BBEntry &BBE : AM.BBEntries) { - DictScope L(W); - W.printNumber("ID", BBE.ID); 
- W.printHex("Offset", BBE.Offset); - W.printHex("Size", BBE.Size); - W.printBoolean("HasReturn", BBE.hasReturn()); - W.printBoolean("HasTailCall", BBE.hasTailCall()); - W.printBoolean("IsEHPad", BBE.isEHPad()); - W.printBoolean("CanFallThrough", BBE.canFallThrough()); - W.printBoolean("HasIndirectBranch", BBE.hasIndirectBranch()); + { + ListScope L(W, "BB entries"); + for (const BBAddrMap::BBEntry &BBE : AM.BBEntries) { + DictScope L(W); + W.printNumber("ID", BBE.ID); + W.printHex("Offset", BBE.Offset); + W.printHex("Size", BBE.Size); + W.printBoolean("HasReturn", BBE.hasReturn()); + W.printBoolean("HasTailCall", BBE.hasTailCall()); + W.printBoolean("IsEHPad", BBE.isEHPad()); + W.printBoolean("CanFallThrough", BBE.canFallThrough()); + W.printBoolean("HasIndirectBranch", BBE.hasIndirectBranch()); + } + } + + if (PAM.FeatEnable.anyEnabled()) { + DictScope PD(W, "PGO analyses"); + + if (PAM.FeatEnable.FuncEntryCount) + W.printNumber("FuncEntryCount", PAM.FuncEntryCount); + + if (PAM.FeatEnable.BBFreq || PAM.FeatEnable.BrProb) { + ListScope L(W, "PGO BB entries"); + for (const PGOAnalysisMap::PGOBBEntry &PBBE : PAM.BBEntries) { + DictScope L(W); + + /// FIXME: currently we just emit the raw frequency, it may be + /// better to provide an option to scale it by the first entry + /// frequence using BlockFrequency::Scaled64 number + if (PAM.FeatEnable.BBFreq) + W.printNumber("Frequency", PBBE.BlockFreq.getFrequency()); + + if (PAM.FeatEnable.BrProb) { + ListScope L(W, "Successors"); + for (const auto &Succ : PBBE.Successors) { + DictScope L(W); + W.printNumber("ID", Succ.ID); + /// FIXME: currently we just emit the raw numerator of the + /// probably, it may be better to provide an option to emit it + /// as a percentage or other prettied representation + W.printHex("Probability", Succ.Prob.getNumerator()); + } + } + } + } } } } -- Gitee From d1d5ad5ab59cd76b0c6a9da617e7b2dd8d12b271 Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Thu, 1 Feb 2024 17:50:46 -0800 
Subject: [PATCH 32/47] [SHT_LLVM_BB_ADDR_MAP] Allow basic-block-sections and labels be used together by decoupling the handling of the two features. (#74128) Today `-split-machine-functions` and `-fbasic-block-sections={all,list}` cannot be combined with `-basic-block-sections=labels` (the labels option will be ignored). The inconsistency comes from the way basic block address map -- the underlying mechanism for basic block labels -- encodes basic block addresses (https://lists.llvm.org/pipermail/llvm-dev/2020-July/143512.html). Specifically, basic block offsets are computed relative to the function begin symbol. This relies on functions being contiguous which is not the case for MFS and basic block section binaries. This means Propeller cannot use binary profiles collected from these binaries, which limits the applicability of Propeller for iterative optimization. To make the `SHT_LLVM_BB_ADDR_MAP` feature work with basic block section binaries, we propose modifying the encoding of this section as follows. First let us review the current encoding which emits the address of each function and its number of basic blocks, followed by basic block entries for each basic block. | | | |--|--| | Address of the function | Function Address | | Number of basic blocks in this function | NumBlocks | | BB entry 1 | BB entry 2 | ... | BB entry #NumBlocks To make this work for basic block sections, we treat each basic block section similar to a function, except that basic block sections of the same function must be encapsulated in the same structure so we can map all of them to their single function. We modify the encoding to first emit the number of basic block sections (BB ranges) in the function. Then we emit the address map of each basic block section as before: the base address of the section, its number of blocks, and BB entries for its basic blocks. The first section in the BB address map is always the function entry section.
| | | |--|--| | Number of sections for this function | NumBBRanges | | Section 1 begin address | BaseAddress[1] | | Number of basic blocks in section 1 | NumBlocks[1] | | BB entries for Section 1 |..................| | Section #NumBBRanges begin address | BaseAddress[NumBBRanges] | | Number of basic blocks in section #NumBBRanges | NumBlocks[NumBBRanges] | | BB entries for Section #NumBBRanges The encoding of basic block entries remains as before with the minor change that each basic block offset is now computed relative to the begin symbol of its containing BB section. This patch adds a new boolean codegen option `-basic-block-address-map`. Correspondingly, the front-end flag `-fbasic-block-address-map` and LLD flag `--lto-basic-block-address-map` are introduced. Analogously, we add a new TargetOption field `BBAddrMap`. This means BB address maps are either generated for all functions in the compilation unit, or for none (depending on `TargetOptions::BBAddrMap`). This patch keeps the functionality of the old `-fbasic-block-sections=labels` option but does not remove it. A subsequent patch will remove the obsolete option. We refactor the `BasicBlockSections` pass by separating the BB address map and BB sections handling to their own functions (named `handleBBAddrMap` and `handleBBSections`). `handleBBSections` renumbers basic blocks and places them in their assigned sections. `handleBBAddrMap` is invoked after `handleBBSections` (if requested) and only renumbers the blocks. - New tests added: - Two tests basic-block-address-map-with-basic-block-sections.ll and basic-block-address-map-with-mfs.ll to exercise the combination of `-basic-block-address-map` with `-basic-block-sections=list` and `-split-machine-functions`. - A driver sanity test for the `-fbasic-block-address-map` option (basic-block-address-map.c). - An LLD test for testing the `--lto-basic-block-address-map` option. This reuses the LLVM IR from `lld/test/ELF/lto/basic-block-sections.ll`.
- Renamed and modified the two existing codegen tests for basic block address map (`basic-block-sections-labels-functions-sections.ll` and `basic-block-sections-labels.ll`) - Removed `SHT_LLVM_BB_ADDR_MAP_V0` tests. Full deprecation of `SHT_LLVM_BB_ADDR_MAP_V0` and `SHT_LLVM_BB_ADDR_MAP` version less than 2 will happen in a separate PR in a few months. --- clang/include/clang/Basic/CodeGenOptions.def | 1 + clang/include/clang/Driver/Options.td | 4 + clang/lib/CodeGen/BackendUtil.cpp | 1 + clang/lib/Driver/ToolChains/Clang.cpp | 11 + clang/test/Driver/basic-block-address-map.c | 8 + lld/ELF/Config.h | 1 + lld/ELF/Driver.cpp | 3 + lld/ELF/LTO.cpp | 2 + lld/ELF/Options.td | 3 + lld/test/ELF/lto/basic-block-address-map.ll | 28 ++ llvm/include/llvm/CodeGen/CommandFlags.h | 2 + llvm/include/llvm/Object/ELFTypes.h | 146 ++++-- llvm/include/llvm/ObjectYAML/ELFYAML.h | 27 +- llvm/include/llvm/Target/TargetOptions.h | 6 +- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 91 +++- llvm/lib/CodeGen/BasicBlockSections.cpp | 39 +- llvm/lib/CodeGen/CommandFlags.cpp | 7 + llvm/lib/CodeGen/MachineFunction.cpp | 1 + llvm/lib/CodeGen/TargetPassConfig.cpp | 28 +- llvm/lib/Object/ELF.cpp | 132 +++-- llvm/lib/ObjectYAML/ELFEmitter.cpp | 58 ++- llvm/lib/ObjectYAML/ELFYAML.cpp | 9 +- ...ic-block-address-map-function-sections.ll} | 1 + ...k-address-map-with-basic-block-sections.ll | 71 +++ .../X86/basic-block-address-map-with-mfs.ll | 90 ++++ ...s-labels.ll => basic-block-address-map.ll} | 2 + ...ddrmap-disassemble-symbolize-operands.yaml | 214 +++++--- .../elf-bbaddrmap-symbolize-relocatable.yaml | 50 +- .../llvm-objdump/X86/elf-pgoanalysismap.yaml | 87 ++-- .../ELF/bb-addr-map-pgo-analysis-map.test | 164 ++++--- .../ELF/bb-addr-map-relocatable.test | 107 ++-- .../tools/llvm-readobj/ELF/bb-addr-map.test | 272 ++++------- llvm/test/tools/obj2yaml/ELF/bb-addr-map.yaml | 218 +++------ llvm/test/tools/yaml2obj/ELF/bb-addr-map.yaml | 71 ++- llvm/tools/llvm-objdump/llvm-objdump.cpp | 206 +++++--- 
llvm/tools/llvm-readobj/ELFDumper.cpp | 43 +- llvm/tools/obj2yaml/elf2yaml.cpp | 46 +- llvm/unittests/Object/ELFObjectFileTest.cpp | 461 +++++++++++------- llvm/unittests/Object/ELFTypesTest.cpp | 38 +- 39 files changed, 1698 insertions(+), 1051 deletions(-) create mode 100644 clang/test/Driver/basic-block-address-map.c create mode 100644 lld/test/ELF/lto/basic-block-address-map.ll rename llvm/test/CodeGen/X86/{basic-block-sections-labels-functions-sections.ll => basic-block-address-map-function-sections.ll} (94%) create mode 100644 llvm/test/CodeGen/X86/basic-block-address-map-with-basic-block-sections.ll create mode 100644 llvm/test/CodeGen/X86/basic-block-address-map-with-mfs.ll rename llvm/test/CodeGen/X86/{basic-block-sections-labels.ll => basic-block-address-map.ll} (90%) diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def index d492b8681c5d..c9784fd6e83e 100644 --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -95,6 +95,7 @@ ENUM_CODEGENOPT(InlineAsmDialect, InlineAsmDialectKind, 1, IAD_ATT) CODEGENOPT(ForbidGuardVariables , 1, 0) ///< Issue errors if C++ guard variables ///< are required. CODEGENOPT(FunctionSections , 1, 0) ///< Set when -ffunction-sections is enabled. +CODEGENOPT(BBAddrMap , 1, 0) ///< Set when -fbasic-block-address-map is enabled. CODEGENOPT(InstrumentFunctions , 1, 0) ///< Set when -finstrument-functions is ///< enabled. 
CODEGENOPT(InstrumentFunctionsAfterInlining , 1, 0) ///< Set when diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index c109d7a8fcab..502fa2722023 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3324,6 +3324,10 @@ defm function_sections : BoolFOption<"function-sections", CodeGenOpts<"FunctionSections">, DefaultFalse, PosFlag, NegFlag>; +defm basic_block_address_map : BoolFOption<"basic-block-address-map", + CodeGenOpts<"BBAddrMap">, DefaultFalse, + PosFlag, + NegFlag>; def fbasic_block_sections_EQ : Joined<["-"], "fbasic-block-sections=">, Group, Flags<[CC1Option, CC1AsOption]>, HelpText<"Place each function's basic blocks in unique sections (ELF Only)">, diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index cef5e0d16ba7..6ba190d527ba 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -408,6 +408,7 @@ static bool initTargetOptions(DiagnosticsEngine &Diags, LangOptions::FPModeKind::FPM_FastHonorPragmas); Options.ApproxFuncFPMath = LangOpts.ApproxFunc; + Options.BBAddrMap = CodeGenOpts.BBAddrMap; Options.BBSections = llvm::StringSwitch(CodeGenOpts.BBSections) .Case("all", llvm::BasicBlockSection::All) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index e55cee095e32..e0cc4f9e5de6 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5961,6 +5961,17 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-ffunction-sections"); } + if (Arg *A = Args.getLastArg(options::OPT_fbasic_block_address_map, + options::OPT_fno_basic_block_address_map)) { + if (Triple.isX86() && Triple.isOSBinFormatELF()) { + if (A->getOption().matches(options::OPT_fbasic_block_address_map)) + A->render(Args, CmdArgs); + } else { + D.Diag(diag::err_drv_unsupported_opt_for_target) + << A->getAsString(Args) << 
TripleStr; + } + } + if (Arg *A = Args.getLastArg(options::OPT_fbasic_block_sections_EQ)) { StringRef Val = A->getValue(); if (Triple.isX86() && Triple.isOSBinFormatELF()) { diff --git a/clang/test/Driver/basic-block-address-map.c b/clang/test/Driver/basic-block-address-map.c new file mode 100644 index 000000000000..022f972b412d --- /dev/null +++ b/clang/test/Driver/basic-block-address-map.c @@ -0,0 +1,8 @@ +// RUN: %clang -### -target x86_64 -fbasic-block-address-map %s -S 2>&1 | FileCheck -check-prefix=CHECK-PRESENT %s +// CHECK-PRESENT: -fbasic-block-address-map + +// RUN: %clang -### -target x86_64 -fno-basic-block-address-map %s -S 2>&1 | FileCheck %s --check-prefix=CHECK-ABSENT +// CHECK-ABSENT-NOT: -fbasic-block-address-map + +// RUN: not %clang -c -target x86_64-apple-darwin10 -fbasic-block-address-map %s -S 2>&1 | FileCheck -check-prefix=CHECK-TRIPLE %s +// CHECK-TRIPLE: error: unsupported option '-fbasic-block-address-map' for target diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h index bbf2d2015645..94af8c34beb7 100644 --- a/lld/ELF/Config.h +++ b/lld/ELF/Config.h @@ -183,6 +183,7 @@ struct Config { llvm::StringRef cmseOutputLib; StringRef zBtiReport = "none"; StringRef zCetReport = "none"; + bool ltoBBAddrMap; llvm::StringRef ltoBasicBlockSections; std::pair thinLTOObjectSuffixReplace; llvm::StringRef thinLTOPrefixReplaceOld; diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index ffd0842b9078..e65dfa553de0 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -1243,6 +1243,9 @@ static void readConfigs(opt::InputArgList &args) { config->ltoObjPath = args.getLastArgValue(OPT_lto_obj_path_eq); config->ltoPartitions = args::getInteger(args, OPT_lto_partitions, 1); config->ltoSampleProfile = args.getLastArgValue(OPT_lto_sample_profile); + config->ltoBBAddrMap = + args.hasFlag(OPT_lto_basic_block_address_map, + OPT_no_lto_basic_block_address_map, false); config->ltoBasicBlockSections = args.getLastArgValue(OPT_lto_basic_block_sections); 
config->ltoUniqueBasicBlockSectionNames = diff --git a/lld/ELF/LTO.cpp b/lld/ELF/LTO.cpp index e8bfa903726d..a7df5f072f6f 100644 --- a/lld/ELF/LTO.cpp +++ b/lld/ELF/LTO.cpp @@ -84,6 +84,8 @@ static lto::Config createConfig() { c.Options.FunctionSections = true; c.Options.DataSections = true; + c.Options.BBAddrMap = config->ltoBBAddrMap; + // Check if basic block sections must be used. // Allowed values for --lto-basic-block-sections are "all", "labels", // "", or none. This is the equivalent diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td index 0d5c6c3d80a1..bb06d303d598 100644 --- a/lld/ELF/Options.td +++ b/lld/ELF/Options.td @@ -626,6 +626,9 @@ def save_temps_eq: JJ<"save-temps=">, HelpText<"Save select intermediate LTO com Values<"resolution,preopt,promote,internalize,import,opt,precodegen,prelink,combinedindex">; def lto_basic_block_sections: JJ<"lto-basic-block-sections=">, HelpText<"Enable basic block sections for LTO">; +defm lto_basic_block_address_map: BB<"lto-basic-block-address-map", + "Emit basic block address map for LTO", + "Do not emit basic block address map for LTO (default)">; defm lto_unique_basic_block_section_names: BB<"lto-unique-basic-block-section-names", "Give unique names to every basic block section for LTO", "Do not give unique names to every basic block section for LTO (default)">; diff --git a/lld/test/ELF/lto/basic-block-address-map.ll b/lld/test/ELF/lto/basic-block-address-map.ll new file mode 100644 index 000000000000..b96e7a8401f8 --- /dev/null +++ b/lld/test/ELF/lto/basic-block-address-map.ll @@ -0,0 +1,28 @@ +; REQUIRES: x86 +; RUN: llvm-as %s -o %t.o +; RUN: ld.lld %t.o -o %t --lto-basic-block-address-map --lto-O0 --save-temps +; RUN: llvm-readobj --sections %t.lto.o | FileCheck --check-prefix=SECNAMES %s + +; SECNAMES: Type: SHT_LLVM_BB_ADDR_MAP + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nounwind uwtable 
+define dso_local void @foo(i32 %b) local_unnamed_addr { +entry: + %tobool.not = icmp eq i32 %b, 0 + br i1 %tobool.not, label %if.end, label %if.then + +if.then: ; preds = %entry + tail call void @foo(i32 0) + br label %if.end + +if.end: ; preds = %entry, %if.then + ret void +} + +define void @_start() { + call void @foo(i32 1) + ret void +} diff --git a/llvm/include/llvm/CodeGen/CommandFlags.h b/llvm/include/llvm/CodeGen/CommandFlags.h index fa10ddd4447d..b79b370ef62a 100644 --- a/llvm/include/llvm/CodeGen/CommandFlags.h +++ b/llvm/include/llvm/CodeGen/CommandFlags.h @@ -154,6 +154,8 @@ struct RegisterCodeGenFlags { RegisterCodeGenFlags(); }; +bool getEnableBBAddrMap(); + llvm::BasicBlockSection getBBSectionsMode(llvm::TargetOptions &Options); /// Common utility function tightly tied to the options listed here. Initializes diff --git a/llvm/include/llvm/Object/ELFTypes.h b/llvm/include/llvm/Object/ELFTypes.h index cdab7ae13704..c413f0741a4f 100644 --- a/llvm/include/llvm/Object/ELFTypes.h +++ b/llvm/include/llvm/Object/ELFTypes.h @@ -798,6 +798,47 @@ template struct Elf_Mips_ABIFlags { // Struct representing the BBAddrMap for one function. struct BBAddrMap { + + // Bitfield of optional features to control the extra information + // emitted/encoded in the the section. + struct Features { + bool FuncEntryCount : 1; + bool BBFreq : 1; + bool BrProb : 1; + bool MultiBBRange : 1; + + bool hasPGOAnalysis() const { return FuncEntryCount || BBFreq || BrProb; } + + bool hasPGOAnalysisBBData() const { return BBFreq || BrProb; } + + // Encodes to minimum bit width representation. + uint8_t encode() const { + return (static_cast(FuncEntryCount) << 0) | + (static_cast(BBFreq) << 1) | + (static_cast(BrProb) << 2) | + (static_cast(MultiBBRange) << 3); + } + + // Decodes from minimum bit width representation and validates no + // unnecessary bits are used. 
+ static Expected decode(uint8_t Val) { + Features Feat{ + static_cast(Val & (1 << 0)), static_cast(Val & (1 << 1)), + static_cast(Val & (1 << 2)), static_cast(Val & (1 << 3))}; + if (Feat.encode() != Val) + return createStringError( + std::error_code(), "invalid encoding for BBAddrMap::Features: 0x%x", + Val); + return Feat; + } + + bool operator==(const Features &Other) const { + return std::tie(FuncEntryCount, BBFreq, BrProb, MultiBBRange) == + std::tie(Other.FuncEntryCount, Other.BBFreq, Other.BrProb, + Other.MultiBBRange); + } + }; + // Struct representing the BBAddrMap information for one basic block. struct BBEntry { struct Metadata { @@ -840,10 +881,11 @@ struct BBAddrMap { } }; - uint32_t ID; // Unique ID of this basic block. - uint32_t Offset; // Offset of basic block relative to function start. - uint32_t Size; // Size of the basic block. - Metadata MD; // Metdata for this basic block. + uint32_t ID = 0; // Unique ID of this basic block. + uint32_t Offset = 0; // Offset of basic block relative to the base address. + uint32_t Size = 0; // Size of the basic block. + Metadata MD = {false, false, false, false, + false}; // Metdata for this basic block. BBEntry(uint32_t ID, uint32_t Offset, uint32_t Size, Metadata MD) : ID(ID), Offset(Offset), Size(Size), MD(MD){}; @@ -860,62 +902,64 @@ struct BBAddrMap { bool hasIndirectBranch() const { return MD.HasIndirectBranch; } }; - BBAddrMap(uint64_t Addr, std::vector BBEntries) - : Addr(Addr), BBEntries(std::move(BBEntries)) {} + // Struct representing the BBAddrMap information for a contiguous range of + // basic blocks (a function or a basic block section). + struct BBRangeEntry { + uint64_t BaseAddress = 0; // Base address of the range. + std::vector BBEntries; // Basic block entries for this range. + + // Equality operator for unit testing. 
+ bool operator==(const BBRangeEntry &Other) const { + return BaseAddress == Other.BaseAddress && + std::equal(BBEntries.begin(), BBEntries.end(), + Other.BBEntries.begin()); + } + }; - // Returns the address of the corresponding function. - uint64_t getFunctionAddress() const { return Addr; } + // All ranges for this function. Cannot be empty. The first range always + // corresponds to the function entry. + std::vector BBRanges; - // Returns the basic block entries for this function. - const std::vector &getBBEntries() const { return BBEntries; } + // Returns the function address associated with this BBAddrMap, which is + // stored as the `BaseAddress` of its first BBRangeEntry. + uint64_t getFunctionAddress() const { + assert(!BBRanges.empty()); + return BBRanges.front().BaseAddress; + } + + // Returns the total number of bb entries in all bb ranges. + size_t getNumBBEntries() const { + size_t NumBBEntries = 0; + for (const auto &BBR : BBRanges) + NumBBEntries += BBR.BBEntries.size(); + return NumBBEntries; + } + + // Returns the index of the bb range with the given base address, or + // `std::nullopt` if no such range exists. + std::optional + getBBRangeIndexForBaseAddress(uint64_t BaseAddress) const { + for (size_t I = 0; I < BBRanges.size(); ++I) + if (BBRanges[I].BaseAddress == BaseAddress) + return I; + return {}; + } + + // Returns bb entries in the first range. + const std::vector &getBBEntries() const { + return BBRanges.front().BBEntries; + } + + const std::vector &getBBRanges() const { return BBRanges; } // Equality operator for unit testing. bool operator==(const BBAddrMap &Other) const { - return Addr == Other.Addr && std::equal(BBEntries.begin(), BBEntries.end(), - Other.BBEntries.begin()); + return std::equal(BBRanges.begin(), BBRanges.end(), Other.BBRanges.begin()); } - - uint64_t Addr; // Function address - std::vector BBEntries; // Basic block entries for this function. 
}; /// A feature extension of BBAddrMap that holds information relevant to PGO. struct PGOAnalysisMap { - /// Bitfield of optional features to include in the PGO extended map. - struct Features { - bool FuncEntryCount : 1; - bool BBFreq : 1; - bool BrProb : 1; - - // True if at least one feature is enabled - bool anyEnabled() const { return FuncEntryCount || BBFreq || BrProb; } - - // Encodes to minimum bit width representation. - uint8_t encode() const { - return (static_cast(FuncEntryCount) << 0) | - (static_cast(BBFreq) << 1) | - (static_cast(BrProb) << 2); - } - - // Decodes from minimum bit width representation and validates no - // unnecessary bits are used. - static Expected decode(uint8_t Val) { - Features Feat{static_cast(Val & (1 << 0)), - static_cast(Val & (1 << 1)), - static_cast(Val & (1 << 2))}; - if (Feat.encode() != Val) - return createStringError( - std::error_code(), - "invalid encoding for PGOAnalysisMap::Features: 0x%x", Val); - return Feat; - } - - bool operator==(const Features &Other) const { - return std::tie(FuncEntryCount, BBFreq, BrProb) == - std::tie(Other.FuncEntryCount, Other.BBFreq, Other.BrProb); - } - }; - /// Extra basic block data with fields for block frequency and branch /// probability. 
struct PGOBBEntry { @@ -947,7 +991,7 @@ struct PGOAnalysisMap { std::vector BBEntries; // Extended basic block entries // Flags to indicate if each PGO related info was enabled in this function - Features FeatEnable; + BBAddrMap::Features FeatEnable; bool operator==(const PGOAnalysisMap &Other) const { return std::tie(FuncEntryCount, BBEntries, FeatEnable) == diff --git a/llvm/include/llvm/ObjectYAML/ELFYAML.h b/llvm/include/llvm/ObjectYAML/ELFYAML.h index 12b47c271da2..8f045d638362 100644 --- a/llvm/include/llvm/ObjectYAML/ELFYAML.h +++ b/llvm/include/llvm/ObjectYAML/ELFYAML.h @@ -165,9 +165,21 @@ struct BBAddrMapEntry { }; uint8_t Version; llvm::yaml::Hex8 Feature; - llvm::yaml::Hex64 Address; - std::optional NumBlocks; - std::optional> BBEntries; + + struct BBRangeEntry { + llvm::yaml::Hex64 BaseAddress; + std::optional NumBlocks; + std::optional> BBEntries; + }; + + std::optional NumBBRanges; + std::optional> BBRanges; + + llvm::yaml::Hex64 getFunctionAddress() const { + if (!BBRanges || BBRanges->empty()) + return 0; + return BBRanges->front().BaseAddress; + } }; struct PGOAnalysisMapEntry { @@ -751,6 +763,7 @@ bool shouldAllocateFileSpace(ArrayRef Phdrs, LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::StackSizeEntry) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::BBAddrMapEntry) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::BBAddrMapEntry::BBEntry) +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::BBAddrMapEntry::BBRangeEntry) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::PGOAnalysisMapEntry) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::PGOAnalysisMapEntry::PGOBBEntry) LLVM_YAML_IS_SEQUENCE_VECTOR( @@ -916,11 +929,15 @@ template <> struct MappingTraits { }; template <> struct MappingTraits { - static void mapping(IO &IO, ELFYAML::BBAddrMapEntry &Rel); + static void mapping(IO &IO, ELFYAML::BBAddrMapEntry &E); +}; + +template <> struct MappingTraits { + static void mapping(IO &IO, ELFYAML::BBAddrMapEntry::BBRangeEntry &E); }; template <> struct MappingTraits { - static void 
mapping(IO &IO, ELFYAML::BBAddrMapEntry::BBEntry &Rel); + static void mapping(IO &IO, ELFYAML::BBAddrMapEntry::BBEntry &E); }; template <> struct MappingTraits { diff --git a/llvm/include/llvm/Target/TargetOptions.h b/llvm/include/llvm/Target/TargetOptions.h index d6d767f3d22c..2013d438b30c 100644 --- a/llvm/include/llvm/Target/TargetOptions.h +++ b/llvm/include/llvm/Target/TargetOptions.h @@ -137,7 +137,7 @@ namespace llvm { TrapUnreachable(false), NoTrapAfterNoreturn(false), TLSSize(0), EmulatedTLS(false), EnableIPRA(false), EmitStackSizeSection(false), EnableMachineOutliner(false), EnableMachineFunctionSplitter(false), - SupportsDefaultOutlining(false), EmitAddrsig(false), + SupportsDefaultOutlining(false), EmitAddrsig(false), BBAddrMap(false), EmitCallSiteInfo(false), SupportsDebugEntryValues(false), EnableDebugEntryValues(false), ValueTrackingVariableLocations(false), ForceDwarfFrameSection(false), XRayFunctionIndex(true), @@ -303,6 +303,10 @@ namespace llvm { /// Emit address-significance table. unsigned EmitAddrsig : 1; + // Emit the SHT_LLVM_BB_ADDR_MAP section containing basic block address + // which can be used to map virtual addresses to machine basic blocks. + unsigned BBAddrMap : 1; + /// Emit basic blocks into separate sections. 
BasicBlockSection BBSections = BasicBlockSection::None; diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 4eeebe5fc734..322ae67a1e8a 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -149,8 +149,9 @@ static cl::bits PgoAnalysisMapFeatures( "Basic Block Frequency"), clEnumValN(PGOMapFeaturesEnum::BrProb, "br-prob", "Branch Probability")), - cl::desc("Enable extended information within the BBAddrMap that is " - "extracted from PGO related analysis.")); + cl::desc( + "Enable extended information within the SHT_LLVM_BB_ADDR_MAP that is " + "extracted from PGO related analysis.")); const char DWARFGroupName[] = "dwarf"; const char DWARFGroupDescription[] = "DWARF Emission"; @@ -1348,6 +1349,14 @@ static uint32_t getBBAddrMapMetadata(const MachineBasicBlock &MBB) { .encode(); } +static llvm::object::BBAddrMap::Features +getBBAddrMapFeature(const MachineFunction &MF, int NumMBBSectionRanges) { + return {PgoAnalysisMapFeatures.isSet(PGOMapFeaturesEnum::FuncEntryCount), + PgoAnalysisMapFeatures.isSet(PGOMapFeaturesEnum::BBFreq), + PgoAnalysisMapFeatures.isSet(PGOMapFeaturesEnum::BrProb), + MF.hasBBSections() && NumMBBSectionRanges > 1}; +} + void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) { MCSection *BBAddrMapSection = getObjFileLowering().getBBAddrMapSection(*MF.getSection()); @@ -1361,17 +1370,48 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) { uint8_t BBAddrMapVersion = OutStreamer->getContext().getBBAddrMapVersion(); OutStreamer->emitInt8(BBAddrMapVersion); OutStreamer->AddComment("feature"); - auto FeaturesBits = static_cast(PgoAnalysisMapFeatures.getBits()); - OutStreamer->emitInt8(FeaturesBits); - OutStreamer->AddComment("function address"); - OutStreamer->emitSymbolValue(FunctionSymbol, getPointerSize()); - OutStreamer->AddComment("number of basic blocks"); - OutStreamer->emitULEB128IntValue(MF.size()); - const 
MCSymbol *PrevMBBEndSymbol = FunctionSymbol; + auto Features = getBBAddrMapFeature(MF, MBBSectionRanges.size()); + OutStreamer->emitInt8(Features.encode()); // Emit BB Information for each basic block in the function. + if (Features.MultiBBRange) { + OutStreamer->AddComment("number of basic block ranges"); + OutStreamer->emitULEB128IntValue(MBBSectionRanges.size()); + } + // Number of blocks in each MBB section. + MapVector MBBSectionNumBlocks; + const MCSymbol *PrevMBBEndSymbol = nullptr; + if (!Features.MultiBBRange) { + OutStreamer->AddComment("function address"); + OutStreamer->emitSymbolValue(FunctionSymbol, getPointerSize()); + OutStreamer->AddComment("number of basic blocks"); + OutStreamer->emitULEB128IntValue(MF.size()); + PrevMBBEndSymbol = FunctionSymbol; + } else { + unsigned BBCount = 0; + for (const MachineBasicBlock &MBB : MF) { + BBCount++; + if (MBB.isEndSection()) { + // Store each section's basic block count when it ends. + MBBSectionNumBlocks[MBB.getSectionIDNum()] = BBCount; + // Reset the count for the next section. + BBCount = 0; + } + } + } + // Emit the BB entry for each basic block in the function. for (const MachineBasicBlock &MBB : MF) { const MCSymbol *MBBSymbol = MBB.isEntryBlock() ? FunctionSymbol : MBB.getSymbol(); + bool IsBeginSection = + Features.MultiBBRange && (MBB.isBeginSection() || MBB.isEntryBlock()); + if (IsBeginSection) { + OutStreamer->AddComment("base address"); + OutStreamer->emitSymbolValue(MBBSymbol, getPointerSize()); + OutStreamer->AddComment("number of basic blocks"); + OutStreamer->emitULEB128IntValue( + MBBSectionNumBlocks[MBB.getSectionIDNum()]); + PrevMBBEndSymbol = MBBSymbol; + } // TODO: Remove this check when version 1 is deprecated. 
if (BBAddrMapVersion > 1) { OutStreamer->AddComment("BB id"); @@ -1393,35 +1433,32 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) { PrevMBBEndSymbol = MBB.getEndSymbol(); } - if (FeaturesBits != 0) { + if (Features.hasPGOAnalysis()) { assert(BBAddrMapVersion >= 2 && "PGOAnalysisMap only supports version 2 or later"); - auto FeatEnable = - cantFail(object::PGOAnalysisMap::Features::decode(FeaturesBits)); - - if (FeatEnable.FuncEntryCount) { + if (Features.FuncEntryCount) { OutStreamer->AddComment("function entry count"); auto MaybeEntryCount = MF.getFunction().getEntryCount(); OutStreamer->emitULEB128IntValue( MaybeEntryCount ? MaybeEntryCount->getCount() : 0); } const MachineBlockFrequencyInfo *MBFI = - FeatEnable.BBFreq + Features.BBFreq ? &getAnalysis().getBFI() : nullptr; const MachineBranchProbabilityInfo *MBPI = - FeatEnable.BrProb ? &getAnalysis() - : nullptr; + Features.BrProb ? &getAnalysis() + : nullptr; - if (FeatEnable.BBFreq || FeatEnable.BrProb) { + if (Features.BBFreq || Features.BrProb) { for (const MachineBasicBlock &MBB : MF) { - if (FeatEnable.BBFreq) { + if (Features.BBFreq) { OutStreamer->AddComment("basic block frequency"); OutStreamer->emitULEB128IntValue( MBFI->getBlockFreq(&MBB).getFrequency()); } - if (FeatEnable.BrProb) { + if (Features.BrProb) { unsigned SuccCount = MBB.succ_size(); OutStreamer->AddComment("basic block successor count"); OutStreamer->emitULEB128IntValue(SuccCount); @@ -1805,7 +1842,7 @@ void AsmPrinter::emitFunctionBody() { // We must emit temporary symbol for the end of this basic block, if either // we have BBLabels enabled or if this basic blocks marks the end of a // section. 
- if (MF->hasBBLabels() || + if (MF->hasBBLabels() || MF->getTarget().Options.BBAddrMap || (MAI->hasDotTypeDotSizeDirective() && MBB.isEndSection())) OutStreamer->emitLabel(MBB.getEndSymbol()); @@ -1958,7 +1995,7 @@ void AsmPrinter::emitFunctionBody() { // Emit section containing BB address offsets and their metadata, when // BB labels are requested for this function. Skip empty functions. if (HasAnyRealCode) { - if (MF->hasBBLabels()) + if (MF->hasBBLabels() || MF->getTarget().Options.BBAddrMap) emitBBAddrMapSection(*MF); else if (PgoAnalysisMapFeatures.getBits() != 0) MF->getContext().reportWarning( @@ -2491,9 +2528,9 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { bool NeedsLocalForSize = MAI->needsLocalForSize(); if (F.hasFnAttribute("patchable-function-entry") || F.hasFnAttribute("function-instrument") || - F.hasFnAttribute("xray-instruction-threshold") || - needFuncLabels(MF) || NeedsLocalForSize || - MF.getTarget().Options.EmitStackSizeSection || MF.hasBBLabels()) { + F.hasFnAttribute("xray-instruction-threshold") || needFuncLabels(MF) || + NeedsLocalForSize || MF.getTarget().Options.EmitStackSizeSection || + MF.getTarget().Options.BBAddrMap || MF.hasBBLabels()) { CurrentFnBegin = createTempSymbol("func_begin"); if (NeedsLocalForSize) CurrentFnSymForSize = CurrentFnBegin; @@ -3926,7 +3963,9 @@ bool AsmPrinter::shouldEmitLabelForBasicBlock( // With `-fbasic-block-sections=`, a label is needed for every non-entry block // in the labels mode (option `=labels`) and every section beginning in the // sections mode (`=all` and `=list=`). 
- if ((MF->hasBBLabels() || MBB.isBeginSection()) && !MBB.isEntryBlock()) + if ((MF->hasBBLabels() || MF->getTarget().Options.BBAddrMap || + MBB.isBeginSection()) && + !MBB.isEntryBlock()) return true; // A label is needed for any block with at least one predecessor (when that // predecessor is not the fallthrough predecessor, or if it is an EH funclet diff --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp index dbb6ebb3d7eb..eb3f9e7078f1 100644 --- a/llvm/lib/CodeGen/BasicBlockSections.cpp +++ b/llvm/lib/CodeGen/BasicBlockSections.cpp @@ -57,10 +57,10 @@ // function into potentially several disjoint pieces, and CFI needs to be // emitted per cluster. This also bloats the object file and binary sizes. // -// Basic Block Labels +// Basic Block Address Map // ================== // -// With -fbasic-block-sections=labels, we encode the offsets of BB addresses of +// With -fbasic-block-address-map, we emit the offsets of BB addresses of // every function into the .llvm_bb_addr_map section. Along with the function // symbols, this allows for mapping of virtual addresses in PMU profiles back to // the corresponding basic blocks. This logic is implemented in AsmPrinter. This @@ -118,6 +118,10 @@ public: /// Identify basic blocks that need separate sections and prepare to emit them /// accordingly. bool runOnMachineFunction(MachineFunction &MF) override; + +private: + bool handleBBSections(MachineFunction &MF); + bool handleBBAddrMap(MachineFunction &MF); }; } // end anonymous namespace @@ -280,10 +284,12 @@ bool llvm::hasInstrProfHashMismatch(MachineFunction &MF) { return false; } -bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) { +// Identify, arrange, and modify basic blocks which need separate sections +// according to the specification provided by the -fbasic-block-sections flag. 
+bool BasicBlockSections::handleBBSections(MachineFunction &MF) { auto BBSectionsType = MF.getTarget().getBBSectionsType(); - assert(BBSectionsType != BasicBlockSection::None && - "BB Sections not enabled!"); + if (BBSectionsType == BasicBlockSection::None) + return false; // Check for source drift. If the source has changed since the profiles // were obtained, optimizing basic blocks might be sub-optimal. @@ -300,7 +306,7 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) { if (BBSectionsType == BasicBlockSection::Labels) { MF.setBBSectionsType(BBSectionsType); - return false; + return true; } DenseMap FuncClusterInfo; @@ -364,6 +370,27 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) { return true; } +// When the BB address map needs to be generated, this renumbers basic blocks to +// make them appear in increasing order of their IDs in the function. This +// avoids the need to store basic block IDs in the BB address map section, since +// they can be determined implicitly. +bool BasicBlockSections::handleBBAddrMap(MachineFunction &MF) { + if (MF.getTarget().getBBSectionsType() == BasicBlockSection::Labels) + return false; + if (!MF.getTarget().Options.BBAddrMap) + return false; + MF.RenumberBlocks(); + return true; +} + +bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) { + // First handle the basic block sections. + auto R1 = handleBBSections(MF); + // Handle basic block address map after basic block sections are finalized. 
+ auto R2 = handleBBAddrMap(MF); + return R1 || R2; +} + void BasicBlockSections::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequired(); diff --git a/llvm/lib/CodeGen/CommandFlags.cpp b/llvm/lib/CodeGen/CommandFlags.cpp index c34a52a6f2de..8f73c7aca7dd 100644 --- a/llvm/lib/CodeGen/CommandFlags.cpp +++ b/llvm/lib/CodeGen/CommandFlags.cpp @@ -87,6 +87,7 @@ CGOPT_EXP(bool, DataSections) CGOPT_EXP(bool, FunctionSections) CGOPT(bool, IgnoreXCOFFVisibility) CGOPT(bool, XCOFFTracebackTable) +CGOPT(bool, EnableBBAddrMap) CGOPT(std::string, BBSections) CGOPT(unsigned, TLSSize) CGOPT_EXP(bool, EmulatedTLS) @@ -380,6 +381,11 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() { cl::init(true)); CGBINDOPT(XCOFFTracebackTable); + static cl::opt EnableBBAddrMap( + "basic-block-address-map", + cl::desc("Emit the basic block address map section"), cl::init(false)); + CGBINDOPT(EnableBBAddrMap); + static cl::opt BBSections( "basic-block-sections", cl::desc("Emit basic blocks into separate sections"), @@ -553,6 +559,7 @@ codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) { Options.FunctionSections = getFunctionSections(); Options.IgnoreXCOFFVisibility = getIgnoreXCOFFVisibility(); Options.XCOFFTracebackTable = getXCOFFTracebackTable(); + Options.BBAddrMap = getEnableBBAddrMap(); Options.BBSections = getBBSectionsMode(Options); Options.UniqueSectionNames = getUniqueSectionNames(); Options.UniqueBasicBlockSectionNames = getUniqueBasicBlockSectionNames(); diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp index 6d5a05f7970e..93e2c450e69e 100644 --- a/llvm/lib/CodeGen/MachineFunction.cpp +++ b/llvm/lib/CodeGen/MachineFunction.cpp @@ -460,6 +460,7 @@ MachineFunction::CreateMachineBasicBlock(const BasicBlock *BB, // `-basic-block-sections=list` to allow robust mapping of profiles to basic // blocks. 
if (Target.getBBSectionsType() == BasicBlockSection::Labels || + Target.Options.BBAddrMap || Target.getBBSectionsType() == BasicBlockSection::List) MBB->setBBID(BBID.has_value() ? *BBID : UniqueBBID{NextBBID++, 0}); return MBB; diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index 5133702c0a9b..2951d2c0f7bf 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -1257,19 +1257,13 @@ void TargetPassConfig::addMachinePasses() { addPass(createMIRAddFSDiscriminatorsPass( sampleprof::FSDiscriminatorPass::PassLast)); + bool NeedsBBSections = + TM->getBBSectionsType() != llvm::BasicBlockSection::None; // Machine function splitter uses the basic block sections feature. Both - // cannot be enabled at the same time. Basic block sections takes precedence. - // FIXME: In principle, BasicBlockSection::Labels and splitting can used - // together. Update this check once we have addressed any issues. - if (TM->getBBSectionsType() != llvm::BasicBlockSection::None) { - if (TM->getBBSectionsType() == llvm::BasicBlockSection::List) { - addPass(llvm::createBasicBlockSectionsProfileReaderWrapperPass( - TM->getBBSectionsFuncListBuf())); - addPass(llvm::createBasicBlockPathCloningPass()); - } - addPass(llvm::createBasicBlockSectionsPass()); - } else if (TM->Options.EnableMachineFunctionSplitter || - EnableMachineFunctionSplitter) { + // cannot be enabled at the same time. We do not apply machine function + // splitter if -basic-block-sections is requested. + if (!NeedsBBSections && (TM->Options.EnableMachineFunctionSplitter || + EnableMachineFunctionSplitter)) { const std::string ProfileFile = getFSProfileFile(TM); if (!ProfileFile.empty()) { if (EnableFSDiscriminator) { @@ -1286,6 +1280,16 @@ void TargetPassConfig::addMachinePasses() { } addPass(createMachineFunctionSplitterPass()); } + // We run the BasicBlockSections pass if either we need BB sections or BB + // address map (or both). 
+ if (NeedsBBSections || TM->Options.BBAddrMap) { + if (TM->getBBSectionsType() == llvm::BasicBlockSection::List) { + addPass(llvm::createBasicBlockSectionsProfileReaderWrapperPass( + TM->getBBSectionsFuncListBuf())); + addPass(llvm::createBasicBlockPathCloningPass()); + } + addPass(llvm::createBasicBlockSectionsPass()); + } addPostBBSections(); diff --git a/llvm/lib/Object/ELF.cpp b/llvm/lib/Object/ELF.cpp index e9715998b532..6f35913d0939 100644 --- a/llvm/lib/Object/ELF.cpp +++ b/llvm/lib/Object/ELF.cpp @@ -692,6 +692,17 @@ decodeBBAddrMapImpl(const ELFFile &EF, for (typename ELFFile::Elf_Rela Rela : *Relas) FunctionOffsetTranslations[Rela.r_offset] = Rela.r_addend; } + auto GetAddressForRelocation = + [&](unsigned RelocationOffsetInSection) -> Expected { + auto FOTIterator = + FunctionOffsetTranslations.find(RelocationOffsetInSection); + if (FOTIterator == FunctionOffsetTranslations.end()) { + return createError("failed to get relocation data for offset: " + + Twine::utohexstr(RelocationOffsetInSection) + + " in section " + describe(EF, Sec)); + } + return FOTIterator->second; + }; Expected> ContentsOrErr = EF.getSectionContents(Sec); if (!ContentsOrErr) return ContentsOrErr.takeError(); @@ -703,9 +714,26 @@ decodeBBAddrMapImpl(const ELFFile &EF, Error ULEBSizeErr = Error::success(); Error MetadataDecodeErr = Error::success(); + // Helper lambda to extract the (possibly relocatable) address stored at Cur. 
+ auto ExtractAddress = [&]() -> Expected::uintX_t> { + uint64_t RelocationOffsetInSection = Cur.tell(); + auto Address = + static_cast::uintX_t>(Data.getAddress(Cur)); + if (!Cur) + return Cur.takeError(); + if (!IsRelocatable) + return Address; + assert(Address == 0); + Expected AddressOrErr = + GetAddressForRelocation(RelocationOffsetInSection); + if (!AddressOrErr) + return AddressOrErr.takeError(); + return *AddressOrErr; + }; + uint8_t Version = 0; uint8_t Feature = 0; - PGOAnalysisMap::Features FeatEnable{}; + BBAddrMap::Features FeatEnable{}; while (!ULEBSizeErr && !MetadataDecodeErr && Cur && Cur.tell() < Content.size()) { if (Sec.sh_type == ELF::SHT_LLVM_BB_ADDR_MAP) { @@ -718,11 +746,10 @@ decodeBBAddrMapImpl(const ELFFile &EF, Feature = Data.getU8(Cur); // Feature byte if (!Cur) break; - auto FeatEnableOrErr = PGOAnalysisMap::Features::decode(Feature); + auto FeatEnableOrErr = BBAddrMap::Features::decode(Feature); if (!FeatEnableOrErr) return FeatEnableOrErr.takeError(); - FeatEnable = - FeatEnableOrErr ? 
*FeatEnableOrErr : PGOAnalysisMap::Features{}; + FeatEnable = *FeatEnableOrErr; if (Feature != 0 && Version < 2 && Cur) return createError( "version should be >= 2 for SHT_LLVM_BB_ADDR_MAP when " @@ -730,50 +757,65 @@ decodeBBAddrMapImpl(const ELFFile &EF, Twine(static_cast(Version)) + " feature = " + Twine(static_cast(Feature))); } - uint64_t SectionOffset = Cur.tell(); - auto Address = - static_cast::uintX_t>(Data.getAddress(Cur)); - if (!Cur) - return Cur.takeError(); - if (IsRelocatable) { - assert(Address == 0); - auto FOTIterator = FunctionOffsetTranslations.find(SectionOffset); - if (FOTIterator == FunctionOffsetTranslations.end()) { - return createError("failed to get relocation data for offset: " + - Twine::utohexstr(SectionOffset) + " in section " + - describe(EF, Sec)); - } - Address = FOTIterator->second; - } - uint32_t NumBlocks = readULEB128As(Data, Cur, ULEBSizeErr); - + uint32_t NumBlocksInBBRange = 0; + uint32_t NumBBRanges = 1; + typename ELFFile::uintX_t RangeBaseAddress = 0; std::vector BBEntries; - uint32_t PrevBBEndOffset = 0; - for (uint32_t BlockIndex = 0; - !MetadataDecodeErr && !ULEBSizeErr && Cur && (BlockIndex < NumBlocks); - ++BlockIndex) { - uint32_t ID = Version >= 2 - ? readULEB128As(Data, Cur, ULEBSizeErr) - : BlockIndex; - uint32_t Offset = readULEB128As(Data, Cur, ULEBSizeErr); - uint32_t Size = readULEB128As(Data, Cur, ULEBSizeErr); - uint32_t MD = readULEB128As(Data, Cur, ULEBSizeErr); - if (Version >= 1) { - // Offset is calculated relative to the end of the previous BB. 
- Offset += PrevBBEndOffset; - PrevBBEndOffset = Offset + Size; - } - Expected MetadataOrErr = - BBAddrMap::BBEntry::Metadata::decode(MD); - if (!MetadataOrErr) { - MetadataDecodeErr = MetadataOrErr.takeError(); + if (FeatEnable.MultiBBRange) { + NumBBRanges = readULEB128As(Data, Cur, ULEBSizeErr); + if (!Cur || ULEBSizeErr) break; + if (!NumBBRanges) + return createError("invalid zero number of BB ranges at offset " + + Twine::utohexstr(Cur.tell()) + " in " + + describe(EF, Sec)); + } else { + auto AddressOrErr = ExtractAddress(); + if (!AddressOrErr) + return AddressOrErr.takeError(); + RangeBaseAddress = *AddressOrErr; + NumBlocksInBBRange = readULEB128As(Data, Cur, ULEBSizeErr); + } + std::vector BBRangeEntries; + uint32_t TotalNumBlocks = 0; + for (uint32_t BBRangeIndex = 0; BBRangeIndex < NumBBRanges; + ++BBRangeIndex) { + uint32_t PrevBBEndOffset = 0; + if (FeatEnable.MultiBBRange) { + auto AddressOrErr = ExtractAddress(); + if (!AddressOrErr) + return AddressOrErr.takeError(); + RangeBaseAddress = *AddressOrErr; + NumBlocksInBBRange = readULEB128As(Data, Cur, ULEBSizeErr); + } + for (uint32_t BlockIndex = 0; !MetadataDecodeErr && !ULEBSizeErr && Cur && + (BlockIndex < NumBlocksInBBRange); + ++BlockIndex) { + uint32_t ID = Version >= 2 + ? readULEB128As(Data, Cur, ULEBSizeErr) + : BlockIndex; + uint32_t Offset = readULEB128As(Data, Cur, ULEBSizeErr); + uint32_t Size = readULEB128As(Data, Cur, ULEBSizeErr); + uint32_t MD = readULEB128As(Data, Cur, ULEBSizeErr); + if (Version >= 1) { + // Offset is calculated relative to the end of the previous BB. 
+ Offset += PrevBBEndOffset; + PrevBBEndOffset = Offset + Size; + } + Expected MetadataOrErr = + BBAddrMap::BBEntry::Metadata::decode(MD); + if (!MetadataOrErr) { + MetadataDecodeErr = MetadataOrErr.takeError(); + break; + } + BBEntries.push_back({ID, Offset, Size, *MetadataOrErr}); } - BBEntries.push_back({ID, Offset, Size, *MetadataOrErr}); + TotalNumBlocks += BBEntries.size(); + BBRangeEntries.push_back({RangeBaseAddress, std::move(BBEntries)}); } - FunctionEntries.emplace_back(Address, std::move(BBEntries)); + FunctionEntries.push_back({std::move(BBRangeEntries)}); - if (PGOAnalyses || FeatEnable.anyEnabled()) { + if (PGOAnalyses || FeatEnable.hasPGOAnalysis()) { // Function entry count uint64_t FuncEntryCount = FeatEnable.FuncEntryCount @@ -782,8 +824,8 @@ decodeBBAddrMapImpl(const ELFFile &EF, std::vector PGOBBEntries; for (uint32_t BlockIndex = 0; - (FeatEnable.BBFreq || FeatEnable.BrProb) && !MetadataDecodeErr && - !ULEBSizeErr && Cur && (BlockIndex < NumBlocks); + FeatEnable.hasPGOAnalysisBBData() && !MetadataDecodeErr && + !ULEBSizeErr && Cur && (BlockIndex < TotalNumBlocks); ++BlockIndex) { // Block frequency uint64_t BBF = FeatEnable.BBFreq diff --git a/llvm/lib/ObjectYAML/ELFEmitter.cpp b/llvm/lib/ObjectYAML/ELFEmitter.cpp index 949e3b5ead29..ef1307fee6d7 100644 --- a/llvm/lib/ObjectYAML/ELFEmitter.cpp +++ b/llvm/lib/ObjectYAML/ELFEmitter.cpp @@ -1419,24 +1419,43 @@ void ELFState::writeSectionContent( CBA.write(E.Feature); SHeader.sh_size += 2; } - - if (Section.PGOAnalyses) { - if (E.Version < 2) - WithColor::warning() - << "unsupported SHT_LLVM_BB_ADDR_MAP version when using PGO: " - << static_cast(E.Version) << "; must use version >= 2"; + auto FeatureOrErr = llvm::object::BBAddrMap::Features::decode(E.Feature); + bool MultiBBRangeFeatureEnabled = false; + if (!FeatureOrErr) + WithColor::warning() << toString(FeatureOrErr.takeError()); + else + MultiBBRangeFeatureEnabled = FeatureOrErr->MultiBBRange; + bool MultiBBRange = + 
MultiBBRangeFeatureEnabled || + (E.NumBBRanges.has_value() && E.NumBBRanges.value() != 1) || + (E.BBRanges && E.BBRanges->size() != 1); + if (MultiBBRange && !MultiBBRangeFeatureEnabled) + WithColor::warning() << "feature value(" << E.Feature + << ") does not support multiple BB ranges."; + if (MultiBBRange) { + // Write the number of basic block ranges, which is overridden by the + // 'NumBBRanges' field when specified. + uint64_t NumBBRanges = + E.NumBBRanges.value_or(E.BBRanges ? E.BBRanges->size() : 0); + SHeader.sh_size += CBA.writeULEB128(NumBBRanges); } - - // Write the address of the function. - CBA.write(E.Address, ELFT::TargetEndianness); - // Write number of BBEntries (number of basic blocks in the function). This - // is overridden by the 'NumBlocks' YAML field when specified. - uint64_t NumBlocks = - E.NumBlocks.value_or(E.BBEntries ? E.BBEntries->size() : 0); - SHeader.sh_size += sizeof(uintX_t) + CBA.writeULEB128(NumBlocks); - // Write all BBEntries. - if (E.BBEntries) { - for (const ELFYAML::BBAddrMapEntry::BBEntry &BBE : *E.BBEntries) { + if (!E.BBRanges) + continue; + uint64_t TotalNumBlocks = 0; + for (const ELFYAML::BBAddrMapEntry::BBRangeEntry &BBR : *E.BBRanges) { + // Write the base address of the range. + CBA.write(BBR.BaseAddress, ELFT::TargetEndianness); + // Write number of BBEntries (number of basic blocks in this basic block + // range). This is overridden by the 'NumBlocks' YAML field when + // specified. + uint64_t NumBlocks = + BBR.NumBlocks.value_or(BBR.BBEntries ? BBR.BBEntries->size() : 0); + SHeader.sh_size += sizeof(uintX_t) + CBA.writeULEB128(NumBlocks); + // Write all BBEntries in this BBRange. 
+ if (!BBR.BBEntries) + continue; + for (const ELFYAML::BBAddrMapEntry::BBEntry &BBE : *BBR.BBEntries) { + ++TotalNumBlocks; if (Section.Type == llvm::ELF::SHT_LLVM_BB_ADDR_MAP && E.Version > 1) SHeader.sh_size += CBA.writeULEB128(BBE.ID); SHeader.sh_size += CBA.writeULEB128(BBE.AddressOffset); @@ -1444,7 +1463,6 @@ void ELFState::writeSectionContent( SHeader.sh_size += CBA.writeULEB128(BBE.Metadata); } } - if (!PGOAnalyses) continue; const ELFYAML::PGOAnalysisMapEntry &PGOEntry = PGOAnalyses->at(Idx); @@ -1456,11 +1474,11 @@ void ELFState::writeSectionContent( continue; const auto &PGOBBEntries = PGOEntry.PGOBBEntries.value(); - if (!E.BBEntries || E.BBEntries->size() != PGOBBEntries.size()) { + if (TotalNumBlocks != PGOBBEntries.size()) { WithColor::warning() << "PBOBBEntries must be the same length as " "BBEntries in SHT_LLVM_BB_ADDR_MAP.\n" << "Mismatch on function with address: " - << E.Address; + << E.getFunctionAddress(); continue; } diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp index 83deb680074f..1dae7223f8ab 100644 --- a/llvm/lib/ObjectYAML/ELFYAML.cpp +++ b/llvm/lib/ObjectYAML/ELFYAML.cpp @@ -1676,7 +1676,6 @@ void MappingTraits>::mapping( Section.reset(new ELFYAML::CallGraphProfileSection()); sectionMapping(IO, *cast(Section.get())); break; - case ELF::SHT_LLVM_BB_ADDR_MAP_V0: case ELF::SHT_LLVM_BB_ADDR_MAP: if (!IO.outputting()) Section.reset(new ELFYAML::BBAddrMapSection()); @@ -1808,7 +1807,13 @@ void MappingTraits::mapping( assert(IO.getContext() && "The IO context is not initialized"); IO.mapRequired("Version", E.Version); IO.mapOptional("Feature", E.Feature, Hex8(0)); - IO.mapOptional("Address", E.Address, Hex64(0)); + IO.mapOptional("NumBBRanges", E.NumBBRanges); + IO.mapOptional("BBRanges", E.BBRanges); +} + +void MappingTraits::mapping( + IO &IO, ELFYAML::BBAddrMapEntry::BBRangeEntry &E) { + IO.mapOptional("BaseAddress", E.BaseAddress, Hex64(0)); IO.mapOptional("NumBlocks", E.NumBlocks); 
IO.mapOptional("BBEntries", E.BBEntries); } diff --git a/llvm/test/CodeGen/X86/basic-block-sections-labels-functions-sections.ll b/llvm/test/CodeGen/X86/basic-block-address-map-function-sections.ll similarity index 94% rename from llvm/test/CodeGen/X86/basic-block-sections-labels-functions-sections.ll rename to llvm/test/CodeGen/X86/basic-block-address-map-function-sections.ll index b8217bbc0076..cd9e3ad18204 100644 --- a/llvm/test/CodeGen/X86/basic-block-sections-labels-functions-sections.ll +++ b/llvm/test/CodeGen/X86/basic-block-address-map-function-sections.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -mtriple=x86_64 -function-sections -basic-block-sections=labels | FileCheck %s +; RUN: llc < %s -mtriple=x86_64 -function-sections -basic-block-address-map | FileCheck %s $_Z4fooTIiET_v = comdat any diff --git a/llvm/test/CodeGen/X86/basic-block-address-map-with-basic-block-sections.ll b/llvm/test/CodeGen/X86/basic-block-address-map-with-basic-block-sections.ll new file mode 100644 index 000000000000..6354e2ec6889 --- /dev/null +++ b/llvm/test/CodeGen/X86/basic-block-address-map-with-basic-block-sections.ll @@ -0,0 +1,71 @@ +;; Check that the -basic-block-address-map option works when used along with -basic-block-sections. +;; Let a function with 4 basic blocks get split into 2 sections. +; RUN: echo '!_Z3bazb' > %t +; RUN: echo '!!0 2' >> %t +; RUN: llc < %s -mtriple=x86_64 -basic-block-address-map -basic-block-sections=%t | FileCheck %s + +define void @_Z3bazb(i1 zeroext) personality i32 (...)* @__gxx_personality_v0 { + br i1 %0, label %2, label %7 + +2: + %3 = invoke i32 @_Z3barv() + to label %7 unwind label %5 + br label %9 + +5: + landingpad { i8*, i32 } + catch i8* null + br label %9 + +7: + %8 = call i32 @_Z3foov() + br label %9 + +9: + ret void +} + +declare i32 @_Z3barv() #1 + +declare i32 @_Z3foov() #1 + +declare i32 @__gxx_personality_v0(...) 
+ +; CHECK: .text +; CHECK-LABEL: _Z3bazb: +; CHECK-LABEL: .Lfunc_begin0: +; CHECK-LABEL: .LBB_END0_0: +; CHECK-LABEL: .LBB0_1: +; CHECK-LABEL: .LBB_END0_1: +; CHECK: .section .text.split._Z3bazb,"ax",@progbits +; CHECK-LABEL: _Z3bazb.cold: +; CHECK-LABEL: .LBB_END0_2: +; CHECK-LABEL: .LBB0_3: +; CHECK-LABEL: .LBB_END0_3: +; CHECK-LABEL: .Lfunc_end0: + +; CHECK: .section .llvm_bb_addr_map,"o",@llvm_bb_addr_map,.text.hot._Z3bazb +; CHECK-NEXT: .byte 2 # version +; CHECK-NEXT: .byte 8 # feature +; CHECK-NEXT: .byte 2 # number of basic block ranges +; CHECK-NEXT: .quad .Lfunc_begin0 # base address +; CHECK-NEXT: .byte 2 # number of basic blocks +; CHECK-NEXT: .byte 0 # BB id +; CHECK-NEXT: .uleb128 .Lfunc_begin0-.Lfunc_begin0 +; CHECK-NEXT: .uleb128 .LBB_END0_0-.Lfunc_begin0 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 2 # BB id +; CHECK-NEXT: .uleb128 .LBB0_1-.LBB_END0_0 +; CHECK-NEXT: .uleb128 .LBB_END0_1-.LBB0_1 +; CHECK-NEXT: .byte 5 +; CHECK-NEXT: .quad _Z3bazb.cold # base address +; CHECK-NEXT: .byte 2 # number of basic blocks +; CHECK-NEXT: .byte 1 # BB id +; CHECK-NEXT: .uleb128 _Z3bazb.cold-_Z3bazb.cold +; CHECK-NEXT: .uleb128 .LBB_END0_2-_Z3bazb.cold +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 3 # BB id +; CHECK-NEXT: .uleb128 .LBB0_3-.LBB_END0_2 +; CHECK-NEXT: .uleb128 .LBB_END0_3-.LBB0_3 +; CHECK-NEXT: .byte 1 + diff --git a/llvm/test/CodeGen/X86/basic-block-address-map-with-mfs.ll b/llvm/test/CodeGen/X86/basic-block-address-map-with-mfs.ll new file mode 100644 index 000000000000..f2ceae08eb94 --- /dev/null +++ b/llvm/test/CodeGen/X86/basic-block-address-map-with-mfs.ll @@ -0,0 +1,90 @@ +; COM: Emitting basic-block-address-map when machine function splitting is enabled. +; RUN: llc < %s -mtriple=x86_64 -function-sections -split-machine-functions -basic-block-address-map | FileCheck %s --check-prefixes=CHECK,BASIC + +; COM: Emitting basic-block-address-map with PGO analysis with machine function splitting enabled. 
+; RUN: llc < %s -mtriple=x86_64 -function-sections -split-machine-functions -basic-block-address-map -pgo-analysis-map=func-entry-count,bb-freq,br-prob | FileCheck %s --check-prefixes=CHECK,PGO + +define void @foo(i1 zeroext %0) nounwind !prof !14 { + br i1 %0, label %2, label %4, !prof !15 + +2: ; preds = %1 + %3 = call i32 @bar() + br label %6 + +4: ; preds = %1 + %5 = call i32 @baz() + br label %6 + +6: ; preds = %4, %2 + %7 = tail call i32 @qux() + ret void +} + +declare i32 @bar() +declare i32 @baz() +declare i32 @qux() + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"ProfileSummary", !1} +!1 = !{!2, !3, !4, !5, !6, !7, !8, !9} +!2 = !{!"ProfileFormat", !"InstrProf"} +!3 = !{!"TotalCount", i64 10000} +!4 = !{!"MaxCount", i64 10} +!5 = !{!"MaxInternalCount", i64 1} +!6 = !{!"MaxFunctionCount", i64 1000} +!7 = !{!"NumCounts", i64 3} +!8 = !{!"NumFunctions", i64 5} +!9 = !{!"DetailedSummary", !10} +!10 = !{!11, !12, !13} +!11 = !{i32 10000, i64 100, i32 1} +!12 = !{i32 999900, i64 100, i32 1} +!13 = !{i32 999999, i64 1, i32 2} +!14 = !{!"function_entry_count", i64 7000} +!15 = !{!"branch_weights", i32 7000, i32 0} + +; CHECK: .section .text.hot.foo,"ax",@progbits +; CHECK-LABEL: foo: +; CHECK-LABEL: .Lfunc_begin0: +; CHECK-LABEL: .LBB_END0_0: +; CHECK-LABEL: .LBB0_1: +; CHECK-LABEL: .LBB_END0_1: +; CHECK: .section .text.split.foo,"ax",@progbits +; CHECK-LABEL: foo.cold: +; CHECK-LABEL: .LBB_END0_2: +; CHECK-LABEL: .Lfunc_end0: + +; CHECK: .section .llvm_bb_addr_map,"o",@llvm_bb_addr_map,.text.hot.foo +; CHECK-NEXT: .byte 2 # version +; BASIC-NEXT: .byte 8 # feature +; PGO-NEXT: .byte 15 # feature +; CHECK-NEXT: .byte 2 # number of basic block ranges +; CHECK-NEXT: .quad .Lfunc_begin0 # base address +; CHECK-NEXT: .byte 2 # number of basic blocks +; CHECK-NEXT: .byte 0 # BB id +; CHECK-NEXT: .uleb128 .Lfunc_begin0-.Lfunc_begin0 +; CHECK-NEXT: .uleb128 .LBB_END0_0-.Lfunc_begin0 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 1 # BB id +; CHECK-NEXT: .uleb128 
.LBB0_1-.LBB_END0_0 +; CHECK-NEXT: .uleb128 .LBB_END0_1-.LBB0_1 +; CHECK-NEXT: .byte 3 +; CHECK-NEXT: .quad foo.cold # base address +; CHECK-NEXT: .byte 1 # number of basic blocks +; CHECK-NEXT: .byte 2 # BB id +; CHECK-NEXT: .uleb128 foo.cold-foo.cold +; CHECK-NEXT: .uleb128 .LBB_END0_2-foo.cold +; CHECK-NEXT: .byte 3 + +;; PGO Analysis Map +; PGO: .ascii "\3306" # function entry count +; PGO-NEXT: .ascii "\200\200\200\200@" # basic block frequency +; PGO-NEXT: .byte 2 # basic block successor count +; PGO-NEXT: .byte 1 # successor BB ID +; PGO-NEXT: .ascii "\200\200\200\200\b" # successor branch probability +; PGO-NEXT: .byte 2 # successor BB ID +; PGO-NEXT: .byte 0 # successor branch probability +; PGO-NEXT: .ascii "\370\377\377\377?" # basic block frequency +; PGO-NEXT: .byte 0 # basic block successor count +; PGO-NEXT: .byte 8 # basic block frequency +; PGO-NEXT: .byte 0 # basic block successor count + diff --git a/llvm/test/CodeGen/X86/basic-block-sections-labels.ll b/llvm/test/CodeGen/X86/basic-block-address-map.ll similarity index 90% rename from llvm/test/CodeGen/X86/basic-block-sections-labels.ll rename to llvm/test/CodeGen/X86/basic-block-address-map.ll index 0b0b00a0b981..6ab24b494936 100644 --- a/llvm/test/CodeGen/X86/basic-block-sections-labels.ll +++ b/llvm/test/CodeGen/X86/basic-block-address-map.ll @@ -1,5 +1,7 @@ ; Check the basic block sections labels option +; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map | FileCheck %s --check-prefixes=CHECK,UNIQ ; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-sections=labels | FileCheck %s --check-prefixes=CHECK,UNIQ +; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=false -basic-block-address-map | FileCheck %s --check-prefixes=CHECK,NOUNIQ ; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=false -basic-block-sections=labels | FileCheck %s --check-prefixes=CHECK,NOUNIQ 
; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-sections=labels -split-machine-functions | FileCheck %s --check-prefixes=CHECK,UNIQ diff --git a/llvm/test/tools/llvm-objdump/X86/elf-bbaddrmap-disassemble-symbolize-operands.yaml b/llvm/test/tools/llvm-objdump/X86/elf-bbaddrmap-disassemble-symbolize-operands.yaml index 6864932411ae..cc7faea67bed 100644 --- a/llvm/test/tools/llvm-objdump/X86/elf-bbaddrmap-disassemble-symbolize-operands.yaml +++ b/llvm/test/tools/llvm-objdump/X86/elf-bbaddrmap-disassemble-symbolize-operands.yaml @@ -1,30 +1,32 @@ ## Test that in the presence of SHT_LLVM_BB_ADDR_MAP sections, ## --symbolize-operands can display labels. -## Executable object file. -# RUN: yaml2obj --docnum=1 -DFOO_ADDR=0x4000 -DBAR_ADDR=0x5000 %s -o %t1 -# RUN: llvm-objdump %t1 -d --symbolize-operands -M intel --no-show-raw-insn --no-leading-addr | \ -# RUN: FileCheck %s -DSYM=symbol --match-full-lines --check-prefixes=INTEL -# RUN: llvm-objdump %t1 -d --symbolize-operands -M att --no-show-raw-insn --no-leading-addr | \ -# RUN: FileCheck %s -DSYM=symbol --match-full-lines --check-prefixes=ATT +## Executable object file with separate SHT_LLVM_BB_ADDR_MAP sections for multiple text sections. +## RUN: yaml2obj --docnum=1 %s -o %t1 +## RUN: llvm-objdump %t1 -d --symbolize-operands -M intel --no-show-raw-insn --no-leading-addr | \ +## RUN: FileCheck %s --match-full-lines --check-prefixes=INTEL +## RUN: llvm-objdump %t1 -d --symbolize-operands -M att --no-show-raw-insn --no-leading-addr | \ +## RUN: FileCheck %s --match-full-lines --check-prefixes=ATT ## Executable object file with a single SHT_LLVM_BB_ADDR_MAP for multiple text sections. 
-# RUN: yaml2obj --docnum=2 %s -o %t3 -# RUN: llvm-objdump %t3 -d --symbolize-operands -M intel --no-show-raw-insn --no-leading-addr | \ -# RUN: FileCheck %s -DSYM=symbol --match-full-lines --check-prefixes=INTEL -# RUN: llvm-objdump %t3 -d --symbolize-operands -M att --no-show-raw-insn --no-leading-addr | \ -# RUN: FileCheck %s -DSYM=symbol --match-full-lines --check-prefixes=ATT +# RUN: yaml2obj --docnum=2 %s -o %t2 +# RUN: llvm-objdump %t2 -d --symbolize-operands -M intel --no-show-raw-insn --no-leading-addr | \ +# RUN: FileCheck %s --match-full-lines --check-prefixes=INTEL +# RUN: llvm-objdump %t2 -d --symbolize-operands -M att --no-show-raw-insn --no-leading-addr | \ +# RUN: FileCheck %s --match-full-lines --check-prefixes=ATT ## Expect to find the branch and basic block labels and global variable name. # ATT: : # ATT-NEXT: : # ATT-NEXT: pushq %rax # ATT-NEXT: : -# ATT-NEXT: cmpl , %eax <[[SYM]]> +# ATT-NEXT: cmpl , %eax # ATT-NEXT: nop # ATT-NEXT: : # ATT-NEXT: jge # ATT-NEXT: jmp +# ATT-NEXT: : +# ATT-NEXT: jge # ATT-NEXT: : # ATT-NEXT: retq # ATT: : @@ -37,16 +39,21 @@ # ATT-NEXT: : # ATT-NEXT: callq # ATT-NEXT: retq +# ATT: : +# ATT-NEXT: : +# ATT-NEXT: retq # INTEL: : # INTEL-NEXT: : # INTEL-NEXT: push rax # INTEL-NEXT: : -# INTEL-NEXT: cmp eax, dword ptr <[[SYM]]> +# INTEL-NEXT: cmp eax, dword ptr # INTEL-NEXT: nop # INTEL-NEXT: : # INTEL-NEXT: jge # INTEL-NEXT: jmp +# INTEL-NEXT: : +# INTEL-NEXT: jge # INTEL-NEXT: : # INTEL-NEXT: ret # INTEL: : @@ -59,6 +66,9 @@ # INTEL-NEXT: : # INTEL-NEXT: call # INTEL-NEXT: ret +# INTEL: : +# INTEL-NEXT: : +# INTEL-NEXT: ret ## This object file contains a separate text section and SHT_LLVM_BB_ADDR_MAP ## section for each of the two functions foo and bar. 
@@ -73,68 +83,89 @@ FileHeader: Sections: - Name: .text.foo Type: SHT_PROGBITS - Address: [[FOO_ADDR]] + Address: 0x4000 Flags: [SHF_ALLOC, SHF_EXECINSTR] - Content: '503b0505200000907d02ebf5c3' + Content: '503b0505300000907d08ebf50f8dee1f0000c3' - Name: .text.bar Type: SHT_PROGBITS - Address: [[BAR_ADDR]] + Address: 0x5000 Flags: [SHF_ALLOC, SHF_EXECINSTR] Content: '5089d0740231f6e8f4ffffffc3' + - Name: .text.split + Type: SHT_PROGBITS + Address: 0x6000 + Flags: [SHF_ALLOC, SHF_EXECINSTR] + Content: 'c3' - Name: .data Type: SHT_PROGBITS Flags: [SHF_ALLOC, SHF_WRITE] - Address: 0x6000 + Address: 0x7000 - Name: .llvm_bb_addr_map.foo Type: SHT_LLVM_BB_ADDR_MAP Link: .text.foo Entries: - Version: 2 - Address: [[FOO_ADDR]] - BBEntries: - - ID: 3 - AddressOffset: 0x0 - Size: 0x1 - Metadata: 0x1 - - ID: 1 - AddressOffset: 0x0 - Size: 0x6 - Metadata: 0x0 - - ID: 2 - AddressOffset: 0x1 - Size: 0x4 - Metadata: 0x0 - - ID: 5 - AddressOffset: 0x0 - Size: 0x1 - Metadata: 0x2 + Feature: 0x8 + BBRanges: + - BaseAddress: 0x4000 + BBEntries: + - ID: 3 + AddressOffset: 0x0 + Size: 0x1 + Metadata: 0x1 + - ID: 1 + AddressOffset: 0x0 + Size: 0x6 + Metadata: 0x0 + - ID: 2 + AddressOffset: 0x1 + Size: 0x4 + Metadata: 0x0 + - ID: 4 + AddressOffset: 0x0 + Size: 0x6 + Metadata: 0x1 + - ID: 5 + AddressOffset: 0x0 + Size: 0x1 + Metadata: 0x2 + - BaseAddress: 0x6000 + BBEntries: + - ID: 6 + AddressOffset: 0x0 + Size: 0x1 + Metadata: 0x2 - Name: .llvm_bb_addr_map.bar Type: SHT_LLVM_BB_ADDR_MAP Link: .text.bar Entries: - Version: 1 - Address: [[BAR_ADDR]] - BBEntries: - - AddressOffset: 0x0 - Size: 0x1 - Metadata: 0x1 - - AddressOffset: 0x4 - Size: 0x2 - Metadata: 0x0 - - AddressOffset: 0x0 - Size: 0x6 - Metadata: 0x0 + BBRanges: + - BaseAddress: 0x5000 + BBEntries: + - AddressOffset: 0x0 + Size: 0x1 + Metadata: 0x1 + - AddressOffset: 0x4 + Size: 0x2 + Metadata: 0x0 + - AddressOffset: 0x0 + Size: 0x6 + Metadata: 0x0 Symbols: - Name: foo Section: .text.foo - Value: [[FOO_ADDR]] + Value: 0x4000 + 
- Name: foo.cold + Section: .text.split + Value: 0x6000 - Name: bar Section: .text.bar - Value: [[BAR_ADDR]] + Value: 0x5000 - Name: symbol Section: .data - Value: 0x600c + Value: 0x700c ## This object file contains a single SHT_LLVM_BB_ADDR_MAP for two text ## sections .text.foo and .text.bar. @@ -149,59 +180,80 @@ Sections: Type: SHT_PROGBITS Address: 0x4000 Flags: [SHF_ALLOC, SHF_EXECINSTR] - Content: '503b0505200000907d02ebf5c3' + Content: '503b0505300000907d08ebf50f8dee1f0000c3' - Name: .text.bar Type: SHT_PROGBITS Address: 0x5000 Flags: [SHF_ALLOC, SHF_EXECINSTR] Content: '5089d0740231f6e8f4ffffffc3' + - Name: .text.split + Type: SHT_PROGBITS + Address: 0x6000 + Flags: [SHF_ALLOC, SHF_EXECINSTR] + Content: 'c3' - Name: .data Type: SHT_PROGBITS Flags: [SHF_ALLOC, SHF_WRITE] - Address: 0x6000 + Address: 0x7000 - Name: .llvm_bb_addr_map.foo Type: SHT_LLVM_BB_ADDR_MAP Link: .text.foo Entries: - Version: 2 - Address: 0x4000 - BBEntries: - - ID: 3 - AddressOffset: 0x0 - Size: 0x1 - Metadata: 0x1 - - ID: 1 - AddressOffset: 0x0 - Size: 0x6 - Metadata: 0x0 - - ID: 2 - AddressOffset: 0x1 - Size: 0x4 - Metadata: 0x0 - - ID: 5 - AddressOffset: 0x0 - Size: 0x1 - Metadata: 0x2 + Feature: 0x8 + BBRanges: + - BaseAddress: 0x4000 + BBEntries: + - ID: 3 + AddressOffset: 0x0 + Size: 0x1 + Metadata: 0x1 + - ID: 1 + AddressOffset: 0x0 + Size: 0x6 + Metadata: 0x0 + - ID: 2 + AddressOffset: 0x1 + Size: 0x4 + Metadata: 0x0 + - ID: 4 + AddressOffset: 0x0 + Size: 0x6 + Metadata: 0x1 + - ID: 5 + AddressOffset: 0x0 + Size: 0x1 + Metadata: 0x2 + - BaseAddress: 0x6000 + BBEntries: + - ID: 6 + AddressOffset: 0x0 + Size: 0x1 + Metadata: 0x2 - Version: 1 - Address: 0x5000 - BBEntries: - - AddressOffset: 0x0 - Size: 0x1 - Metadata: 0x1 - - AddressOffset: 0x4 - Size: 0x2 - Metadata: 0x0 - - AddressOffset: 0x0 - Size: 0x6 - Metadata: 0x0 + BBRanges: + - BaseAddress: 0x5000 + BBEntries: + - AddressOffset: 0x0 + Size: 0x1 + Metadata: 0x1 + - AddressOffset: 0x4 + Size: 0x2 + Metadata: 0x0 + - 
AddressOffset: 0x0 + Size: 0x6 + Metadata: 0x0 Symbols: - Name: foo Section: .text.foo Value: 0x4000 + - Name: foo.cold + Section: .text.split + Value: 0x6000 - Name: bar Section: .text.bar Value: 0x5000 - Name: symbol Section: .data - Value: 0x600c + Value: 0x700c diff --git a/llvm/test/tools/llvm-objdump/X86/elf-bbaddrmap-symbolize-relocatable.yaml b/llvm/test/tools/llvm-objdump/X86/elf-bbaddrmap-symbolize-relocatable.yaml index f796963481cd..706d386e467e 100644 --- a/llvm/test/tools/llvm-objdump/X86/elf-bbaddrmap-symbolize-relocatable.yaml +++ b/llvm/test/tools/llvm-objdump/X86/elf-bbaddrmap-symbolize-relocatable.yaml @@ -24,17 +24,32 @@ Sections: Link: .text Entries: - Version: 2 - BBEntries: - - ID: 0 - AddressOffset: 0x0 - Size: 0x1 - Metadata: 0x1 + BBRanges: + - BBEntries: + - ID: 0 + AddressOffset: 0x0 + Size: 0x1 + Metadata: 0xa - Version: 2 - BBEntries: - - ID: 0 - AddressOffset: 0x0 - Size: 0x1 - Metadata: 0x1 + BBRanges: + - BBEntries: + - ID: 0 + AddressOffset: 0x0 + Size: 0x1 + Metadata: 0xb + - Version: 2 + Feature: 0x8 + BBRanges: + - BBEntries: + - ID: 0 + AddressOffset: 0x0 + Size: 0x1 + Metadata: 0xc + - BBEntries: + - ID: 1 + AddressOffset: 0x0 + Size: 0x1 + Metadata: 0xd - Name: .rela.llvm_bb_addr_map Type: SHT_RELA Flags: [ SHF_INFO_LINK ] @@ -48,6 +63,15 @@ Sections: Symbol: .text Type: R_X86_64_64 Addend: 1 + - Offset: 0x21 + Symbol: .text + Type: R_X86_64_64 + Addend: 0x2 + - Offset: 0x2e + Symbol: .text + Type: R_X86_64_64 + Addend: 0x3 + Symbols: - Name: a Section: .text @@ -55,6 +79,12 @@ Symbols: - Name: c Section: .text Value: 0x1 + - Name: h + Section: .text + Value: 0x2 + - Name: h1 + Section: .text + Value: 0x3 - Name: .text Type: STT_SECTION Section: .text diff --git a/llvm/test/tools/llvm-objdump/X86/elf-pgoanalysismap.yaml b/llvm/test/tools/llvm-objdump/X86/elf-pgoanalysismap.yaml index c4bf443f920a..732fab3e2a37 100644 --- a/llvm/test/tools/llvm-objdump/X86/elf-pgoanalysismap.yaml +++ 
b/llvm/test/tools/llvm-objdump/X86/elf-pgoanalysismap.yaml @@ -25,13 +25,14 @@ Sections: Link: .text.foo Entries: - Version: 2 - Address: 0x0 Feature: 0x1 - BBEntries: - - ID: 3 - AddressOffset: 0x0 - Size: 0x1 - Metadata: 0x1 + BBRanges: + - BaseAddress: 0x0 + BBEntries: + - ID: 3 + AddressOffset: 0x0 + Size: 0x1 + Metadata: 0x1 PGOAnalyses: - FuncEntryCount: 1000 Symbols: @@ -65,25 +66,26 @@ Sections: Link: .text.foo Entries: - Version: 2 - Address: 0x0 Feature: 0x3 - BBEntries: - - ID: 3 - AddressOffset: 0x0 - Size: 0x1 - Metadata: 0x1 - - ID: 1 - AddressOffset: 0x0 - Size: 0x6 - Metadata: 0x0 - - ID: 2 - AddressOffset: 0x1 - Size: 0x4 - Metadata: 0x0 - - ID: 5 - AddressOffset: 0x0 - Size: 0x1 - Metadata: 0x2 + BBRanges: + - BaseAddress: 0x0 + BBEntries: + - ID: 3 + AddressOffset: 0x0 + Size: 0x1 + Metadata: 0x1 + - ID: 1 + AddressOffset: 0x0 + Size: 0x6 + Metadata: 0x0 + - ID: 2 + AddressOffset: 0x1 + Size: 0x4 + Metadata: 0x0 + - ID: 5 + AddressOffset: 0x0 + Size: 0x1 + Metadata: 0x2 PGOAnalyses: - FuncEntryCount: 1000 PGOBBEntries: @@ -126,25 +128,26 @@ Sections: Link: .text.foo Entries: - Version: 2 - Address: 0x0 Feature: 0x7 - BBEntries: - - ID: 3 - AddressOffset: 0x0 - Size: 0x1 - Metadata: 0x1 - - ID: 1 - AddressOffset: 0x0 - Size: 0x6 - Metadata: 0x0 - - ID: 2 - AddressOffset: 0x1 - Size: 0x4 - Metadata: 0x0 - - ID: 5 - AddressOffset: 0x0 - Size: 0x1 - Metadata: 0x2 + BBRanges: + - BaseAddress: 0x0 + BBEntries: + - ID: 3 + AddressOffset: 0x0 + Size: 0x1 + Metadata: 0x1 + - ID: 1 + AddressOffset: 0x0 + Size: 0x6 + Metadata: 0x0 + - ID: 2 + AddressOffset: 0x1 + Size: 0x4 + Metadata: 0x0 + - ID: 5 + AddressOffset: 0x0 + Size: 0x1 + Metadata: 0x2 PGOAnalyses: - FuncEntryCount: 1000 PGOBBEntries: diff --git a/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-pgo-analysis-map.test b/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-pgo-analysis-map.test index 91b3d7e3902e..e5a9400c670c 100644 --- a/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-pgo-analysis-map.test +++ 
b/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-pgo-analysis-map.test @@ -24,26 +24,31 @@ # CHECK-NEXT: At: [[ADDR]] # CHECK-NEXT: warning: '[[FILE]]': could not identify function symbol for address ([[ADDR]]) in SHT_LLVM_BB_ADDR_MAP section with index 3 # CHECK-NEXT: Name: -# CHECK-NEXT: BB entries [ +# CHECK-NEXT: BB Ranges [ # CHECK-NEXT: { -# CHECK-NEXT: ID: 0 -# CHECK-NEXT: Offset: 0x0 -# CHECK-NEXT: Size: 0x1 -# CHECK-NEXT: HasReturn: No -# CHECK-NEXT: HasTailCall: Yes -# CHECK-NEXT: IsEHPad: No -# CHECK-NEXT: CanFallThrough: No -# CHECK-NEXT: HasIndirectBranch: No -# CHECK-NEXT: } -# CHECK-NEXT: { -# CHECK-NEXT: ID: 2 -# CHECK-NEXT: Offset: 0x4 -# CHECK-NEXT: Size: 0x4 -# CHECK-NEXT: HasReturn: Yes -# CHECK-NEXT: HasTailCall: No -# CHECK-NEXT: IsEHPad: Yes -# CHECK-NEXT: CanFallThrough: No -# CHECK-NEXT: HasIndirectBranch: Yes +# CHECK-NEXT: Base Address: [[ADDR]] +# CHECK-NEXT: BB Entries [ +# CHECK-NEXT: { +# CHECK-NEXT: ID: 0 +# CHECK-NEXT: Offset: 0x0 +# CHECK-NEXT: Size: 0x1 +# CHECK-NEXT: HasReturn: No +# CHECK-NEXT: HasTailCall: Yes +# CHECK-NEXT: IsEHPad: No +# CHECK-NEXT: CanFallThrough: No +# CHECK-NEXT: HasIndirectBranch: No +# CHECK-NEXT: } +# CHECK-NEXT: { +# CHECK-NEXT: ID: 2 +# CHECK-NEXT: Offset: 0x4 +# CHECK-NEXT: Size: 0x4 +# CHECK-NEXT: HasReturn: Yes +# CHECK-NEXT: HasTailCall: No +# CHECK-NEXT: IsEHPad: Yes +# CHECK-NEXT: CanFallThrough: No +# CHECK-NEXT: HasIndirectBranch: Yes +# CHECK-NEXT: } +# CHECK-NEXT: ] # CHECK-NEXT: } # CHECK-NEXT: ] # CHECK-NEXT: PGO analyses { @@ -69,16 +74,21 @@ # CHECK-NEXT: Function { # CHECK-NEXT: At: 0x22222 # CHECK-NEXT: Name: foo -# CHECK-NEXT: BB entries [ +# CHECK-NEXT: BB Ranges [ # CHECK-NEXT: { -# CHECK-NEXT: ID: 4 -# CHECK-NEXT: Offset: 0x6 -# CHECK-NEXT: Size: 0x7 -# CHECK-NEXT: HasReturn: No -# CHECK-NEXT: HasTailCall: No -# CHECK-NEXT: IsEHPad: No -# CHECK-NEXT: CanFallThrough: Yes -# CHECK-NEXT: HasIndirectBranch: No +# CHECK-NEXT: Base Address: 0x22222 +# CHECK-NEXT: BB Entries [ +# 
CHECK-NEXT: { +# CHECK-NEXT: ID: 4 +# CHECK-NEXT: Offset: 0x6 +# CHECK-NEXT: Size: 0x7 +# CHECK-NEXT: HasReturn: No +# CHECK-NEXT: HasTailCall: No +# CHECK-NEXT: IsEHPad: No +# CHECK-NEXT: CanFallThrough: Yes +# CHECK-NEXT: HasIndirectBranch: No +# CHECK-NEXT: } +# CHECK-NEXT: ] # CHECK-NEXT: } # CHECK-NEXT: ] # CHECK-NEXT: PGO analyses { @@ -102,26 +112,31 @@ # TRUNCATED-NEXT: Function { # TRUNCATED-NEXT: At: 0x33333 # TRUNCATED-NEXT: Name: bar -# TRUNCATED-NEXT: BB entries [ -# TRUNCATED-NEXT: { -# TRUNCATED-NEXT: ID: 6 -# TRUNCATED-NEXT: Offset: 0x9 -# TRUNCATED-NEXT: Size: 0xA -# TRUNCATED-NEXT: HasReturn: Yes -# TRUNCATED-NEXT: HasTailCall: Yes -# TRUNCATED-NEXT: IsEHPad: No -# TRUNCATED-NEXT: CanFallThrough: Yes -# TRUNCATED-NEXT: HasIndirectBranch: Yes -# TRUNCATED-NEXT: } +# TRUNCATED-NEXT: BB Ranges [ # TRUNCATED-NEXT: { -# TRUNCATED-NEXT: ID: 7 -# TRUNCATED-NEXT: Offset: 0x1F -# TRUNCATED-NEXT: Size: 0xD -# TRUNCATED-NEXT: HasReturn: No -# TRUNCATED-NEXT: HasTailCall: Yes -# TRUNCATED-NEXT: IsEHPad: Yes -# TRUNCATED-NEXT: CanFallThrough: Yes -# TRUNCATED-NEXT: HasIndirectBranch: No +# TRUNCATED-NEXT: Base Address: 0x33333 +# TRUNCATED-NEXT: BB Entries [ +# TRUNCATED-NEXT: { +# TRUNCATED-NEXT: ID: 6 +# TRUNCATED-NEXT: Offset: 0x9 +# TRUNCATED-NEXT: Size: 0xA +# TRUNCATED-NEXT: HasReturn: Yes +# TRUNCATED-NEXT: HasTailCall: Yes +# TRUNCATED-NEXT: IsEHPad: No +# TRUNCATED-NEXT: CanFallThrough: Yes +# TRUNCATED-NEXT: HasIndirectBranch: Yes +# TRUNCATED-NEXT: } +# TRUNCATED-NEXT: { +# TRUNCATED-NEXT: ID: 7 +# TRUNCATED-NEXT: Offset: 0x1F +# TRUNCATED-NEXT: Size: 0xD +# TRUNCATED-NEXT: HasReturn: No +# TRUNCATED-NEXT: HasTailCall: Yes +# TRUNCATED-NEXT: IsEHPad: Yes +# TRUNCATED-NEXT: CanFallThrough: Yes +# TRUNCATED-NEXT: HasIndirectBranch: No +# TRUNCATED-NEXT: } +# TRUNCATED-NEXT: ] # TRUNCATED-NEXT: } # TRUNCATED-NEXT: ] # TRUNCATED-NEXT: PGO analyses { @@ -151,24 +166,26 @@ Sections: Entries: - Version: 2 Feature: 0x7 - Address: [[ADDR=0x11111]] - 
BBEntries: - - ID: 0 - AddressOffset: 0x0 - Size: 0x1 - Metadata: 0x2 - - ID: 2 - AddressOffset: 0x3 - Size: 0x4 - Metadata: 0x15 + BBRanges: + - BaseAddress: [[ADDR=0x11111]] + BBEntries: + - ID: 0 + AddressOffset: 0x0 + Size: 0x1 + Metadata: 0x2 + - ID: 2 + AddressOffset: 0x3 + Size: 0x4 + Metadata: 0x15 - Version: 2 Feature: 0x3 - Address: 0x22222 - BBEntries: - - ID: 4 - AddressOffset: 0x6 - Size: 0x7 - Metadata: 0x8 + BBRanges: + - BaseAddress: 0x22222 + BBEntries: + - ID: 4 + AddressOffset: 0x6 + Size: 0x7 + Metadata: 0x8 PGOAnalyses: - FuncEntryCount: 100 PGOBBEntries: @@ -190,16 +207,17 @@ Sections: Entries: - Version: 2 Feature: [[FEATURE=0x1]] - Address: 0x33333 - BBEntries: - - ID: 6 - AddressOffset: 0x9 - Size: 0xa - Metadata: 0x1b - - ID: 7 - AddressOffset: 0xc - Size: 0xd - Metadata: 0xe + BBRanges: + - BaseAddress: 0x33333 + BBEntries: + - ID: 6 + AddressOffset: 0x9 + Size: 0xa + Metadata: 0x1b + - ID: 7 + AddressOffset: 0xc + Size: 0xd + Metadata: 0xe PGOAnalyses: - FuncEntryCount: 89 Symbols: diff --git a/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-relocatable.test b/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-relocatable.test index e6b6cc344a8e..e7f78491a947 100644 --- a/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-relocatable.test +++ b/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-relocatable.test @@ -8,32 +8,42 @@ # CHECK-NEXT: Function { # CHECK-NEXT: At: 0x0 # CHECK-NEXT: Name: -# CHECK-NEXT: BB entries [ +# CHECK-NEXT: BB Ranges [ # CHECK-NEXT: { -# CHECK-NEXT: ID: 0 -# CHECK-NEXT: Offset: 0x0 -# CHECK-NEXT: Size: 0xF -# CHECK-NEXT: HasReturn: Yes -# CHECK-NEXT: HasTailCall: No -# CHECK-NEXT: IsEHPad: No -# CHECK-NEXT: CanFallThrough: No -# CHECK-NEXT: HasIndirectBranch: No +# CHECK-NEXT: Base Address: 0x0 +# CHECK-NEXT: BB Entries [ +# CHECK-NEXT: { +# CHECK-NEXT: ID: 0 +# CHECK-NEXT: Offset: 0x0 +# CHECK-NEXT: Size: 0xF +# CHECK-NEXT: HasReturn: Yes +# CHECK-NEXT: HasTailCall: No +# CHECK-NEXT: IsEHPad: No +# CHECK-NEXT: CanFallThrough: 
No +# CHECK-NEXT: HasIndirectBranch: No +# CHECK-NEXT: } +# CHECK-NEXT: ] # CHECK-NEXT: } # CHECK-NEXT: ] # CHECK-NEXT: } # CHECK-NEXT: Function { # CHECK-NEXT: At: 0x10 # CHECK-NEXT: Name: -# CHECK-NEXT: BB entries [ +# CHECK-NEXT: BB Ranges [ # CHECK-NEXT: { -# CHECK-NEXT: ID: 0 -# CHECK-NEXT: Offset: 0x0 -# CHECK-NEXT: Size: 0x11 -# CHECK-NEXT: HasReturn: No -# CHECK-NEXT: HasTailCall: No -# CHECK-NEXT: IsEHPad: No -# CHECK-NEXT: CanFallThrough: Yes -# CHECK-NEXT: HasIndirectBranch: No +# CHECK-NEXT: Base Address: 0x10 +# CHECK-NEXT: BB Entries [ +# CHECK-NEXT: { +# CHECK-NEXT: ID: 0 +# CHECK-NEXT: Offset: 0x0 +# CHECK-NEXT: Size: 0x11 +# CHECK-NEXT: HasReturn: No +# CHECK-NEXT: HasTailCall: No +# CHECK-NEXT: IsEHPad: No +# CHECK-NEXT: CanFallThrough: Yes +# CHECK-NEXT: HasIndirectBranch: No +# CHECK-NEXT: } +# CHECK-NEXT: ] # CHECK-NEXT: } # CHECK-NEXT: ] # CHECK-NEXT: } @@ -54,17 +64,19 @@ Sections: Link: .text Entries: - Version: 2 - BBEntries: - - ID: 0 - AddressOffset: 0x0 - Size: 0xF - Metadata: 0x1 + BBRanges: + - BBEntries: + - ID: 0 + AddressOffset: 0x0 + Size: 0xF + Metadata: 0x1 - Version: 2 - BBEntries: - - ID: 0 - AddressOffset: 0x0 - Size: 0x11 - Metadata: 0x8 + BBRanges: + - BBEntries: + - ID: 0 + AddressOffset: 0x0 + Size: 0x11 + Metadata: 0x8 - Name: .rela.llvm_bb_addr_map Type: SHT_RELA Flags: [ SHF_INFO_LINK ] @@ -128,7 +140,8 @@ Sections: Link: .text Entries: - Version: 2 - BBEntries: + BBRanges: + - BBEntries: - ID: 0 AddressOffset: 0x0 Size: 0xF @@ -181,12 +194,13 @@ Sections: Flags: [ SHF_LINK_ORDER ] Entries: - Version: 2 - Address: 0xF - BBEntries: - - ID: 0 - AddressOffset: 0x0 - Size: 0xF - Metadata: 0x1 + BBRanges: + - BaseAddress: 0xF + BBEntries: + - ID: 0 + AddressOffset: 0x0 + Size: 0xF + Metadata: 0x1 # RUN: yaml2obj %s --docnum=5 -o %t5.o # RUN: llvm-readobj %t5.o --bb-addr-map 2>&1 | FileCheck %s --check-prefix=ET-DYN-NO-WARNING -DFILE=%t5.o @@ -195,16 +209,21 @@ Sections: # ET-DYN-NO-WARNING: Function { # ET-DYN-NO-WARNING: 
At: 0xF # ET-DYN-NO-WARNING: Name: -# ET-DYN-NO-WARNING: BB entries [ +# ET-DYN-NO-WARNING: BB Ranges [ # ET-DYN-NO-WARNING: { -# ET-DYN-NO-WARNING: ID: 0 -# ET-DYN-NO-WARNING: Offset: 0x0 -# ET-DYN-NO-WARNING: Size: 0xF -# ET-DYN-NO-WARNING: HasReturn: Yes -# ET-DYN-NO-WARNING: HasTailCall: No -# ET-DYN-NO-WARNING: IsEHPad: No -# ET-DYN-NO-WARNING: CanFallThrough: No -# ET-DYN-NO-WARNING: HasIndirectBranch: No +# ET-DYN-NO-WARNING: Base Address: 0xF +# ET-DYN-NO-WARNING: BB Entries [ +# ET-DYN-NO-WARNING: { +# ET-DYN-NO-WARNING: ID: 0 +# ET-DYN-NO-WARNING: Offset: 0x0 +# ET-DYN-NO-WARNING: Size: 0xF +# ET-DYN-NO-WARNING: HasReturn: Yes +# ET-DYN-NO-WARNING: HasTailCall: No +# ET-DYN-NO-WARNING: IsEHPad: No +# ET-DYN-NO-WARNING: CanFallThrough: No +# ET-DYN-NO-WARNING: HasIndirectBranch: No +# ET-DYN-NO-WARNING: } +# ET-DYN-NO-WARNING: ] # ET-DYN-NO-WARNING: } # ET-DYN-NO-WARNING: ] # ET-DYN-NO-WARNING: } diff --git a/llvm/test/tools/llvm-readobj/ELF/bb-addr-map.test b/llvm/test/tools/llvm-readobj/ELF/bb-addr-map.test index 0593f04d6e30..c5d071c11d1d 100644 --- a/llvm/test/tools/llvm-readobj/ELF/bb-addr-map.test +++ b/llvm/test/tools/llvm-readobj/ELF/bb-addr-map.test @@ -15,8 +15,8 @@ # RUN: llvm-readelf %t1.x32.o --bb-addr-map | FileCheck %s --check-prefix=GNU ## Check that a malformed section can be handled. -# RUN: yaml2obj --docnum=1 %s -DBITS=32 -DSIZE=6 -o %t2.o -# RUN: llvm-readobj %t2.o --bb-addr-map 2>&1 | FileCheck %s -DOFFSET=0x00000006 -DFILE=%t2.o --check-prefix=TRUNCATED +# RUN: yaml2obj --docnum=1 %s -DBITS=32 -DSIZE=7 -o %t2.o +# RUN: llvm-readobj %t2.o --bb-addr-map 2>&1 | FileCheck %s -DOFFSET=0x00000007 -DFILE=%t2.o --check-prefix=TRUNCATED ## Check that invalid metadata can be handled. 
# RUN: yaml2obj --docnum=1 %s -DBITS=32 -DMETADATA=0xF000002 -o %t3.o @@ -27,42 +27,57 @@ # CHECK-NEXT: At: [[ADDR]] # CHECK-NEXT: warning: '[[FILE]]': could not identify function symbol for address ([[ADDR]]) in SHT_LLVM_BB_ADDR_MAP section with index 3 # CHECK-NEXT: Name: -# CHECK-NEXT: BB entries [ +# CHECK-NEXT: BB Ranges [ # CHECK-NEXT: { -# CHECK-NEXT: ID: 0 -# CHECK-NEXT: Offset: 0x0 -# CHECK-NEXT: Size: 0x1 -# CHECK-NEXT: HasReturn: No -# CHECK-NEXT: HasTailCall: Yes -# CHECK-NEXT: IsEHPad: No -# CHECK-NEXT: CanFallThrough: No -# CHECK-NEXT: HasIndirectBranch: No +# CHECK-NEXT: Base Address: [[ADDR]] +# CHECK-NEXT: BB Entries [ +# CHECK-NEXT: { +# CHECK-NEXT: ID: 0 +# CHECK-NEXT: Offset: 0x0 +# CHECK-NEXT: Size: 0x1 +# CHECK-NEXT: HasReturn: No +# CHECK-NEXT: HasTailCall: Yes +# CHECK-NEXT: IsEHPad: No +# CHECK-NEXT: CanFallThrough: No +# CHECK-NEXT: HasIndirectBranch: No +# CHECK-NEXT: } +# CHECK-NEXT: ] # CHECK-NEXT: } # CHECK-NEXT: { -# CHECK-NEXT: ID: 2 -# CHECK-NEXT: Offset: 0x4 -# CHECK-NEXT: Size: 0x4 -# CHECK-NEXT: HasReturn: Yes -# CHECK-NEXT: HasTailCall: No -# CHECK-NEXT: IsEHPad: Yes -# CHECK-NEXT: CanFallThrough: No -# CHECK-NEXT: HasIndirectBranch: Yes +# CHECK-NEXT: Base Address: 0x44444 +# CHECK-NEXT: BB Entries [ +# CHECK-NEXT: { +# CHECK-NEXT: ID: 2 +# CHECK-NEXT: Offset: 0x3 +# CHECK-NEXT: Size: 0x4 +# CHECK-NEXT: HasReturn: Yes +# CHECK-NEXT: HasTailCall: No +# CHECK-NEXT: IsEHPad: Yes +# CHECK-NEXT: CanFallThrough: No +# CHECK-NEXT: HasIndirectBranch: Yes +# CHECK-NEXT: } +# CHECK-NEXT: ] # CHECK-NEXT: } # CHECK-NEXT: ] # CHECK-NEXT: } # CHECK-NEXT: Function { # CHECK-NEXT: At: 0x22222 # CHECK-NEXT: Name: foo -# CHECK-NEXT: BB entries [ +# CHECK-NEXT: BB Ranges [ # CHECK-NEXT: { -# CHECK-NEXT: ID: 4 -# CHECK-NEXT: Offset: 0x6 -# CHECK-NEXT: Size: 0x7 -# CHECK-NEXT: HasReturn: No -# CHECK-NEXT: HasTailCall: No -# CHECK-NEXT: IsEHPad: No -# CHECK-NEXT: CanFallThrough: Yes -# CHECK-NEXT: HasIndirectBranch: No +# CHECK-NEXT: Base Address: 
0x22222 +# CHECK-NEXT: BB Entries [ +# CHECK-NEXT: { +# CHECK-NEXT: ID: 4 +# CHECK-NEXT: Offset: 0x6 +# CHECK-NEXT: Size: 0x7 +# CHECK-NEXT: HasReturn: No +# CHECK-NEXT: HasTailCall: No +# CHECK-NEXT: IsEHPad: No +# CHECK-NEXT: CanFallThrough: Yes +# CHECK-NEXT: HasIndirectBranch: No +# CHECK-NEXT: } +# CHECK-NEXT: ] # CHECK-NEXT: } # CHECK-NEXT: ] # CHECK-NEXT: } @@ -78,26 +93,31 @@ # TRUNCATED-NEXT: Function { # TRUNCATED-NEXT: At: 0x33333 # TRUNCATED-NEXT: Name: bar -# TRUNCATED-NEXT: BB entries [ +# TRUNCATED-NEXT: BB Ranges [ # TRUNCATED-NEXT: { -# TRUNCATED-NEXT: ID: 6 -# TRUNCATED-NEXT: Offset: 0x9 -# TRUNCATED-NEXT: Size: 0xA -# TRUNCATED-NEXT: HasReturn: Yes -# TRUNCATED-NEXT: HasTailCall: Yes -# TRUNCATED-NEXT: IsEHPad: No -# TRUNCATED-NEXT: CanFallThrough: Yes -# TRUNCATED-NEXT: HasIndirectBranch: Yes -# TRUNCATED-NEXT: } -# TRUNCATED-NEXT: { -# TRUNCATED-NEXT: ID: 7 -# TRUNCATED-NEXT: Offset: 0x1F -# TRUNCATED-NEXT: Size: 0xD -# TRUNCATED-NEXT: HasReturn: No -# TRUNCATED-NEXT: HasTailCall: Yes -# TRUNCATED-NEXT: IsEHPad: Yes -# TRUNCATED-NEXT: CanFallThrough: Yes -# TRUNCATED-NEXT: HasIndirectBranch: No +# TRUNCATED-NEXT: Base Address: 0x33333 +# TRUNCATED-NEXT: BB Entries [ +# TRUNCATED-NEXT: { +# TRUNCATED-NEXT: ID: 6 +# TRUNCATED-NEXT: Offset: 0x9 +# TRUNCATED-NEXT: Size: 0xA +# TRUNCATED-NEXT: HasReturn: Yes +# TRUNCATED-NEXT: HasTailCall: Yes +# TRUNCATED-NEXT: IsEHPad: No +# TRUNCATED-NEXT: CanFallThrough: Yes +# TRUNCATED-NEXT: HasIndirectBranch: Yes +# TRUNCATED-NEXT: } +# TRUNCATED-NEXT: { +# TRUNCATED-NEXT: ID: 7 +# TRUNCATED-NEXT: Offset: 0x1F +# TRUNCATED-NEXT: Size: 0xD +# TRUNCATED-NEXT: HasReturn: No +# TRUNCATED-NEXT: HasTailCall: Yes +# TRUNCATED-NEXT: IsEHPad: Yes +# TRUNCATED-NEXT: CanFallThrough: Yes +# TRUNCATED-NEXT: HasIndirectBranch: No +# TRUNCATED-NEXT: } +# TRUNCATED-NEXT: ] # TRUNCATED-NEXT: } # TRUNCATED-NEXT: ] # TRUNCATED-NEXT: } @@ -124,23 +144,28 @@ Sections: Link: .text Entries: - Version: 2 - Address: [[ADDR=0x11111]] 
- BBEntries: - - ID: 0 - AddressOffset: 0x0 - Size: 0x1 - Metadata: [[METADATA=0x2]] - - ID: 2 - AddressOffset: 0x3 - Size: 0x4 - Metadata: 0x15 + Feature: 0x8 + BBRanges: + - BaseAddress: [[ADDR=0x11111]] + BBEntries: + - ID: 0 + AddressOffset: 0x0 + Size: 0x1 + Metadata: [[METADATA=0x2]] + - BaseAddress: 0x44444 + BBEntries: + - ID: 2 + AddressOffset: 0x3 + Size: 0x4 + Metadata: 0x15 - Version: 2 - Address: 0x22222 - BBEntries: - - ID: 4 - AddressOffset: 0x6 - Size: 0x7 - Metadata: 0x8 + BBRanges: + - BaseAddress: 0x22222 + BBEntries: + - ID: 4 + AddressOffset: 0x6 + Size: 0x7 + Metadata: 0x8 - Name: dummy_section Type: SHT_PROGBITS Size: 16 @@ -149,16 +174,17 @@ Sections: Link: .text.bar Entries: - Version: 2 - Address: 0x33333 - BBEntries: - - ID: 6 - AddressOffset: 0x9 - Size: 0xa - Metadata: 0x1b - - ID: 7 - AddressOffset: 0xc - Size: 0xd - Metadata: 0xe + BBRanges: + - BaseAddress: 0x33333 + BBEntries: + - ID: 6 + AddressOffset: 0x9 + Size: 0xa + Metadata: 0x1b + - ID: 7 + AddressOffset: 0xc + Size: 0xd + Metadata: 0xe Symbols: - Name: foo Section: .text @@ -168,103 +194,3 @@ Symbols: Section: .text.bar Type: STT_FUNC Value: 0x33333 - -## Check that using the SHT_LLVM_BB_ADDR_MAP_V0 section type generates the same -## result as using the SHT_LLVM_BB_ADDR_MAP section type with Version=0. -## The Version field is required even for SHT_LLVM_BB_ADDR_MAP_V0 but it -## should not impact the result. This unideal behavior will be gone once -## SHT_LLVM_BB_ADDR_MAP_V0 is deprecated. 
- -# RUN: yaml2obj --docnum=2 %s -DVERSION=255 -DSECTION_TYPE=SHT_LLVM_BB_ADDR_MAP_V0 -o %t2.type0 -# RUN: llvm-readobj %t2.type0 --bb-addr-map 2>&1 | FileCheck %s --check-prefix=V0 - -# RUN: yaml2obj --docnum=2 %s -DVERSION=0 -DSECTION_TYPE=SHT_LLVM_BB_ADDR_MAP -o %t2.version0 -# RUN: llvm-readobj %t2.version0 --bb-addr-map 2>&1 | FileCheck %s --check-prefix=V0 - -# V0: BBAddrMap [ -# V0-NEXT: Function { -# V0-NEXT: At: 0x11111 -# V0-NEXT: Name: foo -# V0-NEXT: BB entries [ -# V0-NEXT: { -# V0-NEXT: ID: 0 -# V0-NEXT: Offset: 0x1 -# V0-NEXT: Size: 0x2 -# V0-NEXT: HasReturn: -# V0-NEXT: HasTailCall: -# V0-NEXT: IsEHPad: -# V0-NEXT: CanFallThrough: -# V0-NEXT: HasIndirectBranch: -# V0-NEXT: } -# V0-NEXT: { -# V0-NEXT: ID: 1 -# V0-NEXT: Offset: 0x4 -# V0-NEXT: Size: 0x5 -# V0-NEXT: HasReturn: -# V0-NEXT: HasTailCall: -# V0-NEXT: IsEHPad: -# V0-NEXT: CanFallThrough: -# V0-NEXT: HasIndirectBranch: -# V0-NEXT: } -# V0-NEXT: ] -# V0-NEXT: } - -## Check version 1 (without BB IDs). -# RUN: yaml2obj --docnum=2 %s -DVERSION=1 -DSECTION_TYPE=SHT_LLVM_BB_ADDR_MAP -o %t3 -# RUN: llvm-readobj %t3 --bb-addr-map 2>&1 | FileCheck %s --check-prefix=V1 - -# V1: BBAddrMap [ -# V1-NEXT: Function { -# V1-NEXT: At: 0x11111 -# V1-NEXT: Name: foo -# V1-NEXT: BB entries [ -# V1-NEXT: { -# V1-NEXT: ID: 0 -# V1-NEXT: Offset: 0x1 -# V1-NEXT: Size: 0x2 -# V1-NEXT: HasReturn: -# V1-NEXT: HasTailCall: -# V1-NEXT: IsEHPad: -# V1-NEXT: CanFallThrough: -# V1-NEXT: HasIndirectBranch: -# V1-NEXT: } -# V1-NEXT: { -# V1-NEXT: ID: 1 -# V1-NEXT: Offset: 0x7 -# V1-NEXT: Size: 0x5 -# V1-NEXT: HasReturn: -# V1-NEXT: HasTailCall: -# V1-NEXT: IsEHPad: -# V1-NEXT: CanFallThrough: -# V1-NEXT: HasIndirectBranch: -# V1-NEXT: } -# V1-NEXT: ] -# V1-NEXT: } - ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_EXEC -Sections: - - Name: .text.foo - Type: SHT_PROGBITS - Flags: [SHF_ALLOC] - - Name: .llvm_bb_addr_map - Type: [[SECTION_TYPE]] - Link: .text.foo - Entries: - - Version: [[VERSION]] - 
Address: 0x11111 - BBEntries: - - AddressOffset: 0x1 - Size: 0x2 - Metadata: 0x3 - - AddressOffset: 0x4 - Size: 0x5 - Metadata: 0x6 -Symbols: - - Name: foo - Section: .text.foo - Type: STT_FUNC - Value: 0x11111 diff --git a/llvm/test/tools/obj2yaml/ELF/bb-addr-map.yaml b/llvm/test/tools/obj2yaml/ELF/bb-addr-map.yaml index 629c29e202ae..8dbf97ef2bc1 100644 --- a/llvm/test/tools/obj2yaml/ELF/bb-addr-map.yaml +++ b/llvm/test/tools/obj2yaml/ELF/bb-addr-map.yaml @@ -14,30 +14,31 @@ # VALID-NEXT: - Name: .llvm_bb_addr_map # VALID-NEXT: Type: SHT_LLVM_BB_ADDR_MAP # VALID-NEXT: Entries: -## The 'Address' field is omitted when it's zero. # VALID-NEXT: - Version: 2 -# VALID-NEXT: Feature: 0xFF -# VALID-NEXT: BBEntries: -# VALID-NEXT: - ID: 0 -# VALID-NEXT: AddressOffset: 0x1 -# VALID-NEXT: Size: 0x2 -# VALID-NEXT: Metadata: 0x3 -# VALID-NEXT: - ID: 2 -# VALID-NEXT: AddressOffset: 0x4 -# VALID-NEXT: Size: 0x5 -# VALID-NEXT: Metadata: 0x6 -# VALID-NEXT: - ID: 4 -# VALID-NEXT: AddressOffset: 0xFFFFFFFFFFFFFFF7 -# VALID-NEXT: Size: 0xFFFFFFFFFFFFFFF8 -# VALID-NEXT: Metadata: 0xFFFFFFFFFFFFFFF9 +# VALID-NEXT: BBRanges: +## The 'BaseAddress' field is omitted when it's zero. 
+# VALID-NEXT: - BBEntries: +# VALID-NEXT: - ID: 0 +# VALID-NEXT: AddressOffset: 0x1 +# VALID-NEXT: Size: 0x2 +# VALID-NEXT: Metadata: 0x3 +# VALID-NEXT: - ID: 2 +# VALID-NEXT: AddressOffset: 0x4 +# VALID-NEXT: Size: 0x5 +# VALID-NEXT: Metadata: 0x6 +# VALID-NEXT: - ID: 4 +# VALID-NEXT: AddressOffset: 0xFFFFFFFFFFFFFFF7 +# VALID-NEXT: Size: 0xFFFFFFFFFFFFFFF8 +# VALID-NEXT: Metadata: 0xFFFFFFFFFFFFFFF9 # VALID-NEXT: - Version: 2 -# VALID-NEXT: Feature: 0xEE -# VALID-NEXT: Address: 0xFFFFFFFFFFFFFF20 -# VALID-NEXT: BBEntries: -# VALID-NEXT: - ID: 6 -# VALID-NEXT: AddressOffset: 0xA -# VALID-NEXT: Size: 0xB -# VALID-NEXT: Metadata: 0xC +# VALID-NEXT: Feature: 0x8 +# VALID-NEXT: BBRanges: +# VALID-NEXT: - BaseAddress: 0xFFFFFFFFFFFFFF20 +# VALID-NEXT: BBEntries: +# VALID-NEXT: - ID: 6 +# VALID-NEXT: AddressOffset: 0xA +# VALID-NEXT: Size: 0xB +# VALID-NEXT: Metadata: 0xC --- !ELF FileHeader: @@ -50,30 +51,33 @@ Sections: ShSize: [[SIZE=]] Entries: - Version: 2 - Feature: 0xFF - Address: 0x0 - BBEntries: - - ID: 0 - AddressOffset: 0x1 - Size: 0x2 - Metadata: 0x3 - - ID: 2 - AddressOffset: 0x4 - Size: 0x5 - Metadata: 0x6 - - ID: 4 - AddressOffset: 0xFFFFFFFFFFFFFFF7 - Size: 0xFFFFFFFFFFFFFFF8 - Metadata: 0xFFFFFFFFFFFFFFF9 + Feature: 0x0 + BBRanges: + - BaseAddress: 0x0 + BBEntries: + - ID: 0 + AddressOffset: 0x1 + Size: 0x2 + Metadata: 0x3 + - ID: 2 + AddressOffset: 0x4 + Size: 0x5 + Metadata: 0x6 + - ID: 4 + AddressOffset: 0xFFFFFFFFFFFFFFF7 + Size: 0xFFFFFFFFFFFFFFF8 + Metadata: 0xFFFFFFFFFFFFFFF9 - Version: 2 - Feature: 0xEE - Address: 0xFFFFFFFFFFFFFF20 - NumBlocks: [[NUMBLOCKS=]] - BBEntries: - - ID: 6 - AddressOffset: 0xA - Size: 0xB - Metadata: 0xC + Feature: 0x8 + NumBBRanges: [[NUMBBRANGES=]] + BBRanges: + - BaseAddress: 0xFFFFFFFFFFFFFF20 + NumBlocks: [[NUMBLOCKS=]] + BBEntries: + - ID: 6 + AddressOffset: 0xA + Size: 0xB + Metadata: 0xC ## Check obj2yaml can dump empty .llvm_bb_addr_map sections. 
@@ -114,19 +118,21 @@ Sections: # MULTI-NEXT: - Name: .llvm_bb_addr_map # MULTI-NEXT: Type: SHT_LLVM_BB_ADDR_MAP # MULTI-NEXT: Entries: -## Fields 'Address' and 'Feature' are omitted when they are zero. +## Fields 'BaseAddress' and 'Feature' are omitted when they are zero. # MULTI-NEXT: - Version: 0 -# MULTI-NEXT: BBEntries: -# MULTI-NEXT: - ID: 0 -# MULTI-NEXT: AddressOffset: 0x1 -# MULTI-NEXT: Size: 0x2 -# MULTI-NEXT: Metadata: 0x3 +# MULTI-NEXT: BBRanges: +# MULTI-NEXT: - BBEntries: +# MULTI-NEXT: - ID: 0 +# MULTI-NEXT: AddressOffset: 0x1 +# MULTI-NEXT: Size: 0x2 +# MULTI-NEXT: Metadata: 0x3 # MULTI-NEXT: - Name: '.llvm_bb_addr_map (1)' # MULTI-NEXT: Type: SHT_LLVM_BB_ADDR_MAP # MULTI-NEXT: Entries: # MULTI-NEXT: - Version: 0 -# MULTI-NEXT: Address: 0x20 -# MULTI-NEXT: BBEntries: [] +# MULTI-NEXT: BBRanges: +# MULTI-NEXT: - BaseAddress: 0x20 +# MULTI-NEXT: BBEntries: [] --- !ELF FileHeader: @@ -141,26 +147,31 @@ Sections: ## they are zero. - Version: 0 Feature: 0x0 - Address: 0x0 - BBEntries: - - AddressOffset: 0x1 - Size: 0x2 - Metadata: 0x3 + BBRanges: + - BaseAddress: 0x0 + BBEntries: + - AddressOffset: 0x1 + Size: 0x2 + Metadata: 0x3 - Name: '.llvm_bb_addr_map (1)' Type: SHT_LLVM_BB_ADDR_MAP Entries: - Version: 0 - Address: 0x20 + BBRanges: + - BaseAddress: 0x20 ## Check that obj2yaml uses the "Content" tag to describe an .llvm_bb_addr_map section ## when it can't extract the entries, for example, when the section is truncated, or -## when an invalid 'NumBlocks' field is specified. +## when an invalid 'NumBlocks' or 'NumBBRanges` field is specified. 
# RUN: yaml2obj --docnum=1 -DSIZE=0x8 %s -o %t4 # RUN: obj2yaml %t4 | FileCheck %s --check-prefixes=TRUNCATED,INVALID # RUN: yaml2obj --docnum=1 -DNUMBLOCKS=2 %s -o %t5 -# RUN: obj2yaml %t5 | FileCheck %s --check-prefixes=BADNUMBLOCKS,INVALID +# RUN: obj2yaml %t5 | FileCheck %s --check-prefixes=BADNUM,INVALID + +# RUN: yaml2obj --docnum=1 -DNUMBBRANGES=2 %s -o %t6 +# RUN: obj2yaml %t6 | FileCheck %s --check-prefixes=BADNUM,INVALID # INVALID: --- !ELF # INVALID-NEXT: FileHeader: @@ -170,86 +181,5 @@ Sections: # INVALID-NEXT: Sections: # INVALID-NEXT: - Name: .llvm_bb_addr_map # INVALID-NEXT: Type: SHT_LLVM_BB_ADDR_MAP -# BADNUMBLOCKS-NEXT: Content: {{([[:xdigit:]]+)}}{{$}} -# TRUNCATED-NEXT: Content: {{([[:xdigit:]]{16})}}{{$}} - -## Check obj2yaml for SHT_LLVM_BB_ADDR_MAP_V0. -# RUN: yaml2obj --docnum=4 %s -o %t6 -# RUN: obj2yaml %t6 | FileCheck %s --check-prefix=V0 - -# V0: --- !ELF -# V0-NEXT: FileHeader: -# V0-NEXT: Class: ELFCLASS64 -# V0-NEXT: Data: ELFDATA2LSB -# V0-NEXT: Type: ET_EXEC -# V0-NEXT: Sections: -# V0-NEXT: - Name: .llvm_bb_addr_map -# V0-NEXT: Type: SHT_LLVM_BB_ADDR_MAP_V0 -# V0-NEXT: Entries: -# V0-NEXT: - Version: 0 -# V0-NEXT: Address: 0x1111 -# V0-NEXT: BBEntries: -# V0-NEXT: - ID: 0 -# V0-NEXT: AddressOffset: 0x1 -# V0-NEXT: Size: 0x2 -# V0-NEXT: Metadata: 0x3 - ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_EXEC -Sections: - - Name: .llvm_bb_addr_map - Type: SHT_LLVM_BB_ADDR_MAP_V0 - Entries: - - Version: 0 - Address: 0x1111 - BBEntries: - - AddressOffset: 0x1 - Size: 0x2 - Metadata: 0x3 - -## Check obj2yaml for version 1. 
-# RUN: yaml2obj --docnum=5 %s -o %t7 -# RUN: obj2yaml %t7 | FileCheck %s --check-prefix=V1 - -# V1: --- !ELF -# V1-NEXT: FileHeader: -# V1-NEXT: Class: ELFCLASS64 -# V1-NEXT: Data: ELFDATA2LSB -# V1-NEXT: Type: ET_EXEC -# V1-NEXT: Sections: -# V1-NEXT: - Name: .llvm_bb_addr_map -# V1-NEXT: Type: SHT_LLVM_BB_ADDR_MAP -# V1-NEXT: Entries: -# V1-NEXT: - Version: 1 -# V1-NEXT: Address: 0x1111 -# V1-NEXT: BBEntries: -# V1-NEXT: - ID: 0 -# V1-NEXT: AddressOffset: 0x1 -# V1-NEXT: Size: 0x2 -# V1-NEXT: Metadata: 0x3 -# V1-NEXT: - ID: 1 -# V1-NEXT: AddressOffset: 0x4 -# V1-NEXT: Size: 0x5 -# V1-NEXT: Metadata: 0x6 - ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_EXEC -Sections: - - Name: .llvm_bb_addr_map - Type: SHT_LLVM_BB_ADDR_MAP - Entries: - - Version: 1 - Address: 0x1111 - BBEntries: - - AddressOffset: 0x1 - Size: 0x2 - Metadata: 0x3 - - AddressOffset: 0x4 - Size: 0x5 - Metadata: 0x6 +# BADNUM-NEXT: Content: {{([[:xdigit:]]+)}}{{$}} +# TRUNCATED-NEXT: Content: '{{([[:xdigit:]]{16})}}'{{$}} diff --git a/llvm/test/tools/yaml2obj/ELF/bb-addr-map.yaml b/llvm/test/tools/yaml2obj/ELF/bb-addr-map.yaml index 2086dc53208b..709938babffb 100644 --- a/llvm/test/tools/yaml2obj/ELF/bb-addr-map.yaml +++ b/llvm/test/tools/yaml2obj/ELF/bb-addr-map.yaml @@ -53,6 +53,21 @@ # CHECK-NEXT: 0000: 02002000 00000000 0000020D 010203 # CHECK-NEXT: ) +# Case 7: Specify empty BBRanges. +# CHECK: Name: .llvm_bb_addr_map (1) +# CHECK: SectionData ( +# CHECK-NEXT: 0000: 020000 +# CHECK-NEXT: ) + +# Case 8: Specify empty BBRanges with multi-bb-range. 
+# CHECK: Name: .llvm_bb_addr_map (1) +# CHECK: SectionData ( +# CHECK-NEXT: 0000: 020800 +# CHECK-NEXT: ) + + + + --- !ELF FileHeader: Class: ELFCLASS64 @@ -86,12 +101,13 @@ Sections: Type: SHT_LLVM_BB_ADDR_MAP Entries: - Version: 2 - Address: 0x0000000000000020 - BBEntries: - - ID: 11 - AddressOffset: 0x00000001 - Size: 0x00000002 - Metadata: 0x00000003 + BBRanges: + - BaseAddress: 0x0000000000000020 + BBEntries: + - ID: 11 + AddressOffset: 0x00000001 + Size: 0x00000002 + Metadata: 0x00000003 ## 5) When specifying the description with Entries, the 'Address' field will be ## zero when omitted. @@ -99,11 +115,12 @@ Sections: Type: SHT_LLVM_BB_ADDR_MAP Entries: - Version: 2 - BBEntries: - - ID: 12 - AddressOffset: 0x00000001 - Size: 0x00000002 - Metadata: 0x00000003 + BBRanges: + - BBEntries: + - ID: 12 + AddressOffset: 0x00000001 + Size: 0x00000002 + Metadata: 0x00000003 ## 6) We can override the NumBlocks field with a value different from the ## actual number of BB Entries. @@ -111,13 +128,31 @@ Sections: Type: SHT_LLVM_BB_ADDR_MAP Entries: - Version: 2 - Address: 0x0000000000000020 - NumBlocks: 2 - BBEntries: - - ID: 13 - AddressOffset: 0x00000001 - Size: 0x00000002 - Metadata: 0x00000003 + BBRanges: + - BaseAddress: 0x0000000000000020 + NumBlocks: 2 + BBEntries: + - ID: 13 + AddressOffset: 0x00000001 + Size: 0x00000002 + Metadata: 0x00000003 + +## 7) We can produce a SHT_LLVM_BB_ADDR_MAP section from a description +## with one entry with empty BBRanges. + - Name: '.llvm_bb_addr_map (7)' + Type: SHT_LLVM_BB_ADDR_MAP + Entries: + - Version: 2 + BBRanges: [] + +## 8) We can produce a SHT_LLVM_BB_ADDR_MAP section from a multi-bb-range +## description with one entry with empty BBRanges. + - Name: '.llvm_bb_addr_map (8)' + Type: SHT_LLVM_BB_ADDR_MAP + Entries: + - Version: 2 + Feature: 0x8 + BBRanges: [] ## Check we can't use Entries at the same time as either Content or Size. 
# RUN: not yaml2obj --docnum=2 -DCONTENT="00" %s 2>&1 | FileCheck %s --check-prefix=INVALID diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp index ff71d91e07fb..3926af60c1ee 100644 --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -183,6 +183,110 @@ public: "Mach-O object file displaying tool") {} }; +struct BBAddrMapLabel { + std::string BlockLabel; + std::string PGOAnalysis; +}; + +// This class represents the BBAddrMap and PGOMap associated with a single +// function. +class BBAddrMapFunctionEntry { +public: + BBAddrMapFunctionEntry(BBAddrMap AddrMap, PGOAnalysisMap PGOMap) + : AddrMap(std::move(AddrMap)), PGOMap(std::move(PGOMap)) {} + + const BBAddrMap &getAddrMap() const { return AddrMap; } + + // Returns the PGO string associated with the entry of index `PGOBBEntryIndex` + // in `PGOMap`. + std::string constructPGOLabelString(size_t PGOBBEntryIndex) const { + if (!PGOMap.FeatEnable.hasPGOAnalysis()) + return ""; + std::string PGOString; + raw_string_ostream PGOSS(PGOString); + + PGOSS << " ("; + if (PGOMap.FeatEnable.FuncEntryCount && PGOBBEntryIndex == 0) { + PGOSS << "Entry count: " << Twine(PGOMap.FuncEntryCount); + if (PGOMap.FeatEnable.hasPGOAnalysisBBData()) { + PGOSS << ", "; + } + } + + if (PGOMap.FeatEnable.hasPGOAnalysisBBData()) { + + assert(PGOBBEntryIndex < PGOMap.BBEntries.size() && + "Expected PGOAnalysisMap and BBAddrMap to have the same entries"); + const PGOAnalysisMap::PGOBBEntry &PGOBBEntry = + PGOMap.BBEntries[PGOBBEntryIndex]; + + if (PGOMap.FeatEnable.BBFreq) { + PGOSS << "Frequency: " << Twine(PGOBBEntry.BlockFreq.getFrequency()); + if (PGOMap.FeatEnable.BrProb && PGOBBEntry.Successors.size() > 0) { + PGOSS << ", "; + } + } + if (PGOMap.FeatEnable.BrProb && PGOBBEntry.Successors.size() > 0) { + PGOSS << "Successors: "; + interleaveComma( + PGOBBEntry.Successors, PGOSS, + [&PGOSS](const PGOAnalysisMap::PGOBBEntry::SuccessorEntry &SE) { + 
PGOSS << "BB" << SE.ID << ":"; + PGOSS.write_hex(SE.Prob.getNumerator()); + }); + } + } + PGOSS << ")"; + + return PGOString; + } + +private: + const BBAddrMap AddrMap; + const PGOAnalysisMap PGOMap; +}; + +// This class represents the BBAddrMap and PGOMap of potentially multiple +// functions in a section. +class BBAddrMapInfo { +public: + void clear() { + FunctionAddrToMap.clear(); + RangeBaseAddrToFunctionAddr.clear(); + } + + bool empty() const { return FunctionAddrToMap.empty(); } + + void AddFunctionEntry(BBAddrMap AddrMap, PGOAnalysisMap PGOMap) { + uint64_t FunctionAddr = AddrMap.getFunctionAddress(); + for (size_t I = 1; I < AddrMap.BBRanges.size(); ++I) + RangeBaseAddrToFunctionAddr.emplace(AddrMap.BBRanges[I].BaseAddress, + FunctionAddr); + [[maybe_unused]] auto R = FunctionAddrToMap.try_emplace( + FunctionAddr, std::move(AddrMap), std::move(PGOMap)); + assert(R.second && "duplicate function address"); + } + + // Returns the BBAddrMap entry for the function associated with `BaseAddress`. + // `BaseAddress` could be the function address or the address of a range + // associated with that function. Returns `nullptr` if `BaseAddress` is not + // mapped to any entry. 
+ const BBAddrMapFunctionEntry *getEntryForAddress(uint64_t BaseAddress) const { + uint64_t FunctionAddr = BaseAddress; + auto S = RangeBaseAddrToFunctionAddr.find(BaseAddress); + if (S != RangeBaseAddrToFunctionAddr.end()) + FunctionAddr = S->second; + auto R = FunctionAddrToMap.find(FunctionAddr); + if (R == FunctionAddrToMap.end()) + return nullptr; + return &R->second; + } + +private: + std::unordered_map FunctionAddrToMap; + std::unordered_map RangeBaseAddrToFunctionAddr; +}; + } // namespace #define DEBUG_TYPE "objdump" @@ -1144,80 +1248,38 @@ static SymbolInfoTy createDummySymbolInfo(const ObjectFile &Obj, return SymbolInfoTy(Addr, Name, Type); } -struct BBAddrMapLabel { - std::string BlockLabel; - std::string PGOAnalysis; -}; - -static std::string constructPGOLabelString(const PGOAnalysisMap &PGOMap, - size_t BBEntryIndex) { - std::string PGOString; - raw_string_ostream PGOSS(PGOString); - - PGOSS << " ("; - if (PGOMap.FeatEnable.FuncEntryCount && BBEntryIndex == 0) { - PGOSS << "Entry count: " << Twine(PGOMap.FuncEntryCount); - if (PGOMap.FeatEnable.BBFreq || PGOMap.FeatEnable.BrProb) { - PGOSS << ", "; - } - } - - if (PGOMap.FeatEnable.BBFreq || PGOMap.FeatEnable.BrProb) { - assert(BBEntryIndex < PGOMap.BBEntries.size() && - "Expected PGOAnalysisMap and BBAddrMap to have the same entires"); - const PGOAnalysisMap::PGOBBEntry &PGOBBEntry = - PGOMap.BBEntries[BBEntryIndex]; - - if (PGOMap.FeatEnable.BBFreq) { - PGOSS << "Frequency: " << Twine(PGOBBEntry.BlockFreq.getFrequency()); - if (PGOMap.FeatEnable.BrProb && PGOBBEntry.Successors.size() > 0) { - PGOSS << ", "; - } - } - if (PGOMap.FeatEnable.BrProb && PGOBBEntry.Successors.size() > 0) { - PGOSS << "Successors: "; - interleaveComma( - PGOBBEntry.Successors, PGOSS, - [&PGOSS](const PGOAnalysisMap::PGOBBEntry::SuccessorEntry &SE) { - PGOSS << "BB" << SE.ID << ":"; - PGOSS.write_hex(SE.Prob.getNumerator()); - }); - } - } - PGOSS << ")"; - - return PGOString; -} - static void collectBBAddrMapLabels( - const 
std::unordered_map &AddrToBBAddrMap, - const std::unordered_map &AddrToPGOAnalysisMap, - uint64_t SectionAddr, uint64_t Start, uint64_t End, - std::unordered_map> &Labels, - const StringRef FileName) { - if (AddrToBBAddrMap.empty()) + const BBAddrMapInfo &FullAddrMap, uint64_t SectionAddr, uint64_t Start, + uint64_t End, + std::unordered_map> &Labels) { + if (FullAddrMap.empty()) return; Labels.clear(); uint64_t StartAddress = SectionAddr + Start; uint64_t EndAddress = SectionAddr + End; - auto Iter = AddrToBBAddrMap.find(StartAddress); - if (Iter == AddrToBBAddrMap.end()) + const BBAddrMapFunctionEntry *FunctionMap = + FullAddrMap.getEntryForAddress(StartAddress); + if (!FunctionMap) return; - auto PGOIter = AddrToPGOAnalysisMap.find(StartAddress); - - for (size_t I = 0; I < Iter->second.getBBEntries().size(); ++I) { - const BBAddrMap::BBEntry &BBEntry = Iter->second.getBBEntries()[I]; - uint64_t BBAddress = BBEntry.Offset + Iter->second.getFunctionAddress(); + std::optional BBRangeIndex = + FunctionMap->getAddrMap().getBBRangeIndexForBaseAddress(StartAddress); + if (!BBRangeIndex) + return; + size_t NumBBEntriesBeforeRange = 0; + for (size_t I = 0; I < *BBRangeIndex; ++I) + NumBBEntriesBeforeRange += + FunctionMap->getAddrMap().BBRanges[I].BBEntries.size(); + const auto &BBRange = FunctionMap->getAddrMap().BBRanges[*BBRangeIndex]; + for (size_t I = 0; I < BBRange.BBEntries.size(); ++I) { + const BBAddrMap::BBEntry &BBEntry = BBRange.BBEntries[I]; + uint64_t BBAddress = BBEntry.Offset + BBRange.BaseAddress; if (BBAddress >= EndAddress) continue; std::string LabelString = ("BB" + Twine(BBEntry.ID)).str(); - std::string PGOString; - - if (PGOIter != AddrToPGOAnalysisMap.end()) - PGOString = constructPGOLabelString(PGOIter->second, I); - - Labels[BBAddress].push_back({LabelString, PGOString}); + Labels[BBAddress].push_back( + {LabelString, + FunctionMap->constructPGOLabelString(NumBBEntriesBeforeRange + I)}); } } @@ -1517,11 +1579,10 @@ static void 
disassembleObject(const Target *TheTarget, ObjectFile &Obj, LLVM_DEBUG(LVP.dump()); - std::unordered_map AddrToBBAddrMap; - std::unordered_map AddrToPGOAnalysisMap; + BBAddrMapInfo FullAddrMap; auto ReadBBAddrMap = [&](std::optional SectionIndex = std::nullopt) { - AddrToBBAddrMap.clear(); + FullAddrMap.clear(); if (const auto *Elf = dyn_cast(&Obj)) { std::vector PGOAnalyses; auto BBAddrMapsOrErr = Elf->readBBAddrMap(SectionIndex, &PGOAnalyses); @@ -1531,10 +1592,8 @@ static void disassembleObject(const Target *TheTarget, ObjectFile &Obj, } for (auto &&[FunctionBBAddrMap, FunctionPGOAnalysis] : zip_equal(*std::move(BBAddrMapsOrErr), std::move(PGOAnalyses))) { - uint64_t Addr = FunctionBBAddrMap.Addr; - AddrToBBAddrMap.emplace(Addr, std::move(FunctionBBAddrMap)); - if (FunctionPGOAnalysis.FeatEnable.anyEnabled()) - AddrToPGOAnalysisMap.emplace(Addr, std::move(FunctionPGOAnalysis)); + FullAddrMap.AddFunctionEntry(std::move(FunctionBBAddrMap), + std::move(FunctionPGOAnalysis)); } } }; @@ -1841,9 +1900,8 @@ static void disassembleObject(const Target *TheTarget, ObjectFile &Obj, if (SymbolizeOperands) { collectLocalBranchTargets(Bytes, MIA, DisAsm, IP, PrimarySTI, SectionAddr, Index, End, AllLabels); - collectBBAddrMapLabels(AddrToBBAddrMap, AddrToPGOAnalysisMap, - SectionAddr, Index, End, BBAddrMapLabels, - FileName); + collectBBAddrMapLabels(FullAddrMap, SectionAddr, Index, End, + BBAddrMapLabels); } while (Index < End) { diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index 815fe31d4a82..13c19aab2d60 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -7418,8 +7418,7 @@ template void LLVMELFDumper::printBBAddrMaps() { bool IsRelocatable = this->Obj.getHeader().e_type == ELF::ET_REL; using Elf_Shdr = typename ELFT::Shdr; auto IsMatch = [](const Elf_Shdr &Sec) -> bool { - return Sec.sh_type == ELF::SHT_LLVM_BB_ADDR_MAP || - Sec.sh_type == ELF::SHT_LLVM_BB_ADDR_MAP_V0; + return 
Sec.sh_type == ELF::SHT_LLVM_BB_ADDR_MAP; }; Expected> SecRelocMapOrErr = this->Obj.getSectionAndRelocations(IsMatch); @@ -7450,40 +7449,46 @@ template void LLVMELFDumper::printBBAddrMaps() { } for (const auto &[AM, PAM] : zip_equal(*BBAddrMapOrErr, PGOAnalyses)) { DictScope D(W, "Function"); - W.printHex("At", AM.Addr); + W.printHex("At", AM.getFunctionAddress()); SmallVector FuncSymIndex = - this->getSymbolIndexesForFunctionAddress(AM.Addr, FunctionSec); + this->getSymbolIndexesForFunctionAddress(AM.getFunctionAddress(), + FunctionSec); std::string FuncName = ""; if (FuncSymIndex.empty()) this->reportUniqueWarning( "could not identify function symbol for address (0x" + - Twine::utohexstr(AM.Addr) + ") in " + this->describe(*Sec)); + Twine::utohexstr(AM.getFunctionAddress()) + ") in " + + this->describe(*Sec)); else FuncName = this->getStaticSymbolName(FuncSymIndex.front()); W.printString("Name", FuncName); - { - ListScope L(W, "BB entries"); - for (const BBAddrMap::BBEntry &BBE : AM.BBEntries) { - DictScope L(W); - W.printNumber("ID", BBE.ID); - W.printHex("Offset", BBE.Offset); - W.printHex("Size", BBE.Size); - W.printBoolean("HasReturn", BBE.hasReturn()); - W.printBoolean("HasTailCall", BBE.hasTailCall()); - W.printBoolean("IsEHPad", BBE.isEHPad()); - W.printBoolean("CanFallThrough", BBE.canFallThrough()); - W.printBoolean("HasIndirectBranch", BBE.hasIndirectBranch()); + ListScope BBRL(W, "BB Ranges"); + for (const BBAddrMap::BBRangeEntry &BBR : AM.BBRanges) { + DictScope BBRD(W); + W.printHex("Base Address", BBR.BaseAddress); + ListScope BBEL(W, "BB Entries"); + for (const BBAddrMap::BBEntry &BBE : BBR.BBEntries) { + DictScope BBED(W); + W.printNumber("ID", BBE.ID); + W.printHex("Offset", BBE.Offset); + W.printHex("Size", BBE.Size); + W.printBoolean("HasReturn", BBE.hasReturn()); + W.printBoolean("HasTailCall", BBE.hasTailCall()); + W.printBoolean("IsEHPad", BBE.isEHPad()); + W.printBoolean("CanFallThrough", BBE.canFallThrough()); + 
W.printBoolean("HasIndirectBranch", BBE.hasIndirectBranch()); + } } } - if (PAM.FeatEnable.anyEnabled()) { + if (PAM.FeatEnable.hasPGOAnalysis()) { DictScope PD(W, "PGO analyses"); if (PAM.FeatEnable.FuncEntryCount) W.printNumber("FuncEntryCount", PAM.FuncEntryCount); - if (PAM.FeatEnable.BBFreq || PAM.FeatEnable.BrProb) { + if (PAM.FeatEnable.hasPGOAnalysisBBData()) { ListScope L(W, "PGO BB entries"); for (const PGOAnalysisMap::PGOBBEntry &PBBE : PAM.BBEntries) { DictScope L(W); diff --git a/llvm/tools/obj2yaml/elf2yaml.cpp b/llvm/tools/obj2yaml/elf2yaml.cpp index b261b9dc6f6e..ddd21ed889f2 100644 --- a/llvm/tools/obj2yaml/elf2yaml.cpp +++ b/llvm/tools/obj2yaml/elf2yaml.cpp @@ -626,7 +626,6 @@ ELFDumper::dumpSections() { case ELF::SHT_LLVM_CALL_GRAPH_PROFILE: return [this](const Elf_Shdr *S) { return dumpCallGraphProfileSection(S); }; - case ELF::SHT_LLVM_BB_ADDR_MAP_V0: case ELF::SHT_LLVM_BB_ADDR_MAP: return [this](const Elf_Shdr *S) { return dumpBBAddrMapSection(S); }; case ELF::SHT_STRTAB: @@ -893,6 +892,7 @@ ELFDumper::dumpBBAddrMapSection(const Elf_Shdr *Shdr) { DataExtractor::Cursor Cur(0); uint8_t Version = 0; uint8_t Feature = 0; + uint64_t Address = 0; while (Cur && Cur.tell() < Content.size()) { if (Shdr->sh_type == ELF::SHT_LLVM_BB_ADDR_MAP) { Version = Data.getU8(Cur); @@ -903,19 +903,41 @@ ELFDumper::dumpBBAddrMapSection(const Elf_Shdr *Shdr) { Twine(static_cast(Version))); Feature = Data.getU8(Cur); } - uint64_t Address = Data.getAddress(Cur); - uint64_t NumBlocks = Data.getULEB128(Cur); - std::vector BBEntries; - // Read the specified number of BB entries, or until decoding fails. - for (uint64_t BlockIndex = 0; Cur && BlockIndex < NumBlocks; ++BlockIndex) { - uint32_t ID = Version >= 2 ? 
Data.getULEB128(Cur) : BlockIndex; - uint64_t Offset = Data.getULEB128(Cur); - uint64_t Size = Data.getULEB128(Cur); - uint64_t Metadata = Data.getULEB128(Cur); - BBEntries.push_back({ID, Offset, Size, Metadata}); + uint64_t NumBBRanges = 1; + uint64_t NumBlocks = 0; + auto FeatureOrErr = llvm::object::BBAddrMap::Features::decode(Feature); + if (!FeatureOrErr) + return FeatureOrErr.takeError(); + if (FeatureOrErr->MultiBBRange) { + NumBBRanges = Data.getULEB128(Cur); + } else { + Address = Data.getAddress(Cur); + NumBlocks = Data.getULEB128(Cur); + } + std::vector BBRanges; + uint64_t BaseAddress = 0; + for (uint64_t BBRangeN = 0; Cur && BBRangeN != NumBBRanges; ++BBRangeN) { + if (FeatureOrErr->MultiBBRange) { + BaseAddress = Data.getAddress(Cur); + NumBlocks = Data.getULEB128(Cur); + } else { + BaseAddress = Address; + } + + std::vector BBEntries; + // Read the specified number of BB entries, or until decoding fails. + for (uint64_t BlockIndex = 0; Cur && BlockIndex < NumBlocks; + ++BlockIndex) { + uint32_t ID = Version >= 2 ? 
Data.getULEB128(Cur) : BlockIndex; + uint64_t Offset = Data.getULEB128(Cur); + uint64_t Size = Data.getULEB128(Cur); + uint64_t Metadata = Data.getULEB128(Cur); + BBEntries.push_back({ID, Offset, Size, Metadata}); + } + BBRanges.push_back({BaseAddress, /*NumBlocks=*/{}, BBEntries}); } Entries.push_back( - {Version, Feature, Address, /*NumBlocks=*/{}, std::move(BBEntries)}); + {Version, Feature, /*NumBBRanges=*/{}, std::move(BBRanges)}); } if (!Cur) { diff --git a/llvm/unittests/Object/ELFObjectFileTest.cpp b/llvm/unittests/Object/ELFObjectFileTest.cpp index 91f5d9356140..ae3d2ea22804 100644 --- a/llvm/unittests/Object/ELFObjectFileTest.cpp +++ b/llvm/unittests/Object/ELFObjectFileTest.cpp @@ -478,7 +478,6 @@ Sections: - Type: SHT_LLVM_BB_ADDR_MAP Name: .llvm_bb_addr_map Entries: - - Address: 0x11111 )"); auto DoCheck = [&](StringRef YamlString, const char *ErrMsg) { @@ -498,11 +497,13 @@ Sections: // Check that we can detect unsupported versions. SmallString<128> UnsupportedVersionYamlString(CommonYamlString); UnsupportedVersionYamlString += R"( - Version: 3 - BBEntries: - - AddressOffset: 0x0 - Size: 0x1 - Metadata: 0x2 + - Version: 3 + BBRanges: + - BaseAddress: 0x11111 + BBEntries: + - AddressOffset: 0x0 + Size: 0x1 + Metadata: 0x2 )"; { @@ -511,14 +512,28 @@ Sections: "unsupported SHT_LLVM_BB_ADDR_MAP version: 3"); } + SmallString<128> ZeroBBRangesYamlString(CommonYamlString); + ZeroBBRangesYamlString += R"( + - Version: 2 + Feature: 0x8 + BBRanges: [] +)"; + { + SCOPED_TRACE("zero bb ranges"); + DoCheck(ZeroBBRangesYamlString, + "invalid zero number of BB ranges at offset 3 in " + "SHT_LLVM_BB_ADDR_MAP section with index 1"); + } + SmallString<128> CommonVersionedYamlString(CommonYamlString); CommonVersionedYamlString += R"( - Version: 2 - BBEntries: - - ID: 1 - AddressOffset: 0x0 - Size: 0x1 - Metadata: 0x2 + - Version: 2 + BBRanges: + - BaseAddress: 0x11111 + BBEntries: + - AddressOffset: 0x0 + Size: 0x1 + Metadata: 0x2 )"; // Check that we can detect the 
malformed encoding when the section is @@ -539,24 +554,24 @@ Sections: SmallVector, 3> OverInt32LimitYamlStrings( 3, CommonVersionedYamlString); OverInt32LimitYamlStrings[0] += R"( - - ID: 1 - AddressOffset: 0x100000000 - Size: 0xFFFFFFFF - Metadata: 0xFFFFFFFF + - ID: 1 + AddressOffset: 0x100000000 + Size: 0xFFFFFFFF + Metadata: 0xFFFFFFFF )"; OverInt32LimitYamlStrings[1] += R"( - - ID: 2 - AddressOffset: 0xFFFFFFFF - Size: 0x100000000 - Metadata: 0xFFFFFFFF + - ID: 2 + AddressOffset: 0xFFFFFFFF + Size: 0x100000000 + Metadata: 0xFFFFFFFF )"; OverInt32LimitYamlStrings[2] += R"( - - ID: 3 - AddressOffset: 0xFFFFFFFF - Size: 0xFFFFFFFF - Metadata: 0x100000000 + - ID: 3 + AddressOffset: 0xFFFFFFFF + Size: 0xFFFFFFFF + Metadata: 0x100000000 )"; { @@ -602,7 +617,7 @@ Sections: // with an out-of-range value. SmallString<128> OverLimitNumBlocks(CommonVersionedYamlString); OverLimitNumBlocks += R"( - NumBlocks: 0x100000000 + NumBlocks: 0x100000000 )"; { @@ -610,6 +625,16 @@ Sections: DoCheck(OverLimitNumBlocks, "ULEB128 value at offset 0xa exceeds UINT32_MAX (0x100000000)"); } + + // Check for proper error handling when the 'NumBBRanges' field is overridden + // with an out-of-range value. + SmallString<128> OverLimitNumBBRanges(CommonVersionedYamlString); + OverLimitNumBBRanges += R"( + NumBBRanges: 0x100000000 + Feature: 0x8 +)"; + DoCheck(OverLimitNumBBRanges, + "ULEB128 value at offset 0x2 exceeds UINT32_MAX (0x100000000)"); } // Test for the ELFObjectFile::readBBAddrMap API. 
@@ -626,51 +651,67 @@ Sections: Link: 1 Entries: - Version: 2 - Address: 0x11111 - BBEntries: - - ID: 1 - AddressOffset: 0x0 - Size: 0x1 - Metadata: 0x2 + BBRanges: + - BaseAddress: 0x11111 + BBEntries: + - ID: 1 + AddressOffset: 0x0 + Size: 0x1 + Metadata: 0x2 - Name: .llvm_bb_addr_map_2 Type: SHT_LLVM_BB_ADDR_MAP Link: 1 Entries: - Version: 2 - Address: 0x22222 - BBEntries: - - ID: 2 - AddressOffset: 0x0 - Size: 0x2 - Metadata: 0x4 + Feature: 0x8 + BBRanges: + - BaseAddress: 0x22222 + BBEntries: + - ID: 2 + AddressOffset: 0x0 + Size: 0x2 + Metadata: 0x4 + - BaseAddress: 0xFFFFF + BBEntries: + - ID: 15 + AddressOffset: 0xF0 + Size: 0xF1 + Metadata: 0x1F - Name: .llvm_bb_addr_map_3 Type: SHT_LLVM_BB_ADDR_MAP Link: 2 Entries: - Version: 1 - Address: 0x33333 - BBEntries: - - ID: 0 - AddressOffset: 0x0 - Size: 0x3 - Metadata: 0x6 + BBRanges: + - BaseAddress: 0x33333 + BBEntries: + - ID: 0 + AddressOffset: 0x0 + Size: 0x3 + Metadata: 0x6 - Name: .llvm_bb_addr_map_4 - Type: SHT_LLVM_BB_ADDR_MAP_V0 + Type: SHT_LLVM_BB_ADDR_MAP # Link: 0 (by default, can be overriden) Entries: - - Version: 0 - Address: 0x44444 - BBEntries: - - ID: 0 - AddressOffset: 0x0 - Size: 0x4 - Metadata: 0x18 + - Version: 2 + BBRanges: + - BaseAddress: 0x44444 + BBEntries: + - ID: 0 + AddressOffset: 0x0 + Size: 0x4 + Metadata: 0x18 )"); - BBAddrMap E1(0x11111, {{1, 0x0, 0x1, {false, true, false, false, false}}}); - BBAddrMap E2(0x22222, {{2, 0x0, 0x2, {false, false, true, false, false}}}); - BBAddrMap E3(0x33333, {{0, 0x0, 0x3, {false, true, true, false, false}}}); - BBAddrMap E4(0x44444, {{0, 0x0, 0x4, {false, false, false, true, true}}}); + BBAddrMap E1 = { + {{0x11111, {{1, 0x0, 0x1, {false, true, false, false, false}}}}}}; + BBAddrMap E2 = { + {{0x22222, {{2, 0x0, 0x2, {false, false, true, false, false}}}}, + {0xFFFFF, {{15, 0xF0, 0xF1, {true, true, true, true, true}}}}}}; + BBAddrMap E3 = { + {{0x33333, {{0, 0x0, 0x3, {false, true, true, false, false}}}}}}; + BBAddrMap E4 = { + {{0x44444, {{0, 
0x0, 0x4, {false, false, false, true, true}}}}}}; std::vector Section0BBAddrMaps = {E4}; std::vector Section1BBAddrMaps = {E3}; @@ -734,13 +775,13 @@ Sections: // (not present) section. SmallString<128> InvalidLinkedYamlString(CommonYamlString); InvalidLinkedYamlString += R"( - Link: 10 + Link: 121 )"; DoCheckFails(InvalidLinkedYamlString, /*TextSectionIndex=*/4, "unable to get the linked-to section for " - "SHT_LLVM_BB_ADDR_MAP_V0 section with index 4: invalid section " - "index: 10"); + "SHT_LLVM_BB_ADDR_MAP section with index 4: invalid section " + "index: 121"); { SCOPED_TRACE("invalid linked section"); // Linked sections are not checked when we don't target a specific text @@ -752,14 +793,14 @@ Sections: // Check that we can detect when bb-address-map decoding fails. SmallString<128> TruncatedYamlString(CommonYamlString); TruncatedYamlString += R"( - ShSize: 0x8 + ShSize: 0xa )"; { SCOPED_TRACE("truncated section"); DoCheckFails(TruncatedYamlString, /*TextSectionIndex=*/std::nullopt, - "unable to read SHT_LLVM_BB_ADDR_MAP_V0 section with index 4: " - "unable to decode LEB128 at offset 0x00000008: malformed " + "unable to read SHT_LLVM_BB_ADDR_MAP section with index 4: " + "unable to decode LEB128 at offset 0x0000000a: malformed " "uleb128, extends past end"); // Check that we can read the other section's bb-address-maps which are @@ -782,7 +823,6 @@ Sections: - Type: SHT_LLVM_BB_ADDR_MAP Name: .llvm_bb_addr_map Entries: - - Address: 0x11111 )"); auto DoCheck = [&](StringRef YamlString, const char *ErrMsg) { @@ -806,12 +846,13 @@ Sections: // Check that we can detect unsupported versions that are too old. 
SmallString<128> UnsupportedLowVersionYamlString(CommonYamlString); UnsupportedLowVersionYamlString += R"( - Version: 1 + - Version: 1 Feature: 0x4 - BBEntries: - - AddressOffset: 0x0 - Size: 0x1 - Metadata: 0x2 + BBRanges: + - BBEntries: + - AddressOffset: 0x0 + Size: 0x1 + Metadata: 0x2 )"; { @@ -823,38 +864,39 @@ Sections: SmallString<128> CommonVersionedYamlString(CommonYamlString); CommonVersionedYamlString += R"( - Version: 2 - BBEntries: - - ID: 1 - AddressOffset: 0x0 - Size: 0x1 - Metadata: 0x2 + - Version: 2 + BBRanges: + - BBEntries: + - ID: 1 + AddressOffset: 0x0 + Size: 0x1 + Metadata: 0x2 )"; // Check that we fail when function entry count is enabled but not provided. SmallString<128> MissingFuncEntryCount(CommonYamlString); MissingFuncEntryCount += R"( - Version: 2 + - Version: 2 Feature: 0x01 )"; { SCOPED_TRACE("missing function entry count"); DoCheck(MissingFuncEntryCount, - "unable to decode LEB128 at offset 0x0000000b: malformed uleb128, " - "extends past end"); + "unexpected end of data at offset 0x2 while reading [0x2, 0xa)"); } // Check that we fail when basic block frequency is enabled but not provided. SmallString<128> MissingBBFreq(CommonYamlString); MissingBBFreq += R"( - Version: 2 + - Version: 2 Feature: 0x02 - BBEntries: - - ID: 1 - AddressOffset: 0x0 - Size: 0x1 - Metadata: 0x2 + BBRanges: + - BBEntries: + - ID: 1 + AddressOffset: 0x0 + Size: 0x1 + Metadata: 0x2 )"; { @@ -866,21 +908,22 @@ Sections: // Check that we fail when branch probability is enabled but not provided. 
SmallString<128> MissingBrProb(CommonYamlString); MissingBrProb += R"( - Version: 2 + - Version: 2 Feature: 0x04 - BBEntries: - - ID: 1 - AddressOffset: 0x0 - Size: 0x1 - Metadata: 0x6 - - ID: 2 - AddressOffset: 0x1 - Size: 0x1 - Metadata: 0x2 - - ID: 3 - AddressOffset: 0x2 - Size: 0x1 - Metadata: 0x2 + BBRanges: + - BBEntries: + - ID: 1 + AddressOffset: 0x0 + Size: 0x1 + Metadata: 0x6 + - ID: 2 + AddressOffset: 0x1 + Size: 0x1 + Metadata: 0x2 + - ID: 3 + AddressOffset: 0x2 + Size: 0x1 + Metadata: 0x2 PGOAnalyses: - PGOBBEntries: - Successors: @@ -914,13 +957,14 @@ Sections: Link: 1 Entries: - Version: 2 - Address: 0x11111 Feature: 0x1 - BBEntries: - - ID: 1 - AddressOffset: 0x0 - Size: 0x1 - Metadata: 0x2 + BBRanges: + - BaseAddress: 0x11111 + BBEntries: + - ID: 1 + AddressOffset: 0x0 + Size: 0x1 + Metadata: 0x2 PGOAnalyses: - FuncEntryCount: 892 - Name: .llvm_bb_addr_map_2 @@ -928,13 +972,14 @@ Sections: Link: 1 Entries: - Version: 2 - Address: 0x22222 Feature: 0x2 - BBEntries: - - ID: 2 - AddressOffset: 0x0 - Size: 0x2 - Metadata: 0x4 + BBRanges: + - BaseAddress: 0x22222 + BBEntries: + - ID: 2 + AddressOffset: 0x0 + Size: 0x2 + Metadata: 0x4 PGOAnalyses: - PGOBBEntries: - BBFreq: 343 @@ -943,21 +988,22 @@ Sections: Link: 2 Entries: - Version: 2 - Address: 0x33333 Feature: 0x4 - BBEntries: - - ID: 0 - AddressOffset: 0x0 - Size: 0x3 - Metadata: 0x6 - - ID: 1 - AddressOffset: 0x0 - Size: 0x3 - Metadata: 0x4 - - ID: 2 - AddressOffset: 0x0 - Size: 0x3 - Metadata: 0x0 + BBRanges: + - BaseAddress: 0x33333 + BBEntries: + - ID: 0 + AddressOffset: 0x0 + Size: 0x3 + Metadata: 0x6 + - ID: 1 + AddressOffset: 0x0 + Size: 0x3 + Metadata: 0x4 + - ID: 2 + AddressOffset: 0x0 + Size: 0x3 + Metadata: 0x0 PGOAnalyses: - PGOBBEntries: - Successors: @@ -974,25 +1020,26 @@ Sections: # Link: 0 (by default, can be overriden) Entries: - Version: 2 - Address: 0x44444 Feature: 0x7 - BBEntries: - - ID: 0 - AddressOffset: 0x0 - Size: 0x4 - Metadata: 0x18 - - ID: 1 - AddressOffset: 0x0 - Size: 
0x4 - Metadata: 0x0 - - ID: 2 - AddressOffset: 0x0 - Size: 0x4 - Metadata: 0x0 - - ID: 3 - AddressOffset: 0x0 - Size: 0x4 - Metadata: 0x0 + BBRanges: + - BaseAddress: 0x44444 + BBEntries: + - ID: 0 + AddressOffset: 0x0 + Size: 0x4 + Metadata: 0x18 + - ID: 1 + AddressOffset: 0x0 + Size: 0x4 + Metadata: 0x0 + - ID: 2 + AddressOffset: 0x0 + Size: 0x4 + Metadata: 0x0 + - ID: 3 + AddressOffset: 0x0 + Size: 0x4 + Metadata: 0x0 PGOAnalyses: - FuncEntryCount: 1000 PGOBBEntries: @@ -1021,34 +1068,74 @@ Sections: # Link: 0 (by default, can be overriden) Entries: - Version: 2 - Address: 0x55555 Feature: 0x0 - BBEntries: - - ID: 2 - AddressOffset: 0x0 - Size: 0x2 - Metadata: 0x4 + BBRanges: + - BaseAddress: 0x55555 + BBEntries: + - ID: 2 + AddressOffset: 0x0 + Size: 0x2 + Metadata: 0x4 PGOAnalyses: [{}] - )"); - - BBAddrMap E1(0x11111, {{1, 0x0, 0x1, {false, true, false, false, false}}}); - PGOAnalysisMap P1 = {892, {}, {true, false, false}}; - BBAddrMap E2(0x22222, {{2, 0x0, 0x2, {false, false, true, false, false}}}); - PGOAnalysisMap P2 = {{}, {{BlockFrequency(343), {}}}, {false, true, false}}; - BBAddrMap E3(0x33333, {{0, 0x0, 0x3, {false, true, true, false, false}}, - {1, 0x3, 0x3, {false, false, true, false, false}}, - {2, 0x6, 0x3, {false, false, false, false, false}}}); + - Name: .llvm_bb_addr_map_6 + Type: SHT_LLVM_BB_ADDR_MAP + # Link: 0 (by default, can be overriden) + Entries: + - Version: 2 + Feature: 0xc + BBRanges: + - BaseAddress: 0x66666 + BBEntries: + - ID: 0 + AddressOffset: 0x0 + Size: 0x6 + Metadata: 0x6 + - ID: 1 + AddressOffset: 0x0 + Size: 0x6 + Metadata: 0x4 + - BaseAddress: 0x666661 + BBEntries: + - ID: 2 + AddressOffset: 0x0 + Size: 0x6 + Metadata: 0x0 + PGOAnalyses: + - PGOBBEntries: + - Successors: + - ID: 1 + BrProb: 0x22222222 + - ID: 2 + BrProb: 0xcccccccc + - Successors: + - ID: 2 + BrProb: 0x88888888 + - Successors: [] +)"); + + BBAddrMap E1 = { + {{0x11111, {{1, 0x0, 0x1, {false, true, false, false, false}}}}}}; + PGOAnalysisMap P1 = {892, {}, 
{true, false, false, false}}; + BBAddrMap E2 = { + {{0x22222, {{2, 0x0, 0x2, {false, false, true, false, false}}}}}}; + PGOAnalysisMap P2 = { + {}, {{BlockFrequency(343), {}}}, {false, true, false, false}}; + BBAddrMap E3 = {{{0x33333, + {{0, 0x0, 0x3, {false, true, true, false, false}}, + {1, 0x3, 0x3, {false, false, true, false, false}}, + {2, 0x6, 0x3, {false, false, false, false, false}}}}}}; PGOAnalysisMap P3 = {{}, {{{}, {{1, BranchProbability::getRaw(0x1111'1111)}, {2, BranchProbability::getRaw(0xeeee'eeee)}}}, {{}, {{2, BranchProbability::getRaw(0xffff'ffff)}}}, {{}, {}}}, - {false, false, true}}; - BBAddrMap E4(0x44444, {{0, 0x0, 0x4, {false, false, false, true, true}}, - {1, 0x4, 0x4, {false, false, false, false, false}}, - {2, 0x8, 0x4, {false, false, false, false, false}}, - {3, 0xc, 0x4, {false, false, false, false, false}}}); + {false, false, true, false}}; + BBAddrMap E4 = {{{0x44444, + {{0, 0x0, 0x4, {false, false, false, true, true}}, + {1, 0x4, 0x4, {false, false, false, false, false}}, + {2, 0x8, 0x4, {false, false, false, false, false}}, + {3, 0xc, 0x4, {false, false, false, false, false}}}}}}; PGOAnalysisMap P4 = { 1000, {{BlockFrequency(1000), @@ -1060,19 +1147,32 @@ Sections: {3, BranchProbability::getRaw(0xeeee'eeee)}}}, {BlockFrequency(18), {{3, BranchProbability::getRaw(0xffff'ffff)}}}, {BlockFrequency(1000), {}}}, - {true, true, true}}; - BBAddrMap E5(0x55555, {{2, 0x0, 0x2, {false, false, true, false, false}}}); - PGOAnalysisMap P5 = {{}, {}, {false, false, false}}; + {true, true, true, false}}; + BBAddrMap E5 = { + {{0x55555, {{2, 0x0, 0x2, {false, false, true, false, false}}}}}}; + PGOAnalysisMap P5 = {{}, {}, {false, false, false, false}}; + BBAddrMap E6 = { + {{0x66666, + {{0, 0x0, 0x6, {false, true, true, false, false}}, + {1, 0x6, 0x6, {false, false, true, false, false}}}}, + {0x666661, {{2, 0x0, 0x6, {false, false, false, false, false}}}}}}; + PGOAnalysisMap P6 = {{}, + {{{}, + {{1, BranchProbability::getRaw(0x2222'2222)}, + {2, 
BranchProbability::getRaw(0xcccc'cccc)}}}, + {{}, {{2, BranchProbability::getRaw(0x8888'8888)}}}, + {{}, {}}}, + {false, false, true, true}}; - std::vector Section0BBAddrMaps = {E4, E5}; + std::vector Section0BBAddrMaps = {E4, E5, E6}; std::vector Section1BBAddrMaps = {E3}; std::vector Section2BBAddrMaps = {E1, E2}; - std::vector AllBBAddrMaps = {E1, E2, E3, E4, E5}; + std::vector AllBBAddrMaps = {E1, E2, E3, E4, E5, E6}; - std::vector Section0PGOAnalysisMaps = {P4, P5}; + std::vector Section0PGOAnalysisMaps = {P4, P5, P6}; std::vector Section1PGOAnalysisMaps = {P3}; std::vector Section2PGOAnalysisMaps = {P1, P2}; - std::vector AllPGOAnalysisMaps = {P1, P2, P3, P4, P5}; + std::vector AllPGOAnalysisMaps = {P1, P2, P3, P4, P5, P6}; auto DoCheckSucceeds = [&](StringRef YamlString, std::optional TextSectionIndex, @@ -1098,10 +1198,21 @@ Sections: EXPECT_EQ(*BBAddrMaps, ExpectedResult); if (ExpectedPGO) { EXPECT_EQ(BBAddrMaps->size(), PGOAnalyses.size()); + for (const auto &PGO : PGOAnalyses) { + errs() << "FuncEntryCount: " << PGO.FuncEntryCount << "\n"; + for (const auto &PGOBB : PGO.BBEntries) + errs() << "\tBB: " << PGOBB.BlockFreq.getFrequency() << "\n"; + } + errs() << " To expected:\n"; + for (const auto &PGO : *ExpectedPGO) { + errs() << "FuncEntryCount: " << PGO.FuncEntryCount << "\n"; + for (const auto &PGOBB : PGO.BBEntries) + errs() << "\tBB: " << PGOBB.BlockFreq.getFrequency() << "\n"; + } EXPECT_EQ(PGOAnalyses, *ExpectedPGO); for (auto &&[BB, PGO] : llvm::zip(*BBAddrMaps, PGOAnalyses)) { if (PGO.FeatEnable.BBFreq || PGO.FeatEnable.BrProb) - EXPECT_EQ(BB.getBBEntries().size(), PGO.BBEntries.size()); + EXPECT_EQ(BB.getNumBBEntries(), PGO.BBEntries.size()); } } }; @@ -1157,15 +1268,15 @@ Sections: // (not present) section. 
SmallString<128> InvalidLinkedYamlString(CommonYamlString); InvalidLinkedYamlString += R"( - Link: 10 + Link: 121 )"; { SCOPED_TRACE("invalid linked section"); DoCheckFails(InvalidLinkedYamlString, /*TextSectionIndex=*/5, "unable to get the linked-to section for " - "SHT_LLVM_BB_ADDR_MAP section with index 5: invalid section " - "index: 10"); + "SHT_LLVM_BB_ADDR_MAP section with index 6: invalid section " + "index: 121"); // Linked sections are not checked when we don't target a specific text // section. @@ -1183,10 +1294,10 @@ Sections: { SCOPED_TRACE("truncated section"); - DoCheckFails(TruncatedYamlString, /*TextSectionIndex=*/std::nullopt, - "unable to read SHT_LLVM_BB_ADDR_MAP section with index 5: " - "unable to decode LEB128 at offset 0x0000000a: malformed " - "uleb128, extends past end"); + DoCheckFails( + TruncatedYamlString, /*TextSectionIndex=*/std::nullopt, + "unable to read SHT_LLVM_BB_ADDR_MAP section with index 6: " + "unexpected end of data at offset 0xa while reading [0x3, 0xb)"); // Check that we can read the other section's bb-address-maps which are // valid. 
DoCheckSucceeds(TruncatedYamlString, /*TextSectionIndex=*/2, diff --git a/llvm/unittests/Object/ELFTypesTest.cpp b/llvm/unittests/Object/ELFTypesTest.cpp index f09ab5e12438..f04d45cf0983 100644 --- a/llvm/unittests/Object/ELFTypesTest.cpp +++ b/llvm/unittests/Object/ELFTypesTest.cpp @@ -100,34 +100,36 @@ static_assert( decltype(BBAddrMap::BBEntry::ID)>, "PGOAnalysisMap should use the same type for basic block ID as BBAddrMap"); -TEST(ELFTypesTest, PGOAnalysisMapFeaturesEncodingTest) { - const std::array Decoded = { - {{false, false, false}, - {true, false, false}, - {false, true, false}, - {false, false, true}, - {true, true, false}, - {false, true, true}, - {true, true, true}}}; - const std::array Encoded = { - {0b000, 0b001, 0b010, 0b100, 0b011, 0b110, 0b111}}; +TEST(ELFTypesTest, BBAddrMapFeaturesEncodingTest) { + const std::array Decoded = { + {{false, false, false, false}, + {true, false, false, false}, + {false, true, false, false}, + {false, false, true, false}, + {false, false, false, true}, + {true, true, false, false}, + {false, true, true, false}, + {false, true, true, true}, + {true, true, true, true}}}; + const std::array Encoded = { + {0b0000, 0b0001, 0b0010, 0b0100, 0b1000, 0b0011, 0b0110, 0b1110, 0b1111}}; for (const auto &[Feat, EncodedVal] : llvm::zip(Decoded, Encoded)) EXPECT_EQ(Feat.encode(), EncodedVal); for (const auto &[Feat, EncodedVal] : llvm::zip(Decoded, Encoded)) { - Expected FeatEnableOrError = - PGOAnalysisMap::Features::decode(EncodedVal); + Expected FeatEnableOrError = + BBAddrMap::Features::decode(EncodedVal); ASSERT_THAT_EXPECTED(FeatEnableOrError, Succeeded()); EXPECT_EQ(*FeatEnableOrError, Feat); } } -TEST(ELFTypesTest, PGOAnalysisMapFeaturesInvalidEncodingTest) { +TEST(ELFTypesTest, BBAddrMapFeaturesInvalidEncodingTest) { const std::array Errors = { - "invalid encoding for PGOAnalysisMap::Features: 0x8", - "invalid encoding for PGOAnalysisMap::Features: 0xff"}; - const std::array Values = {{0b1000, 0b1111'1111}}; + "invalid 
encoding for BBAddrMap::Features: 0x10", + "invalid encoding for BBAddrMap::Features: 0xff"}; + const std::array Values = {{0b10000, 0b1111'1111}}; for (const auto &[Val, Error] : llvm::zip(Values, Errors)) { - EXPECT_THAT_ERROR(PGOAnalysisMap::Features::decode(Val).takeError(), + EXPECT_THAT_ERROR(BBAddrMap::Features::decode(Val).takeError(), FailedWithMessage(Error)); } } -- Gitee From 741090a00c16943f5849922929e63dcf0b058adb Mon Sep 17 00:00:00 2001 From: Daniel Hoekwater Date: Wed, 7 Feb 2024 23:23:32 +0000 Subject: [PATCH 33/47] [Driver] Allow -fbasic-block-sections for AArch64 ELF (#80916) Basic block sections "all" doesn't work on AArch64 since branch relaxation may create new basic blocks. However, the other basic block section modes should work out of the box since machine function splitting already uses the basic block sections pass. --- clang/lib/Driver/ToolChains/Clang.cpp | 8 ++++++++ clang/test/Driver/fbasic-block-sections.c | 4 ++++ 2 files changed, 12 insertions(+) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index e0cc4f9e5de6..1d48d0d22949 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5981,6 +5981,14 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, << A->getAsString(Args) << A->getValue(); else A->render(Args, CmdArgs); + } else if (Triple.isAArch64() && Triple.isOSBinFormatELF()) { + // "all" is not supported on AArch64 since branch relaxation creates new + // basic blocks for some cross-section branches. + if (Val != "labels" && Val != "none" && !Val.starts_with("list=")) + D.Diag(diag::err_drv_invalid_value) + << A->getAsString(Args) << A->getValue(); + else + A->render(Args, CmdArgs); } else if (Triple.isNVPTX()) { // Do not pass the option to the GPU compilation. We still want it enabled // for the host-side compilation, so seeing it here is not an error. 
diff --git a/clang/test/Driver/fbasic-block-sections.c b/clang/test/Driver/fbasic-block-sections.c index f5007faf2e38..b4f06c5451bf 100644 --- a/clang/test/Driver/fbasic-block-sections.c +++ b/clang/test/Driver/fbasic-block-sections.c @@ -2,6 +2,10 @@ // RUN: %clang -### --target=x86_64 -fbasic-block-sections=all %s -S 2>&1 | FileCheck -check-prefix=CHECK-OPT-ALL %s // RUN: %clang -### --target=x86_64 -fbasic-block-sections=list=%s %s -S 2>&1 | FileCheck -check-prefix=CHECK-OPT-LIST %s // RUN: %clang -### --target=x86_64 -fbasic-block-sections=labels %s -S 2>&1 | FileCheck -check-prefix=CHECK-OPT-LABELS %s +// RUN: %clang -### --target=aarch64 -fbasic-block-sections=none %s -S 2>&1 | FileCheck -check-prefix=CHECK-OPT-NONE %s +// RUN: %clang -### --target=aarch64 -fbasic-block-sections=list=%s %s -S 2>&1 | FileCheck -check-prefix=CHECK-OPT-LIST %s +// RUN: %clang -### --target=aarch64 -fbasic-block-sections=labels %s -S 2>&1 | FileCheck -check-prefix=CHECK-OPT-LABELS %s +// RUN: %clang -### --target=aarch64 -fbasic-block-sections=all %s -S 2>&1 | FileCheck -check-prefix=CHECK-INVALID-VALUE %s // RUN: not %clang -c --target=arm-unknown-linux -fbasic-block-sections=all %s -S 2>&1 | FileCheck -check-prefix=CHECK-TRIPLE %s // RUN: %clang -### --target=arm-unknown-linux -fbasic-block-sections=all -fbasic-block-sections=none %s -S 2>&1 \ // RUN: | FileCheck -check-prefix=CHECK-NOOPT %s -- Gitee From 2caec89098c0f1f4b109e58aa4c9560d07aa02db Mon Sep 17 00:00:00 2001 From: Micah Weston Date: Tue, 13 Feb 2024 21:53:05 -0500 Subject: [PATCH 34/47] [SHT_LLVM_BB_ADDR_MAP][obj2yaml] Implements PGOAnalysisMap for elf2yaml and tests. (#80924) Adds support to obj2yaml for PGO Analysis Map. Adds a test to both obj2yaml and yaml2obj. 
--- .../ELF/bb-addr-map-pgo-analysis-map.yaml | 232 ++++++++++++++++++ .../ELF/bb-addr-map-pgo-analysis-map.yaml | 83 +++++++ llvm/tools/obj2yaml/elf2yaml.cpp | 37 +++ 3 files changed, 352 insertions(+) create mode 100644 llvm/test/tools/obj2yaml/ELF/bb-addr-map-pgo-analysis-map.yaml create mode 100644 llvm/test/tools/yaml2obj/ELF/bb-addr-map-pgo-analysis-map.yaml diff --git a/llvm/test/tools/obj2yaml/ELF/bb-addr-map-pgo-analysis-map.yaml b/llvm/test/tools/obj2yaml/ELF/bb-addr-map-pgo-analysis-map.yaml new file mode 100644 index 000000000000..299bf463cf4b --- /dev/null +++ b/llvm/test/tools/obj2yaml/ELF/bb-addr-map-pgo-analysis-map.yaml @@ -0,0 +1,232 @@ +## Check how obj2yaml produces YAML PGO Analysis Map in .llvm_bb_addr_map. + +## Check that obj2yaml uses the "Entries" tag to describe an .llvm_bb_addr_map section. + +# RUN: yaml2obj --docnum=1 %s -o %t1 +# RUN: obj2yaml %t1 | FileCheck %s --check-prefix=VALID + +# VALID: --- !ELF +# VALID-NEXT: FileHeader: +# VALID-NEXT: Class: ELFCLASS64 +# VALID-NEXT: Data: ELFDATA2LSB +# VALID-NEXT: Type: ET_EXEC +# VALID-NEXT: Sections: +# VALID-NEXT: - Name: .llvm_bb_addr_map +# VALID-NEXT: Type: SHT_LLVM_BB_ADDR_MAP +# VALID-NEXT: Entries: +# VALID-NEXT: - Version: 2 +# VALID-NEXT: Feature: 0x7 +## The 'BaseAddress' field is omitted when it's zero. 
+# VALID-NEXT: BBRanges: +# VALID-NEXT: - BBEntries: +# VALID-NEXT: - ID: 0 +# VALID-NEXT: AddressOffset: 0x1 +# VALID-NEXT: Size: 0x2 +# VALID-NEXT: Metadata: 0x3 +# VALID-NEXT: - ID: 2 +# VALID-NEXT: AddressOffset: 0x4 +# VALID-NEXT: Size: 0x5 +# VALID-NEXT: Metadata: 0x6 +# VALID-NEXT: - ID: 4 +# VALID-NEXT: AddressOffset: 0xFFFFFFFFFFFFFFF7 +# VALID-NEXT: Size: 0xFFFFFFFFFFFFFFF8 +# VALID-NEXT: Metadata: 0xFFFFFFFFFFFFFFF9 +# VALID-NEXT: - Version: 2 +# VALID-NEXT: Feature: 0xA +# VALID-NEXT: BBRanges: +# VALID-NEXT: - BaseAddress: 0xFFFFFFFFFFFFFF20 +# VALID-NEXT: BBEntries: +# VALID-NEXT: - ID: 6 +# VALID-NEXT: AddressOffset: 0xA +# VALID-NEXT: Size: 0xB +# VALID-NEXT: Metadata: 0xC +# VALID-NEXT: PGOAnalyses: +# VALID-NEXT: - FuncEntryCount: 100 +# VALID-NEXT: PGOBBEntries: +# VALID-NEXT: - BBFreq: 100 +# VALID-NEXT: Successors: +# VALID-NEXT: - ID: 2 +# VALID-NEXT: BrProb: 0x80000000 +# VALID-NEXT: - ID: 4 +# VALID-NEXT: BrProb: 0x80000000 +# VALID-NEXT: - BBFreq: 50 +# VALID-NEXT: Successors: +# VALID-NEXT: - ID: 4 +# VALID-NEXT: BrProb: 0xFFFFFFFF +# VALID-NEXT: - BBFreq: 100 +# VALID-NEXT: Successors: [] +# VALID-NEXT: PGOBBEntries: +# VALID-NEXT: - BBFreq: 20 + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC +Sections: + - Name: .llvm_bb_addr_map + Type: SHT_LLVM_BB_ADDR_MAP + ShSize: [[SIZE=]] + Entries: + - Version: 2 + Feature: 0x7 + BBRanges: + - BaseAddress: 0x0 + BBEntries: + - ID: 0 + AddressOffset: 0x1 + Size: 0x2 + Metadata: 0x3 + - ID: 2 + AddressOffset: 0x4 + Size: 0x5 + Metadata: 0x6 + - ID: 4 + AddressOffset: 0xFFFFFFFFFFFFFFF7 + Size: 0xFFFFFFFFFFFFFFF8 + Metadata: 0xFFFFFFFFFFFFFFF9 + - Version: 2 + Feature: 0xA + BBRanges: + - BaseAddress: 0xFFFFFFFFFFFFFF20 + BBEntries: + - ID: 6 + AddressOffset: 0xA + Size: 0xB + Metadata: 0xC + PGOAnalyses: + - FuncEntryCount: 100 + PGOBBEntries: + - BBFreq: 100 + Successors: + - ID: 2 + BrProb: 0x80000000 + - ID: 4 + BrProb: 0x80000000 + - BBFreq: 50 + Successors: + - 
ID: 4 + BrProb: 0xFFFFFFFF + - BBFreq: 100 + Successors: [] + - PGOBBEntries: + - BBFreq: 20 + +## Check obj2yaml can dump multiple .llvm_bb_addr_map sections. + +# RUN: yaml2obj --docnum=2 %s -o %t2 +# RUN: obj2yaml %t2 | FileCheck %s --check-prefix=MULTI + +# MULTI: --- !ELF +# MULTI-NEXT: FileHeader: +# MULTI-NEXT: Class: ELFCLASS64 +# MULTI-NEXT: Data: ELFDATA2LSB +# MULTI-NEXT: Type: ET_EXEC +# MULTI-NEXT: Sections: +# MULTI-NEXT: - Name: .llvm_bb_addr_map +# MULTI-NEXT: Type: SHT_LLVM_BB_ADDR_MAP +# MULTI-NEXT: Entries: +## Fieldx 'BaseAddress' is omitted when it is zero. +# MULTI-NEXT: - Version: 0 +# MULTI-NEXT: Feature: 0x3 +# MULTI-NEXT: BBRanges: +# MULTI-NEXT: - BBEntries: +# MULTI-NEXT: - ID: 0 +# MULTI-NEXT: AddressOffset: 0x1 +# MULTI-NEXT: Size: 0x2 +# MULTI-NEXT: Metadata: 0x3 +# MULTI-NEXT: PGOAnalyses: +# MULTI-NEXT: - FuncEntryCount: 0 +# MULTI-NEXT: PGOBBEntries: +# MULTI-NEXT: - BBFreq: 0 +# MULTI-NEXT: - Name: '.llvm_bb_addr_map (1)' +# MULTI-NEXT: Type: SHT_LLVM_BB_ADDR_MAP +# MULTI-NEXT: Entries: +# MULTI-NEXT: - Version: 0 +# MULTI-NEXT: Feature: 0x1 +# MULTI-NEXT: BBRanges: +# MULTI-NEXT: - BaseAddress: 0x20 +# MULTI-NEXT: BBEntries: [] +# MULTI-NEXT: PGOAnalyses: +# MULTI-NEXT: - FuncEntryCount: 0 + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC +Sections: + - Name: .llvm_bb_addr_map + Type: SHT_LLVM_BB_ADDR_MAP + Entries: +## Check that obj2yaml does not emit the 'BaseAddress' and 'Feature' fields when +## they are zero. 
+ - Version: 0 + Feature: 0x3 + BBRanges: + - BaseAddress: 0x0 + BBEntries: + - AddressOffset: 0x1 + Size: 0x2 + Metadata: 0x3 + PGOAnalyses: + - FuncEntryCount: 0 + PGOBBEntries: + - BBFreq: 0 + - Name: '.llvm_bb_addr_map (1)' + Type: SHT_LLVM_BB_ADDR_MAP + Entries: + - Version: 0 + Feature: 0x1 + BBRanges: + - BaseAddress: 0x20 + PGOAnalyses: + - FuncEntryCount: 0 + +## Check that obj2yaml uses the "Content" tag to describe an .llvm_bb_addr_map section +## when it can't extract the entries, for example, when the section is truncated. + +# RUN: yaml2obj --docnum=1 -DSIZE=0x1D %s -o %t3 +# RUN: obj2yaml %t3 | FileCheck %s --check-prefixes=TRUNCATED,INVALID + +# INVALID: --- !ELF +# INVALID-NEXT: FileHeader: +# INVALID-NEXT: Class: ELFCLASS64 +# INVALID-NEXT: Data: ELFDATA2LSB +# INVALID-NEXT: Type: ET_EXEC +# INVALID-NEXT: Sections: +# INVALID-NEXT: - Name: .llvm_bb_addr_map +# INVALID-NEXT: Type: SHT_LLVM_BB_ADDR_MAP +# TRUNCATED-NEXT: Content: {{([[:xdigit:]]{58})}}{{$}} +# TRUNCATED-NEXT: Content: {{([[:xdigit:]]{58})}}{{$}} + +## Check that obj2yaml uses the "Content" tag when original YAML does not +## provide a PGO field that was enabled in the feature byte + +# RUN: yaml2obj --docnum=3 %s -o %t4 +# RUN: obj2yaml %t4 | FileCheck %s --check-prefix=MISSING-FEC + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC +Sections: + - Name: '.llvm_bb_addr_map' + Type: SHT_LLVM_BB_ADDR_MAP + Entries: + - Version: 0 + Feature: 0x1 + BBRanges: + - BaseAddress: 0x20 + +# MISSING-FEC: --- !ELF +# MISSING-FEC-NEXT: FileHeader: +# MISSING-FEC-NEXT: Class: ELFCLASS64 +# MISSING-FEC-NEXT: Data: ELFDATA2LSB +# MISSING-FEC-NEXT: Type: ET_EXEC +# MISSING-FEC-NEXT: Sections: +# MISSING-FEC-NEXT: - Name: .llvm_bb_addr_map +# MISSING-FEC-NEXT: Type: SHT_LLVM_BB_ADDR_MAP +# MISSING-FEC-NEXT: Content: '{{([[:xdigit:]]+)}}'{{$}} + diff --git a/llvm/test/tools/yaml2obj/ELF/bb-addr-map-pgo-analysis-map.yaml 
b/llvm/test/tools/yaml2obj/ELF/bb-addr-map-pgo-analysis-map.yaml new file mode 100644 index 000000000000..4dfaf60be3c0 --- /dev/null +++ b/llvm/test/tools/yaml2obj/ELF/bb-addr-map-pgo-analysis-map.yaml @@ -0,0 +1,83 @@ +## Check how yaml2obj produces PGO Analysis Map in .llvm_bb_addr_map section. + +# RUN: yaml2obj --docnum=1 %s -o %t1 +# RUN: llvm-readobj --sections --section-data %t1 | FileCheck %s + +# Case 4: Specify Entries. +# CHECK: Name: .llvm_bb_addr_map (1) +# CHECK: SectionData ( +# CHECK-NEXT: 0000: 02072000 00000000 0000010B 010203E8 +# CHECK-NEXT: 0010: 07E80702 0CEEDDBB F70E0D91 A2C48801 +# CHECK-NEXT: ) + +# Case 7: Not including a field which is enabled in feature doesn't emit value +# CHECK: Name: .llvm_bb_addr_map (1) +# CHECK: SectionData ( +# CHECK-NEXT: 0000: 02012000 00000000 0000020D 010203 | +# CHECK-NEXT: ) + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC +Sections: + +## Test the following cases: + +## 1) We can produce an .llvm_bb_addr_map section from a description with +## Entries and PGO Analysis data. + - Name: '.llvm_bb_addr_map (1)' + Type: SHT_LLVM_BB_ADDR_MAP + Entries: + - Version: 2 + Feature: 0x7 + BBRanges: + - BaseAddress: 0x0000000000000020 + BBEntries: + - ID: 11 + AddressOffset: 0x00000001 + Size: 0x00000002 + Metadata: 0x00000003 + PGOAnalyses: + - FuncEntryCount: 1000 + PGOBBEntries: + - BBFreq: 1000 + Successors: + - ID: 12 + BrProb: 0xeeeeeeee + - ID: 13 + BrProb: 0x11111111 + +## 2) According to feature we have FuncEntryCount but none is provided in yaml + - Name: '.llvm_bb_addr_map (2)' + Type: SHT_LLVM_BB_ADDR_MAP + Entries: + - Version: 2 + Feature: 0x1 + BBRanges: + - BaseAddress: 0x0000000000000020 + NumBlocks: 2 + BBEntries: + - ID: 13 + AddressOffset: 0x00000001 + Size: 0x00000002 + Metadata: 0x00000003 + +## Check that yaml2obj generates a warning when we use unsupported feature. 
+# RUN: yaml2obj --docnum=2 %s 2>&1 | FileCheck %s --check-prefix=INVALID-FEATURE +# INVALID-FEATURE: warning: invalid encoding for BBAddrMap::Features: 0xff + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC +Sections: + - Name: '.llvm_bb_addr_map' + Type: SHT_LLVM_BB_ADDR_MAP + Entries: + - Version: 2 +## Specify unsupported feature + Feature: 0xFF + diff --git a/llvm/tools/obj2yaml/elf2yaml.cpp b/llvm/tools/obj2yaml/elf2yaml.cpp index ddd21ed889f2..12b621262c6d 100644 --- a/llvm/tools/obj2yaml/elf2yaml.cpp +++ b/llvm/tools/obj2yaml/elf2yaml.cpp @@ -889,6 +889,8 @@ ELFDumper::dumpBBAddrMapSection(const Elf_Shdr *Shdr) { DataExtractor Data(Content, Obj.isLE(), ELFT::Is64Bits ? 8 : 4); std::vector Entries; + bool HasAnyPGOAnalysisMapEntry = false; + std::vector PGOAnalyses; DataExtractor::Cursor Cur(0); uint8_t Version = 0; uint8_t Feature = 0; @@ -905,6 +907,7 @@ ELFDumper::dumpBBAddrMapSection(const Elf_Shdr *Shdr) { } uint64_t NumBBRanges = 1; uint64_t NumBlocks = 0; + uint32_t TotalNumBlocks = 0; auto FeatureOrErr = llvm::object::BBAddrMap::Features::decode(Feature); if (!FeatureOrErr) return FeatureOrErr.takeError(); @@ -934,10 +937,42 @@ ELFDumper::dumpBBAddrMapSection(const Elf_Shdr *Shdr) { uint64_t Metadata = Data.getULEB128(Cur); BBEntries.push_back({ID, Offset, Size, Metadata}); } + TotalNumBlocks += BBEntries.size(); BBRanges.push_back({BaseAddress, /*NumBlocks=*/{}, BBEntries}); } Entries.push_back( {Version, Feature, /*NumBBRanges=*/{}, std::move(BBRanges)}); + + ELFYAML::PGOAnalysisMapEntry &PGOAnalysis = PGOAnalyses.emplace_back(); + if (FeatureOrErr->hasPGOAnalysis()) { + HasAnyPGOAnalysisMapEntry = true; + + if (FeatureOrErr->FuncEntryCount) + PGOAnalysis.FuncEntryCount = Data.getULEB128(Cur); + + if (FeatureOrErr->hasPGOAnalysisBBData()) { + auto &PGOBBEntries = PGOAnalysis.PGOBBEntries.emplace(); + for (uint64_t BlockIndex = 0; Cur && BlockIndex < TotalNumBlocks; + ++BlockIndex) { + auto &PGOBBEntry = 
PGOBBEntries.emplace_back(); + if (FeatureOrErr->BBFreq) { + PGOBBEntry.BBFreq = Data.getULEB128(Cur); + if (!Cur) + break; + } + + if (FeatureOrErr->BrProb) { + auto &SuccEntries = PGOBBEntry.Successors.emplace(); + uint64_t SuccCount = Data.getULEB128(Cur); + for (uint64_t SuccIdx = 0; Cur && SuccIdx < SuccCount; ++SuccIdx) { + uint32_t ID = Data.getULEB128(Cur); + uint32_t BrProb = Data.getULEB128(Cur); + SuccEntries.push_back({ID, BrProb}); + } + } + } + } + } } if (!Cur) { @@ -946,6 +981,8 @@ ELFDumper::dumpBBAddrMapSection(const Elf_Shdr *Shdr) { S->Content = yaml::BinaryRef(Content); } else { S->Entries = std::move(Entries); + if (HasAnyPGOAnalysisMapEntry) + S->PGOAnalyses = std::move(PGOAnalyses); } return S.release(); -- Gitee From 6334c0b31329ee4b479c03bcab0932c4b98d45e9 Mon Sep 17 00:00:00 2001 From: Daniel Hoekwater Date: Fri, 4 Aug 2023 18:58:55 +0000 Subject: [PATCH 35/47] [CodeGen][AArch64] Don't split functions with a red zone on AArch64 Because unconditional branch relaxation on AArch64 grows the stack to spill a register, splitting a function would cause the red zone to be overwritten. Explicitly disable MFS for such functions. 
Differential Revision: https://reviews.llvm.org/D157127 --- llvm/include/llvm/CodeGen/TargetInstrInfo.h | 4 +++ llvm/lib/CodeGen/MachineFunctionSplitter.cpp | 17 ++-------- llvm/lib/CodeGen/TargetInstrInfo.cpp | 20 ++++++++++++ llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 11 +++++++ llvm/lib/Target/AArch64/AArch64InstrInfo.h | 2 ++ .../CodeGen/X86/machine-function-splitter.ll | 31 +++++++++++++++++++ 6 files changed, 71 insertions(+), 14 deletions(-) diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h index 93dfcfc39924..1f5893e72382 100644 --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -2066,6 +2066,10 @@ public: return false; } + /// Return true if the function is a viable candidate for machine function + /// splitting. The criteria for if a function can be split may vary by target. + virtual bool isFunctionSafeToSplit(const MachineFunction &MF) const; + /// Produce the expression describing the \p MI loading a value into /// the physical register \p Reg. This hook should only be used with /// \p MIs belonging to VReg-less functions. diff --git a/llvm/lib/CodeGen/MachineFunctionSplitter.cpp b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp index fbc071536d22..64f7c36c22db 100644 --- a/llvm/lib/CodeGen/MachineFunctionSplitter.cpp +++ b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp @@ -35,6 +35,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/Function.h" #include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" @@ -135,22 +136,10 @@ bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) { if (!UseProfileData && !SplitAllEHCode) return false; - // TODO: We don't split functions where a section attribute has been set - // since the split part may not be placed in a contiguous region. 
It may also - // be more beneficial to augment the linker to ensure contiguous layout of - // split functions within the same section as specified by the attribute. - if (MF.getFunction().hasSection() || - MF.getFunction().hasFnAttribute("implicit-section-name")) + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + if (!TII.isFunctionSafeToSplit(MF)) return false; - // We don't want to proceed further for cold functions - // or functions of unknown hotness. Lukewarm functions have no prefix. - std::optional SectionPrefix = MF.getFunction().getSectionPrefix(); - if (SectionPrefix && - (*SectionPrefix == "unlikely" || *SectionPrefix == "unknown")) { - return false; - } - // Renumbering blocks here preserves the order of the blocks as // sortBasicBlocksAndUpdateBranches uses the numeric identifier to sort // blocks. Preserving the order of blocks is essential to retaining decisions diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp index 2c6eb6c57cfc..922f0d826752 100644 --- a/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -1382,6 +1382,26 @@ bool TargetInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel, return (DefCycle != -1 && DefCycle <= 1); } +bool TargetInstrInfo::isFunctionSafeToSplit(const MachineFunction &MF) const { + // TODO: We don't split functions where a section attribute has been set + // since the split part may not be placed in a contiguous region. It may also + // be more beneficial to augment the linker to ensure contiguous layout of + // split functions within the same section as specified by the attribute. + if (MF.getFunction().hasSection() || + MF.getFunction().hasFnAttribute("implicit-section-name")) + return false; + + // We don't want to proceed further for cold functions + // or functions of unknown hotness. Lukewarm functions have no prefix. 
+ std::optional SectionPrefix = MF.getFunction().getSectionPrefix(); + if (SectionPrefix && + (*SectionPrefix == "unlikely" || *SectionPrefix == "unknown")) { + return false; + } + + return true; +} + std::optional TargetInstrInfo::describeLoadedValue(const MachineInstr &MI, Register Reg) const { diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index e12dfeca8e24..ad3b2ebf7b6b 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -8408,6 +8408,17 @@ describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg, return std::nullopt; } +bool AArch64InstrInfo::isFunctionSafeToSplit(const MachineFunction &MF) const { + // Functions cannot be split to different sections on AArch64 if they have + // a red zone. This is because relaxing a cross-section branch may require + // incrementing the stack pointer to spill a register, which would overwrite + // the red zone. + if (MF.getInfo()->hasRedZone().value_or(true)) + return false; + + return TargetInstrInfo::isFunctionSafeToSplit(MF); +} + std::optional AArch64InstrInfo::describeLoadedValue(const MachineInstr &MI, Register Reg) const { diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h index 20210a96d67a..8e66bda80a18 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -325,6 +325,8 @@ public: std::optional isAddImmediate(const MachineInstr &MI, Register Reg) const override; + bool isFunctionSafeToSplit(const MachineFunction &MF) const override; + std::optional describeLoadedValue(const MachineInstr &MI, Register Reg) const override; diff --git a/llvm/test/CodeGen/X86/machine-function-splitter.ll b/llvm/test/CodeGen/X86/machine-function-splitter.ll index f725c471e267..a9d7eadbdb09 100644 --- a/llvm/test/CodeGen/X86/machine-function-splitter.ll +++ b/llvm/test/CodeGen/X86/machine-function-splitter.ll 
@@ -12,6 +12,7 @@ ; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -enable-split-machine-functions -mfs-allow-unsupported-triple -mfs-psi-cutoff=0 -mfs-count-threshold=2000 | FileCheck %s --dump-input=always -check-prefixes=MFS-OPTS1,MFS-OPTS1-AARCH64 ; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -enable-split-machine-functions -mfs-allow-unsupported-triple -mfs-psi-cutoff=950000 | FileCheck %s -check-prefixes=MFS-OPTS2,MFS-OPTS2-AARCH64 ; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -enable-split-machine-functions -mfs-allow-unsupported-triple -mfs-split-ehcode | FileCheck %s -check-prefixes=MFS-EH-SPLIT,MFS-EH-SPLIT-AARCH64 +; RUN: llc < %s -mtriple=aarch64 -enable-split-machine-functions -mfs-allow-unsupported-triple -aarch64-redzone | FileCheck %s -check-prefixes=MFS-REDZONE-AARCH64 ; COM: Machine function splitting with AFDO profiles ; RUN: sed 's/InstrProf/SampleProfile/g' %s > %t.ll @@ -467,6 +468,36 @@ define void @foo16(i1 zeroext %0) nounwind !prof !14 !section_prefix !15 { ret void } +define i32 @foo17(i1 zeroext %0, i32 %a, i32 %b) nounwind !prof !14 !section_prefix !15 { +;; Check that cold blocks in functions with red zones aren't split. 
+; MFS-DEFAULTS-LABEL: foo17 +; MFS-DEFAULTS-X86: foo17.cold: +; MFS-REDZONE-AARCH64-NOT: foo17.cold: + %a.addr = alloca i32, align 4 + %b.addr = alloca i32, align 4 + %x = alloca i32, align 4 + + br i1 %0, label %2, label %3, !prof !17 + +2: ; preds = %1 + store i32 %a, ptr %a.addr, align 4 + store i32 %b, ptr %b.addr, align 4 + br label %4 + +3: ; preds = %1 + store i32 %a, ptr %b.addr, align 4 + store i32 %b, ptr %a.addr, align 4 + br label %4 + +4: ; preds = %3, %2 + %tmp = load i32, ptr %a.addr, align 4 + %tmp1 = load i32, ptr %b.addr, align 4 + %add = add nsw i32 %tmp, %tmp1 + store i32 %add, ptr %x, align 4 + %tmp2 = load i32, ptr %x, align 4 + ret i32 %tmp2 +} + declare i32 @bar() declare i32 @baz() declare i32 @bam() -- Gitee From 1c0467e2dc66e2db2da740569d9e8c1a53871ba6 Mon Sep 17 00:00:00 2001 From: Daniel Hoekwater Date: Fri, 4 Aug 2023 01:16:07 +0000 Subject: [PATCH 36/47] [CodeGen][AArch64] Don't split jump table basic blocks Jump tables on AArch64 are label-relative rather than table-relative, so having jump table destinations that are in different sections causes problems with relocation. Jump table lookups have a max range of 1MB, so all destinations must be in the same section as the lookup code. Both of these restrictions can be mitigated with some careful and complex logic, but doing so doesn't gain a huge performance benefit. Efficiently ensuring jump tables are correct and can be compressed on AArch64 is a TODO item. In the meantime, don't split blocks that can cause problems. 
Differential Revision: https://reviews.llvm.org/D157124 --- llvm/include/llvm/CodeGen/TargetInstrInfo.h | 7 ++ llvm/lib/CodeGen/MachineFunctionSplitter.cpp | 6 ++ llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 30 +++++++ llvm/lib/Target/AArch64/AArch64InstrInfo.h | 2 + .../CodeGen/X86/machine-function-splitter.ll | 86 +++++++++++++++++++ 5 files changed, 131 insertions(+) diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h index 1f5893e72382..415655a96e38 100644 --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -2070,6 +2070,13 @@ public: /// splitting. The criteria for if a function can be split may vary by target. virtual bool isFunctionSafeToSplit(const MachineFunction &MF) const; + /// Return true if the MachineBasicBlock can safely be split to the cold + /// section. On AArch64, certain instructions may cause a block to be unsafe + /// to split to the cold section. + virtual bool isMBBSafeToSplitToCold(const MachineBasicBlock &MBB) const { + return true; + } + /// Produce the expression describing the \p MI loading a value into /// the physical register \p Reg. This hook should only be used with /// \p MIs belonging to VReg-less functions. 
diff --git a/llvm/lib/CodeGen/MachineFunctionSplitter.cpp b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp index 64f7c36c22db..38c1c56d2823 100644 --- a/llvm/lib/CodeGen/MachineFunctionSplitter.cpp +++ b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp @@ -109,6 +109,12 @@ static bool isColdBlock(const MachineBasicBlock &MBB, const MachineBlockFrequencyInfo *MBFI, ProfileSummaryInfo *PSI) { std::optional<uint64_t> Count = MBFI->getBlockProfileCount(&MBB); + + // Temporary hack to cope with AArch64's jump table encoding + const TargetInstrInfo &TII = *MBB.getParent()->getSubtarget().getInstrInfo(); + if (!TII.isMBBSafeToSplitToCold(MBB)) + return false; + // For instrumentation profiles and sample profiles, we use different ways // to judge whether a block is cold and should be split. if (PSI->hasInstrumentationProfile() || PSI->hasCSInstrumentationProfile()) { diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index ad3b2ebf7b6b..d35b4ea4020a 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -8419,6 +8419,36 @@ bool AArch64InstrInfo::isFunctionSafeToSplit(const MachineFunction &MF) const { return TargetInstrInfo::isFunctionSafeToSplit(MF); } +bool AArch64InstrInfo::isMBBSafeToSplitToCold( + const MachineBasicBlock &MBB) const { + // Because jump tables are label-relative instead of table-relative, they all + // must be in the same section or relocation fixup handling will fail. 
+ + // Check if MBB is a jump table target + const MachineJumpTableInfo *MJTI = MBB.getParent()->getJumpTableInfo(); + auto containsMBB = [&MBB](const MachineJumpTableEntry &JTE) { + return llvm::is_contained(JTE.MBBs, &MBB); + }; + if (MJTI != nullptr && llvm::any_of(MJTI->getJumpTables(), containsMBB)) + return false; + + // Check if MBB contains a jump table lookup + for (const MachineInstr &MI : MBB) { + switch (MI.getOpcode()) { + case TargetOpcode::G_BRJT: + case AArch64::JumpTableDest32: + case AArch64::JumpTableDest16: + case AArch64::JumpTableDest8: + return false; + default: + continue; + } + } + + // MBB isn't a special case, so it's safe to be split to the cold section. + return true; +} + std::optional<ParamLoadedValue> AArch64InstrInfo::describeLoadedValue(const MachineInstr &MI, Register Reg) const { diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h index 8e66bda80a18..81dc5c60e6d8 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -327,6 +327,8 @@ public: bool isFunctionSafeToSplit(const MachineFunction &MF) const override; + bool isMBBSafeToSplitToCold(const MachineBasicBlock &MBB) const override; + std::optional<ParamLoadedValue> describeLoadedValue(const MachineInstr &MI, Register Reg) const override; diff --git a/llvm/test/CodeGen/X86/machine-function-splitter.ll b/llvm/test/CodeGen/X86/machine-function-splitter.ll index a9d7eadbdb09..0993e325703b 100644 --- a/llvm/test/CodeGen/X86/machine-function-splitter.ll +++ b/llvm/test/CodeGen/X86/machine-function-splitter.ll @@ -498,6 +498,91 @@ define i32 @foo17(i1 zeroext %0, i32 %a, i32 %b) nounwind !prof !14 !section_pre ret i32 %tmp2 } +define i32 @foo18(i32 %in) !prof !14 !section_prefix !15 { +;; Check that a cold block targeted by a jump table is not split +;; on AArch64. 
+; MFS-DEFAULTS-LABEL: foo18 +; MFS-DEFAULTS: .section .text.split.foo18 +; MFS-DEFAULTS-NEXT: foo18.cold: +; MFS-DEFAULTS-SAME: %common.ret +; MFS-DEFAULTS-X86-DAG: jmp qux +; MFS-DEFAULTS-X86-DAG: jmp bam +; MFS-DEFAULTS-AARCH64-NOT: b bar +; MFS-DEFAULTS-AARCH64-NOT: b baz +; MFS-DEFAULTS-AARCH64-NOT: b qux +; MFS-DEFAULTS-AARCH64-NOT: b bam + + switch i32 %in, label %common.ret [ + i32 0, label %hot1 + i32 1, label %hot2 + i32 2, label %cold1 + i32 3, label %cold2 + ], !prof !28 + +common.ret: ; preds = %0 + ret i32 0 + +hot1: ; preds = %0 + %1 = tail call i32 @bar() + ret i32 %1 + +hot2: ; preds = %0 + %2 = tail call i32 @baz() + ret i32 %2 + +cold1: ; preds = %0 + %3 = tail call i32 @bam() + ret i32 %3 + +cold2: ; preds = %0 + %4 = tail call i32 @qux() + ret i32 %4 +} + +define i32 @foo19(i32 %in) !prof !14 !section_prefix !15 { +;; Check that a cold block that contains a jump table dispatch is +;; not split on AArch64. +; MFS-DEFAULTS-LABEL: foo19 +; MFS-DEFAULTS: .section .text.split.foo19 +; MFS-DEFAULTS-NEXT: foo19.cold: +; MFS-DEFAULTS-X86: .LJTI18_0 +; MFS-DEFAULTS-AARCH64-NOT: .LJTI18_0 +; MFS-DEFAULTS: .section .rodata +; MFS-DEFAULTS: .LJTI18_0 + %cmp = icmp sgt i32 %in, 3 + br i1 %cmp, label %hot, label %cold_switch, !prof !17 + +hot: ; preds = %0 +ret i32 1 + +cold_switch: ; preds = %0 + switch i32 %in, label %common.ret [ + i32 0, label %hot1 + i32 1, label %hot2 + i32 2, label %cold1 + i32 3, label %cold2 + ], !prof !28 + +common.ret: ; preds = %0 + ret i32 0 + +hot1: ; preds = %0 + %1 = tail call i32 @bar() + ret i32 %1 + +hot2: ; preds = %0 + %2 = tail call i32 @baz() + ret i32 %2 + +cold1: ; preds = %0 + %3 = tail call i32 @bam() + ret i32 %3 + +cold2: ; preds = %0 + %4 = tail call i32 @qux() + ret i32 %4 +} + declare i32 @bar() declare i32 @baz() declare i32 @bam() @@ -538,3 +623,4 @@ attributes #0 = { "implicit-section-name"="nosplit" } !25 = !{!"branch_weights", i32 0, i32 7000} !26 = !{!"branch_weights", i32 1000, i32 6000} !27 = 
!{!"function_entry_count", i64 10000} +!28 = !{!"branch_weights", i32 0, i32 4000, i32 4000, i32 0, i32 0} -- Gitee From e6af637dac3f4d624bb2e2d6aef65d2101bcbe16 Mon Sep 17 00:00:00 2001 From: Daniel Hoekwater Date: Wed, 23 Aug 2023 18:09:30 +0000 Subject: [PATCH 37/47] [CodeGen][AArch64] Don't split inline asm goto blocks or their targets Machine function splitting + branch relaxation currently don't properly handle inline asm goto blocks that conditional branch to cold goto labels. While such inline asm is technically invalid, machine function splitting is the only thing that exposes it as such. Since machine function splitting doesn't help too much in these circumstances anyway, disable it for asm goto blocks and their targets. Differential Revision: https://reviews.llvm.org/D158647 --- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 8 ++++++ .../CodeGen/X86/machine-function-splitter.ll | 27 +++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index d35b4ea4020a..5cc81626d185 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -8421,6 +8421,14 @@ bool AArch64InstrInfo::isFunctionSafeToSplit(const MachineFunction &MF) const { bool AArch64InstrInfo::isMBBSafeToSplitToCold( const MachineBasicBlock &MBB) const { + // Asm Goto blocks can contain conditional branches to goto labels, which can + // get moved out of range of the branch instruction. + auto isAsmGoto = [](const MachineInstr &MI) { + return MI.getOpcode() == AArch64::INLINEASM_BR; + }; + if (llvm::any_of(MBB, isAsmGoto) || MBB.isInlineAsmBrIndirectTarget()) + return false; + // Because jump tables are label-relative instead of table-relative, they all // must be in the same section or relocation fixup handling will fail. 
diff --git a/llvm/test/CodeGen/X86/machine-function-splitter.ll b/llvm/test/CodeGen/X86/machine-function-splitter.ll index 0993e325703b..bbea35211d2e 100644 --- a/llvm/test/CodeGen/X86/machine-function-splitter.ll +++ b/llvm/test/CodeGen/X86/machine-function-splitter.ll @@ -583,6 +583,33 @@ cold2: ; preds = %0 ret i32 %4 } +define void @foo20(i1 zeroext %0) !prof !14 !section_prefix !15 { +;; Check that blocks containing or targeted by asm goto aren't split. +; MFS-DEFAULTS-LABEL: foo20 +; MFS-DEFAULTS-AARCH64-NOT: foo20.cold: +; MFS-DEFAULTS-X86: .section .text.split.foo20 +; MFS-DEFAULTS-X86: foo20.cold: +; MFS-DEFAULTS-X86-DAG: # %cold_asm +; MFS-DEFAULTS-X86-DAG: # %cold_asm_target + + br i1 %0, label %hot, label %cold_asm, !prof !17 + +hot: + %2 = call i32 @bar() + ret void + +cold_asm: + callbr void asm sideeffect "nop", "!i"() #3 + to label %asm.fallthrough [label %cold_asm_target] + +asm.fallthrough: + br label %cold_asm_target + +cold_asm_target: + %3 = call i32 @baz() + ret void +} + declare i32 @bar() declare i32 @baz() declare i32 @bam() -- Gitee From cf38e99dd80dd48cde217c21c3d730fc2ae74589 Mon Sep 17 00:00:00 2001 From: Daniel Hoekwater Date: Wed, 14 Feb 2024 18:58:07 +0000 Subject: [PATCH 38/47] [CodeGen][AArch64] Only split safe blocks in BBSections (#81553) Some types of machine function and machine basic block are unsafe to split on AArch64: basic blocks that contain jump table dispatch or targets (D157124), and blocks that contain inline ASM GOTO blocks or their targets (D158647) all cause issues and have been excluded from Machine Function Splitting on AArch64. These issues are caused by any transformation pass that places same-function basic blocks in different text sections (MachineFunctionSplitter and BasicBlockSections) and must be special-cased in both passes. 
--- llvm/lib/CodeGen/BasicBlockSections.cpp | 11 +- .../AArch64/basic-block-sections-cold.ll | 51 ++++++++ .../AArch64/basic-block-sections-unsafe.ll | 121 ++++++++++++++++++ 3 files changed, 180 insertions(+), 3 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/basic-block-sections-cold.ll create mode 100644 llvm/test/CodeGen/AArch64/basic-block-sections-unsafe.ll diff --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp index eb3f9e7078f1..09e45ea5794b 100644 --- a/llvm/lib/CodeGen/BasicBlockSections.cpp +++ b/llvm/lib/CodeGen/BasicBlockSections.cpp @@ -208,9 +208,14 @@ assignSections(MachineFunction &MF, if (I != FuncClusterInfo.end()) { MBB.setSectionID(I->second.ClusterID); } else { - // BB goes into the special cold section if it is not specified in the - // cluster info map. - MBB.setSectionID(MBBSectionID::ColdSectionID); + const TargetInstrInfo &TII = + *MBB.getParent()->getSubtarget().getInstrInfo(); + + if (TII.isMBBSafeToSplitToCold(MBB)) { + // BB goes into the special cold section if it is not specified in the + // cluster info map. + MBB.setSectionID(MBBSectionID::ColdSectionID); + } } } diff --git a/llvm/test/CodeGen/AArch64/basic-block-sections-cold.ll b/llvm/test/CodeGen/AArch64/basic-block-sections-cold.ll new file mode 100644 index 000000000000..6641ef6a51c1 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/basic-block-sections-cold.ll @@ -0,0 +1,51 @@ +;; Check if basic blocks that don't get unique sections are placed in cold sections. +;; Basic block with id 1 and 2 must be in the cold section. 
+;; +;; Profile for version 0 +; RUN: echo '!_Z3bazb' > %t1 +; RUN: echo '!!0' >> %t1 +;; +;; Profile for version 1 +; RUN: echo 'v1' > %t2 +; RUN: echo 'f _Z3bazb' >> %t2 +; RUN: echo 'c 0' >> %t2 +;; +; RUN: llc < %s -mtriple=aarch64 -function-sections -basic-block-sections=%t1 -unique-basic-block-section-names | FileCheck %s -check-prefix=SECTIONS +; RUN: llc < %s -mtriple=aarch64 -function-sections -basic-block-sections=%t2 -unique-basic-block-section-names | FileCheck %s -check-prefix=SECTIONS +; RUN: llc < %s -mtriple=aarch64 -function-sections -basic-block-sections=%t1 -unique-basic-block-section-names -bbsections-cold-text-prefix=".text.unlikely." | FileCheck %s -check-prefix=SPLIT + +define void @_Z3bazb(i1 zeroext %0) nounwind { + br i1 %0, label %2, label %4 + +2: ; preds = %1 + %3 = call i32 @_Z3barv() + br label %6 + +4: ; preds = %1 + %5 = call i32 @_Z3foov() + br label %6 + +6: ; preds = %2, %4 + ret void +} + +declare i32 @_Z3barv() #1 + +declare i32 @_Z3foov() #1 + +; SECTIONS: .section .text.hot._Z3bazb,"ax",@progbits +; SECTIONS: _Z3bazb: +; Check that the basic block with id 1 doesn't get a section. +; SECTIONS-NOT: .section .text{{.*}}._Z3bazb.1,"ax",@progbits,unique +; Check that a single cold section is started here and id 1 and 2 blocks are placed here. 
+; SECTIONS: .section .text.split._Z3bazb,"ax",@progbits +; SECTIONS: _Z3bazb.cold: +; SECTIONS-NOT: .section .text.hot._Z3bazb._Z3bazb.2,"ax",@progbits,unique +; SECTIONS: .LBB0_2: +; SECTIONS: .size _Z3bazb, .Lfunc_end{{[0-9]}}-_Z3bazb + +; SPLIT: .section .text.unlikely._Z3bazb,"ax",@progbits +; SPLIT-NEXT: _Z3bazb.cold: +; SPLIT-NEXT: bl _Z3barv +; SPLIT: .LBB0_2: +; SPLIT: .LBB_END0_2: diff --git a/llvm/test/CodeGen/AArch64/basic-block-sections-unsafe.ll b/llvm/test/CodeGen/AArch64/basic-block-sections-unsafe.ll new file mode 100644 index 000000000000..643465fded17 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/basic-block-sections-unsafe.ll @@ -0,0 +1,121 @@ +;; Check if basic blocks without unique sections are only placed in cold sections if it is safe +;; to do so. +;; +;; Profile for version 1. +; RUN: echo 'v1' > %t1 +; RUN: echo 'f _Z3asm_goto' >> %t1 +; RUN: echo 'c 0' >> %t1 +; RUN: echo 'f _Z3jump_table' >> %t1 +; RUN: echo 'c 0' >> %t1 +; RUN: echo 'f _Z3red_zone' >> %t1 +; RUN: echo 'c 0' >> %t1 +;; +; RUN: llc < %s -mtriple=aarch64 -function-sections -basic-block-sections=%t1 -unique-basic-block-section-names -bbsections-cold-text-prefix=".text.unlikely." | FileCheck %s +; RUN: llc < %s -mtriple=aarch64 -function-sections -min-jump-table-entries=4 -basic-block-sections=%t1 -unique-basic-block-section-names -bbsections-cold-text-prefix=".text.unlikely." | FileCheck %s -check-prefix=JUMP-TABLES +; RUN: llc < %s -mtriple=aarch64 -function-sections -basic-block-sections=%t1 -unique-basic-block-section-names -bbsections-cold-text-prefix=".text.unlikely." | FileCheck %s -check-prefix=RED-ZONE + +define void @_Z3asm_goto(i1 zeroext %0, i1 zeroext %1) nounwind { + ;; Check that blocks containing or targeted by asm goto aren't split. 
+ ; CHECK-LABEL: _Z3asm_goto + ; CHECK: .section .text.unlikely._Z3asm_goto,"ax",@progbits + ; CHECK-NEXT: _Z3asm_goto.cold: + ; CHECK-NEXT: bl bam + ; CHECK: .LBB0_4: + ; CHECK: ret + ; CHECK: .LBB_END0_4: + + br i1 %0, label %3, label %5 + +3: ; preds = %2 + %4 = call i32 @bar() + callbr void asm sideeffect "nop", "!i"() #3 + to label %asm.fallthrough [label %5] + + +asm.fallthrough: ; preds = %3 + br label %5 + +5: ; preds = %2, %asm.fallthrough + %6 = call i32 @bar() + br i1 %1, label %7, label %9 + +7: + %8 = call i32 @bam() + br label %9 + +9: ; preds = %7 + ret void +} + +define i32 @_Z3jump_table(i32 %in) nounwind { + ;; Check that a cold block that contains a jump table dispatch or + ;; that is targeted by a jump table is not split. + ; JUMP-TABLES-LABEL: _Z3jump_table + ; JUMP-TABLES: .section .text.unlikely._Z3jump_table,"ax",@progbits + ; JUMP-TABLES-NEXT: _Z3jump_table.cold: + ; JUMP-TABLES-SAME: %common.ret + ; JUMP-TABLES-NOT: b bar + ; JUMP-TABLES-NOT: b baz + ; JUMP-TABLES-NOT: b qux + ; JUMP-TABLES-NOT: b bam + + switch i32 %in, label %common.ret [ + i32 0, label %cold1 + i32 1, label %cold2 + i32 2, label %cold3 + i32 3, label %cold4 + ] + + common.ret: ; preds = %0 + ret i32 0 + + cold1: ; preds = %0 + %1 = tail call i32 @bar() + ret i32 %1 + + cold2: ; preds = %0 + %2 = tail call i32 @baz() + ret i32 %2 + + cold3: ; preds = %0 + %3 = tail call i32 @bam() + ret i32 %3 + + cold4: ; preds = %0 + %4 = tail call i32 @qux() + ret i32 %4 +} + +define i32 @_Z3red_zone(i1 zeroext %0, i32 %a, i32 %b) nounwind { +;; Check that cold blocks in functions with red zones aren't split. 
+; RED-ZONE-LABEL: _Z3red_zone +; MFS-REDZONE-AARCH64-NOT: _Z3red_zone.cold: + %a.addr = alloca i32, align 4 + %b.addr = alloca i32, align 4 + %x = alloca i32, align 4 + + br i1 %0, label %2, label %3 + +2: ; preds = %1 + store i32 %a, ptr %a.addr, align 4 + store i32 %b, ptr %b.addr, align 4 + br label %4 + +3: ; preds = %1 + store i32 %a, ptr %b.addr, align 4 + store i32 %b, ptr %a.addr, align 4 + br label %4 + +4: ; preds = %3, %2 + %tmp = load i32, ptr %a.addr, align 4 + %tmp1 = load i32, ptr %b.addr, align 4 + %add = add nsw i32 %tmp, %tmp1 + store i32 %add, ptr %x, align 4 + %tmp2 = load i32, ptr %x, align 4 + ret i32 %tmp2 +} + +declare i32 @bar() +declare i32 @baz() +declare i32 @bam() +declare i32 @qux() -- Gitee From 21b1345ccf0bcb99a2868d2bb0a56e0a18dae1a1 Mon Sep 17 00:00:00 2001 From: Alexander Yermolovich <43973793+ayermolo@users.noreply.github.com> Date: Wed, 14 Feb 2024 15:43:39 -0800 Subject: [PATCH 39/47] [BOLT][DWARF] Add test for DW_AT_ranges input without function output (#81794) Added a test that relies on -fbasic-block-sections=all and --gc-sections that exercises a code path that previously printed a warning. 
--- bolt/lib/Rewrite/DWARFRewriter.cpp | 9 +- .../dwarf4-subprogram-single-gc-ranges.test | 6 +- bolt/test/X86/dwarf5-empty-function-ranges.s | 538 ++++++++++++++++++ .../dwarf5-subprogram-single-gc-ranges.test | 6 +- 4 files changed, 544 insertions(+), 15 deletions(-) create mode 100644 bolt/test/X86/dwarf5-empty-function-ranges.s diff --git a/bolt/lib/Rewrite/DWARFRewriter.cpp b/bolt/lib/Rewrite/DWARFRewriter.cpp index a2408b75779c..781bb960c263 100644 --- a/bolt/lib/Rewrite/DWARFRewriter.cpp +++ b/bolt/lib/Rewrite/DWARFRewriter.cpp @@ -813,15 +813,10 @@ void DWARFRewriter::updateUnitDebugInfo( DIEValue LowPCVal = Die->findAttribute(dwarf::DW_AT_low_pc); DIEValue HighPCVal = Die->findAttribute(dwarf::DW_AT_high_pc); if (FunctionRanges.empty()) { - if (LowPCVal && HighPCVal) { + if (LowPCVal && HighPCVal) FunctionRanges.push_back({0, HighPCVal.getDIEInteger().getValue()}); - } else { - // I haven't seen this case, but who knows what other compilers - // generate. + else FunctionRanges.push_back({0, 1}); - errs() << "BOLT-WARNING: [internal-dwarf-error]: subprogram got GCed " - "by the linker, DW_AT_ranges is used\n"; - } } if (FunctionRanges.size() == 1 && !opts::AlwaysConvertToRanges) { diff --git a/bolt/test/X86/dwarf4-subprogram-single-gc-ranges.test b/bolt/test/X86/dwarf4-subprogram-single-gc-ranges.test index 9080052a2991..3e7e765f98b1 100644 --- a/bolt/test/X86/dwarf4-subprogram-single-gc-ranges.test +++ b/bolt/test/X86/dwarf4-subprogram-single-gc-ranges.test @@ -2,14 +2,12 @@ # RUN: llvm-mc -dwarf-version=4 -filetype=obj -triple x86_64-unknown-linux %p/Inputs/dwarf4-subprogram-single-gc-ranges-main.s -o %t1.o # RUN: %clang %cflags %t1.o -o %t.exe -Wl,-q -# RUN: llvm-bolt %t.exe -o %t.bolt --update-debug-sections &> %t1.txt -# RUN: llvm-dwarfdump --show-form --verbose --debug-info %t.bolt >> %t1.txt +# RUN: llvm-bolt %t.exe -o %t.bolt --update-debug-sections +# RUN: llvm-dwarfdump --show-form --verbose --debug-info %t.bolt > %t1.txt # RUN: cat %t1.txt | 
FileCheck --check-prefix=POSTCHECK %s # This test checks BOLT correctly handles DW_TAG_subprogram with Ranges with single entry, when function was GCed. -# POSTCHECK: BOLT-WARNING: [internal-dwarf-error]: subprogram got GCed by the linker, DW_AT_ranges is used - # POSTCHECK: DW_TAG_subprogram # POSTCHECK-NEXT: DW_AT_frame_base # POSTCHECK-NEXT: DW_AT_linkage_name diff --git a/bolt/test/X86/dwarf5-empty-function-ranges.s b/bolt/test/X86/dwarf5-empty-function-ranges.s new file mode 100644 index 000000000000..bfa317808163 --- /dev/null +++ b/bolt/test/X86/dwarf5-empty-function-ranges.s @@ -0,0 +1,538 @@ +# REQUIRES: system-linux + +# RUN: llvm-mc -dwarf-version=5 -filetype=obj -triple x86_64-unknown-linux %s -o %t1.o +# RUN: %clang %cflags -dwarf-5 %t1.o -o %t.exe -Wl,-q -Wl,-gc-sections -fuse-ld=lld -Wl,--entry=main +# RUN: llvm-bolt %t.exe -o %t.bolt --update-debug-sections +# RUN: llvm-dwarfdump --debug-info %t.exe | FileCheck --check-prefix=PRECHECK %s +# RUN: llvm-dwarfdump --debug-info %t.bolt | FileCheck --check-prefix=POSTCHECK %s + +# PRECHECK: DW_TAG_subprogram +# PRECHECK-NEXT: DW_AT_ranges +# PRECHECK-NEXT: [0x0000000000000000 +# PRECHECK-NEXT: [0x0000000000000000 +# PRECHECK-NEXT: [0x0000000000000000 +# PRECHECK-NEXT: [0x0000000000000000 +# PRECHECK-NEXT: DW_AT_frame_base +# PRECHECK-NEXT: DW_AT_linkage_name ("_Z6helperi") +# PRECHECK-NEXT: DW_AT_name ("helper") + +# POSTCHECK: DW_TAG_subprogram +# POSTCHECK-NEXT: DW_AT_frame_base +# POSTCHECK-NEXT: DW_AT_linkage_name ("_Z6helperi") +# POSTCHECK-NEXT: DW_AT_name ("helper") +# POSTCHECK-NEXT: DW_AT_decl_file +# POSTCHECK-NEXT: DW_AT_decl_line +# POSTCHECK-NEXT: DW_AT_type +# POSTCHECK-NEXT: DW_AT_external +# POSTCHECK-NEXT: DW_AT_low_pc (0x0000000000000000) +# POSTCHECK-NEXT: DW_AT_high_pc (0x0000000000000001) + +## Tests BOLT path that handles DW_AT_ranges with no output function ranges. 
+ +## clang++ main.cpp -O0 -fno-inline-functions -fbasic-block-sections=all -g2 -S +## int helper(int argc) { +## int x = argc; +## if (x == 3) +## x++; +## else +## x--; +## return x; +## } +## int main(int argc, char *argv[]) { +## int x = argc; +## if (x == 3) +## x++; +## else +## x--; +## return x; +## } + + .text + .file "main.cpp" + .section .text._Z6helperi,"ax",@progbits + .globl _Z6helperi # -- Begin function _Z6helperi + .p2align 4, 0x90 + .type _Z6helperi,@function +_Z6helperi: # @_Z6helperi +.Lfunc_begin0: + .file 0 "/repro2" "main.cpp" md5 0x888a2704226ec400f256aa9c2207456c + .loc 0 1 0 # main.cpp:1:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + movl %edi, -4(%rbp) +.Ltmp0: + .loc 0 2 11 prologue_end # main.cpp:2:11 + movl -4(%rbp), %eax + .loc 0 2 7 is_stmt 0 # main.cpp:2:7 + movl %eax, -8(%rbp) +.Ltmp1: + .loc 0 3 9 is_stmt 1 # main.cpp:3:9 + cmpl $3, -8(%rbp) +.Ltmp2: + .loc 0 3 7 is_stmt 0 # main.cpp:3:7 + jne _Z6helperi.__part.2 + jmp _Z6helperi.__part.1 +.LBB_END0_0: + .cfi_endproc + .section .text._Z6helperi,"ax",@progbits,unique,1 +_Z6helperi.__part.1: # %if.then + .cfi_startproc + .cfi_def_cfa %rbp, 16 + .cfi_offset %rbp, -16 + .loc 0 4 6 is_stmt 1 # main.cpp:4:6 + movl -8(%rbp), %eax + addl $1, %eax + movl %eax, -8(%rbp) + .loc 0 4 5 is_stmt 0 # main.cpp:4:5 + jmp _Z6helperi.__part.3 +.LBB_END0_1: + .size _Z6helperi.__part.1, .LBB_END0_1-_Z6helperi.__part.1 + .cfi_endproc + .section .text._Z6helperi,"ax",@progbits,unique,2 +_Z6helperi.__part.2: # %if.else + .cfi_startproc + .cfi_def_cfa %rbp, 16 + .cfi_offset %rbp, -16 + .loc 0 6 6 is_stmt 1 # main.cpp:6:6 + movl -8(%rbp), %eax + addl $-1, %eax + movl %eax, -8(%rbp) + jmp _Z6helperi.__part.3 +.LBB_END0_2: + .size _Z6helperi.__part.2, .LBB_END0_2-_Z6helperi.__part.2 + .cfi_endproc + .section .text._Z6helperi,"ax",@progbits,unique,3 +_Z6helperi.__part.3: # %if.end + .cfi_startproc + 
.cfi_def_cfa %rbp, 16 + .cfi_offset %rbp, -16 + .loc 0 7 10 # main.cpp:7:10 + movl -8(%rbp), %eax + .loc 0 7 3 epilogue_begin is_stmt 0 # main.cpp:7:3 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.LBB_END0_3: + .size _Z6helperi.__part.3, .LBB_END0_3-_Z6helperi.__part.3 + .cfi_endproc + .section .text._Z6helperi,"ax",@progbits +.Lfunc_end0: + .size _Z6helperi, .Lfunc_end0-_Z6helperi + # -- End function + .section .text.main,"ax",@progbits + .globl main # -- Begin function main + .p2align 4, 0x90 + .type main,@function +main: # @main +.Lfunc_begin1: + .loc 0 9 0 is_stmt 1 # main.cpp:9:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + movl $0, -4(%rbp) + movl %edi, -8(%rbp) + movq %rsi, -16(%rbp) +.Ltmp3: + .loc 0 10 11 prologue_end # main.cpp:10:11 + movl -8(%rbp), %eax + .loc 0 10 7 is_stmt 0 # main.cpp:10:7 + movl %eax, -20(%rbp) +.Ltmp4: + .loc 0 11 9 is_stmt 1 # main.cpp:11:9 + cmpl $3, -20(%rbp) +.Ltmp5: + .loc 0 11 7 is_stmt 0 # main.cpp:11:7 + jne main.__part.2 + jmp main.__part.1 +.LBB_END1_0: + .cfi_endproc + .section .text.main,"ax",@progbits,unique,4 +main.__part.1: # %if.then + .cfi_startproc + .cfi_def_cfa %rbp, 16 + .cfi_offset %rbp, -16 + .loc 0 12 6 is_stmt 1 # main.cpp:12:6 + movl -20(%rbp), %eax + addl $1, %eax + movl %eax, -20(%rbp) + .loc 0 12 5 is_stmt 0 # main.cpp:12:5 + jmp main.__part.3 +.LBB_END1_1: + .size main.__part.1, .LBB_END1_1-main.__part.1 + .cfi_endproc + .section .text.main,"ax",@progbits,unique,5 +main.__part.2: # %if.else + .cfi_startproc + .cfi_def_cfa %rbp, 16 + .cfi_offset %rbp, -16 + .loc 0 14 6 is_stmt 1 # main.cpp:14:6 + movl -20(%rbp), %eax + addl $-1, %eax + movl %eax, -20(%rbp) + jmp main.__part.3 +.LBB_END1_2: + .size main.__part.2, .LBB_END1_2-main.__part.2 + .cfi_endproc + .section .text.main,"ax",@progbits,unique,6 +main.__part.3: # %if.end + .cfi_startproc + .cfi_def_cfa %rbp, 16 + .cfi_offset %rbp, -16 + .loc 0 15 10 # 
main.cpp:15:10 + movl -20(%rbp), %eax + .loc 0 15 3 epilogue_begin is_stmt 0 # main.cpp:15:3 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.LBB_END1_3: + .size main.__part.3, .LBB_END1_3-main.__part.3 + .cfi_endproc + .section .text.main,"ax",@progbits +.Lfunc_end1: + .size main, .Lfunc_end1-main + # -- End function + .section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .byte 37 # DW_FORM_strx1 + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 114 # DW_AT_str_offsets_base + .byte 23 # DW_FORM_sec_offset + .byte 16 # DW_AT_stmt_list + .byte 23 # DW_FORM_sec_offset + .byte 27 # DW_AT_comp_dir + .byte 37 # DW_FORM_strx1 + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 85 # DW_AT_ranges + .byte 35 # DW_FORM_rnglistx + .byte 115 # DW_AT_addr_base + .byte 23 # DW_FORM_sec_offset + .byte 116 # DW_AT_rnglists_base + .byte 23 # DW_FORM_sec_offset + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 85 # DW_AT_ranges + .byte 35 # DW_FORM_rnglistx + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 110 # DW_AT_linkage_name + .byte 37 # DW_FORM_strx1 + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 3 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + 
.byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 4 # Abbreviation Code + .byte 52 # DW_TAG_variable + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 5 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 85 # DW_AT_ranges + .byte 35 # DW_FORM_rnglistx + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 6 # Abbreviation Code + .byte 36 # DW_TAG_base_type + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 62 # DW_AT_encoding + .byte 11 # DW_FORM_data1 + .byte 11 # DW_AT_byte_size + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 7 # Abbreviation Code + .byte 15 # DW_TAG_pointer_type + .byte 0 # DW_CHILDREN_no + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit +.Ldebug_info_start0: + .short 5 # DWARF version number + .byte 1 # DWARF Unit Type + .byte 8 # Address Size (in bytes) + .long .debug_abbrev # Offset Into Abbrev. 
Section + .byte 1 # Abbrev [1] 0xc:0x82 DW_TAG_compile_unit + .byte 0 # DW_AT_producer + .short 33 # DW_AT_language + .byte 1 # DW_AT_name + .long .Lstr_offsets_base0 # DW_AT_str_offsets_base + .long .Lline_table_start0 # DW_AT_stmt_list + .byte 2 # DW_AT_comp_dir + .quad 0 # DW_AT_low_pc + .byte 2 # DW_AT_ranges + .long .Laddr_table_base0 # DW_AT_addr_base + .long .Lrnglists_table_base0 # DW_AT_rnglists_base + .byte 2 # Abbrev [2] 0x2b:0x23 DW_TAG_subprogram + .byte 0 # DW_AT_ranges + .byte 1 # DW_AT_frame_base + .byte 86 + .byte 3 # DW_AT_linkage_name + .byte 4 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + .long 123 # DW_AT_type + # DW_AT_external + .byte 3 # Abbrev [3] 0x37:0xb DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 124 + .byte 7 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + .long 123 # DW_AT_type + .byte 4 # Abbrev [4] 0x42:0xb DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .byte 8 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 2 # DW_AT_decl_line + .long 123 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 5 # Abbrev [5] 0x4e:0x2d DW_TAG_subprogram + .byte 1 # DW_AT_ranges + .byte 1 # DW_AT_frame_base + .byte 86 + .byte 6 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 9 # DW_AT_decl_line + .long 123 # DW_AT_type + # DW_AT_external + .byte 3 # Abbrev [3] 0x59:0xb DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .byte 7 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 9 # DW_AT_decl_line + .long 123 # DW_AT_type + .byte 3 # Abbrev [3] 0x64:0xb DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .byte 9 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 9 # DW_AT_decl_line + .long 127 # DW_AT_type + .byte 4 # Abbrev [4] 0x6f:0xb DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 108 + .byte 8 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 10 # DW_AT_decl_line + .long 123 # DW_AT_type + 
.byte 0 # End Of Children Mark + .byte 6 # Abbrev [6] 0x7b:0x4 DW_TAG_base_type + .byte 5 # DW_AT_name + .byte 5 # DW_AT_encoding + .byte 4 # DW_AT_byte_size + .byte 7 # Abbrev [7] 0x7f:0x5 DW_TAG_pointer_type + .long 132 # DW_AT_type + .byte 7 # Abbrev [7] 0x84:0x5 DW_TAG_pointer_type + .long 137 # DW_AT_type + .byte 6 # Abbrev [6] 0x89:0x4 DW_TAG_base_type + .byte 10 # DW_AT_name + .byte 6 # DW_AT_encoding + .byte 1 # DW_AT_byte_size + .byte 0 # End Of Children Mark +.Ldebug_info_end0: + .section .debug_rnglists,"",@progbits + .long .Ldebug_list_header_end0-.Ldebug_list_header_start0 # Length +.Ldebug_list_header_start0: + .short 5 # Version + .byte 8 # Address size + .byte 0 # Segment selector size + .long 3 # Offset entry count +.Lrnglists_table_base0: + .long .Ldebug_ranges0-.Lrnglists_table_base0 + .long .Ldebug_ranges1-.Lrnglists_table_base0 + .long .Ldebug_ranges2-.Lrnglists_table_base0 +.Ldebug_ranges0: + .byte 3 # DW_RLE_startx_length + .byte 0 # start index + .uleb128 .LBB_END0_1-_Z6helperi.__part.1 # length + .byte 3 # DW_RLE_startx_length + .byte 1 # start index + .uleb128 .LBB_END0_2-_Z6helperi.__part.2 # length + .byte 3 # DW_RLE_startx_length + .byte 2 # start index + .uleb128 .LBB_END0_3-_Z6helperi.__part.3 # length + .byte 3 # DW_RLE_startx_length + .byte 3 # start index + .uleb128 .Lfunc_end0-.Lfunc_begin0 # length + .byte 0 # DW_RLE_end_of_list +.Ldebug_ranges1: + .byte 3 # DW_RLE_startx_length + .byte 4 # start index + .uleb128 .LBB_END1_1-main.__part.1 # length + .byte 3 # DW_RLE_startx_length + .byte 5 # start index + .uleb128 .LBB_END1_2-main.__part.2 # length + .byte 3 # DW_RLE_startx_length + .byte 6 # start index + .uleb128 .LBB_END1_3-main.__part.3 # length + .byte 3 # DW_RLE_startx_length + .byte 7 # start index + .uleb128 .Lfunc_end1-.Lfunc_begin1 # length + .byte 0 # DW_RLE_end_of_list +.Ldebug_ranges2: + .byte 3 # DW_RLE_startx_length + .byte 0 # start index + .uleb128 .LBB_END0_1-_Z6helperi.__part.1 # length + .byte 3 # 
DW_RLE_startx_length + .byte 1 # start index + .uleb128 .LBB_END0_2-_Z6helperi.__part.2 # length + .byte 3 # DW_RLE_startx_length + .byte 2 # start index + .uleb128 .LBB_END0_3-_Z6helperi.__part.3 # length + .byte 3 # DW_RLE_startx_length + .byte 3 # start index + .uleb128 .Lfunc_end0-.Lfunc_begin0 # length + .byte 3 # DW_RLE_startx_length + .byte 4 # start index + .uleb128 .LBB_END1_1-main.__part.1 # length + .byte 3 # DW_RLE_startx_length + .byte 5 # start index + .uleb128 .LBB_END1_2-main.__part.2 # length + .byte 3 # DW_RLE_startx_length + .byte 6 # start index + .uleb128 .LBB_END1_3-main.__part.3 # length + .byte 3 # DW_RLE_startx_length + .byte 7 # start index + .uleb128 .Lfunc_end1-.Lfunc_begin1 # length + .byte 0 # DW_RLE_end_of_list +.Ldebug_list_header_end0: + .section .debug_str_offsets,"",@progbits + .long 48 # Length of String Offsets Set + .short 5 + .short 0 +.Lstr_offsets_base0: + .section .debug_str,"MS",@progbits,1 +.Linfo_string0: + .asciz "clang version 19.0.0git (git@github.com:ayermolo/llvm-project.git a1d8664d409cac2a923176a8e9a731385bde279e)" # string offset=0 +.Linfo_string1: + .asciz "main.cpp" # string offset=108 +.Linfo_string2: + .asciz "/repro2" # string offset=117 +.Linfo_string3: + .asciz "_Z6helperi" # string offset=162 +.Linfo_string4: + .asciz "helper" # string offset=173 +.Linfo_string5: + .asciz "int" # string offset=180 +.Linfo_string6: + .asciz "main" # string offset=184 +.Linfo_string7: + .asciz "argc" # string offset=189 +.Linfo_string8: + .asciz "x" # string offset=194 +.Linfo_string9: + .asciz "argv" # string offset=196 +.Linfo_string10: + .asciz "char" # string offset=201 + .section .debug_str_offsets,"",@progbits + .long .Linfo_string0 + .long .Linfo_string1 + .long .Linfo_string2 + .long .Linfo_string3 + .long .Linfo_string4 + .long .Linfo_string5 + .long .Linfo_string6 + .long .Linfo_string7 + .long .Linfo_string8 + .long .Linfo_string9 + .long .Linfo_string10 + .section .debug_addr,"",@progbits + .long 
.Ldebug_addr_end0-.Ldebug_addr_start0 # Length of contribution +.Ldebug_addr_start0: + .short 5 # DWARF version number + .byte 8 # Address size + .byte 0 # Segment selector size +.Laddr_table_base0: + .quad _Z6helperi.__part.1 + .quad _Z6helperi.__part.2 + .quad _Z6helperi.__part.3 + .quad .Lfunc_begin0 + .quad main.__part.1 + .quad main.__part.2 + .quad main.__part.3 + .quad .Lfunc_begin1 +.Ldebug_addr_end0: + .ident "clang version 19.0.0git (git@github.com:ayermolo/llvm-project.git a1d8664d409cac2a923176a8e9a731385bde279e)" + .section ".note.GNU-stack","",@progbits + .addrsig + .section .debug_line,"",@progbits +.Lline_table_start0: diff --git a/bolt/test/X86/dwarf5-subprogram-single-gc-ranges.test b/bolt/test/X86/dwarf5-subprogram-single-gc-ranges.test index 04b7203a5bea..9f8f895ed5f1 100644 --- a/bolt/test/X86/dwarf5-subprogram-single-gc-ranges.test +++ b/bolt/test/X86/dwarf5-subprogram-single-gc-ranges.test @@ -2,14 +2,12 @@ # RUN: llvm-mc -dwarf-version=5 -filetype=obj -triple x86_64-unknown-linux %p/Inputs/dwarf5-subprogram-single-gc-ranges-main.s -o %t1.o # RUN: %clang %cflags %t1.o -o %t.exe -Wl,-q -# RUN: llvm-bolt %t.exe -o %t.bolt --update-debug-sections &> %t1.txt -# RUN: llvm-dwarfdump --show-form --verbose --debug-info %t.bolt >> %t1.txt +# RUN: llvm-bolt %t.exe -o %t.bolt --update-debug-sections +# RUN: llvm-dwarfdump --show-form --verbose --debug-info %t.bolt > %t1.txt # RUN: cat %t1.txt | FileCheck --check-prefix=POSTCHECK %s # This test checks BOLT correctly handles DW_TAG_subprogram with Ranges with single entry, when function was GCed. 
-# POSTCHECK: BOLT-WARNING: [internal-dwarf-error]: subprogram got GCed by the linker, DW_AT_ranges is used - # POSTCHECK: DW_TAG_subprogram # POSTCHECK-NEXT: DW_AT_frame_base # POSTCHECK-NEXT: DW_AT_linkage_name -- Gitee From 4d66017196c0a51f240b63b67c8570398866d473 Mon Sep 17 00:00:00 2001 From: Matthias Braun Date: Thu, 5 Oct 2023 18:26:50 -0700 Subject: [PATCH 40/47] BlockFrequencyInfo: Add PrintBlockFreq helper (#67512) - Refactor the (Machine)BlockFrequencyInfo::printBlockFreq functions into a `printBlockFreq()` function returning a `Printable` object. This simplifies usage as it can be directly piped to a `raw_ostream` like `dbgs() << printBlockFreq(MBFI, Freq) << '\n';`. - Previously there was an interesting behavior where `BlockFrequencyInfoImpl` stores frequencies both as a `Scaled64` number and as a `uint64_t`. Most algorithms use the `BlockFrequency` abstraction with the integers; the print function for basic blocks printed the `Scaled64` number, potentially showing higher accuracy than was used by the algorithm. This changes things to only print `BlockFrequency` values. - Replace some instances of `dbgs() << Freq.getFrequency()` with the new function.
--- .../llvm/Analysis/BlockFrequencyInfo.h | 18 ++++---- .../llvm/Analysis/BlockFrequencyInfoImpl.h | 14 ++---- llvm/include/llvm/CodeGen/MBFIWrapper.h | 9 +--- .../llvm/CodeGen/MachineBlockFrequencyInfo.h | 20 +++++---- llvm/lib/Analysis/BlockFrequencyInfo.cpp | 43 +++++++++---------- llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp | 24 +++++------ llvm/lib/CodeGen/MBFIWrapper.cpp | 10 ----- .../lib/CodeGen/MachineBlockFrequencyInfo.cpp | 25 ++++++----- llvm/lib/CodeGen/MachineBlockPlacement.cpp | 21 ++++----- llvm/lib/CodeGen/RegAllocGreedy.cpp | 18 ++++---- llvm/lib/CodeGen/ShrinkWrap.cpp | 4 +- 11 files changed, 94 insertions(+), 112 deletions(-) diff --git a/llvm/include/llvm/Analysis/BlockFrequencyInfo.h b/llvm/include/llvm/Analysis/BlockFrequencyInfo.h index 39507570a1b2..6c33feb3bc05 100644 --- a/llvm/include/llvm/Analysis/BlockFrequencyInfo.h +++ b/llvm/include/llvm/Analysis/BlockFrequencyInfo.h @@ -16,6 +16,7 @@ #include "llvm/IR/PassManager.h" #include "llvm/Pass.h" #include "llvm/Support/BlockFrequency.h" +#include "llvm/Support/Printable.h" #include #include #include @@ -92,14 +93,6 @@ public: void calculate(const Function &F, const BranchProbabilityInfo &BPI, const LoopInfo &LI); - // Print the block frequency Freq to OS using the current functions entry - // frequency to convert freq into a relative decimal form. - raw_ostream &printBlockFreq(raw_ostream &OS, const BlockFrequency Freq) const; - - // Convenience method that attempts to look up the frequency associated with - // BB and print it to OS. - raw_ostream &printBlockFreq(raw_ostream &OS, const BasicBlock *BB) const; - uint64_t getEntryFreq() const; void releaseMemory(); void print(raw_ostream &OS) const; @@ -108,6 +101,15 @@ public: void verifyMatch(BlockFrequencyInfo &Other) const; }; +/// Print the block frequency @p Freq relative to the current functions entry +/// frequency. Returns a Printable object that can be piped via `<<` to a +/// `raw_ostream`. 
+Printable printBlockFreq(const BlockFrequencyInfo &BFI, BlockFrequency Freq); + +/// Convenience function equivalent to calling +/// `printBlockFreq(BFI, BFI.getBlockFreq(&BB))`. +Printable printBlockFreq(const BlockFrequencyInfo &BFI, const BasicBlock &BB); + /// Analysis pass which computes \c BlockFrequencyInfo. class BlockFrequencyAnalysis : public AnalysisInfoMixin { diff --git a/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h b/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h index 9d96748874a0..2eb13a70b34f 100644 --- a/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h +++ b/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h @@ -533,16 +533,15 @@ public: void setBlockFreq(const BlockNode &Node, uint64_t Freq); - raw_ostream &printBlockFreq(raw_ostream &OS, const BlockNode &Node) const; - raw_ostream &printBlockFreq(raw_ostream &OS, - const BlockFrequency &Freq) const; - uint64_t getEntryFreq() const { assert(!Freqs.empty()); return Freqs[0].Integer; } }; +void printBlockFreqImpl(raw_ostream &OS, BlockFrequency EntryFreq, + BlockFrequency Freq); + namespace bfi_detail { template struct TypeMap {}; @@ -1068,11 +1067,6 @@ public: raw_ostream &print(raw_ostream &OS) const override; using BlockFrequencyInfoImplBase::dump; - using BlockFrequencyInfoImplBase::printBlockFreq; - - raw_ostream &printBlockFreq(raw_ostream &OS, const BlockT *BB) const { - return BlockFrequencyInfoImplBase::printBlockFreq(OS, getNode(BB)); - } void verifyMatch(BlockFrequencyInfoImpl &Other) const; }; @@ -1862,7 +1856,7 @@ struct BFIDOTGraphTraitsBase : public DefaultDOTGraphTraits { OS << Node->getName() << " : "; switch (GType) { case GVDT_Fraction: - Graph->printBlockFreq(OS, Node); + OS << printBlockFreq(*Graph, *Node); break; case GVDT_Integer: OS << Graph->getBlockFreq(Node).getFrequency(); diff --git a/llvm/include/llvm/CodeGen/MBFIWrapper.h b/llvm/include/llvm/CodeGen/MBFIWrapper.h index 714ecc5d4334..df87888a7f15 100644 --- a/llvm/include/llvm/CodeGen/MBFIWrapper.h
+++ b/llvm/include/llvm/CodeGen/MBFIWrapper.h @@ -16,7 +16,6 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/Support/BlockFrequency.h" -#include "llvm/Support/raw_ostream.h" #include namespace llvm { @@ -33,15 +32,11 @@ class MBFIWrapper { std::optional getBlockProfileCount(const MachineBasicBlock *MBB) const; - raw_ostream &printBlockFreq(raw_ostream &OS, - const MachineBasicBlock *MBB) const; - raw_ostream &printBlockFreq(raw_ostream &OS, - const BlockFrequency Freq) const; void view(const Twine &Name, bool isSimple = true); uint64_t getEntryFreq() const; - const MachineBlockFrequencyInfo &getMBFI() { return MBFI; } + const MachineBlockFrequencyInfo &getMBFI() const { return MBFI; } - private: +private: const MachineBlockFrequencyInfo &MBFI; DenseMap MergedBBFreq; }; diff --git a/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h b/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h index 1152eefed6e4..517fe1207031 100644 --- a/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h +++ b/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h @@ -90,20 +90,22 @@ public: /// rendered using dot. void view(const Twine &Name, bool isSimple = true) const; - // Print the block frequency Freq to OS using the current functions entry - // frequency to convert freq into a relative decimal form. - raw_ostream &printBlockFreq(raw_ostream &OS, const BlockFrequency Freq) const; - - // Convenience method that attempts to look up the frequency associated with - // BB and print it to OS. - raw_ostream &printBlockFreq(raw_ostream &OS, - const MachineBasicBlock *MBB) const; - /// Divide a block's BlockFrequency::getFrequency() value by this value to /// obtain the entry block - relative frequency of said block. uint64_t getEntryFreq() const; }; +/// Print the block frequency @p Freq relative to the current functions entry +/// frequency. Returns a Printable object that can be piped via `<<` to a +/// `raw_ostream`. 
+Printable printBlockFreq(const MachineBlockFrequencyInfo &MBFI, + BlockFrequency Freq); + +/// Convenience function equivalent to calling +/// `printBlockFreq(MBFI, MBFI.getBlockFreq(&MBB))`. +Printable printBlockFreq(const MachineBlockFrequencyInfo &MBFI, + const MachineBasicBlock &MBB); + } // end namespace llvm #endif // LLVM_CODEGEN_MACHINEBLOCKFREQUENCYINFO_H diff --git a/llvm/lib/Analysis/BlockFrequencyInfo.cpp b/llvm/lib/Analysis/BlockFrequencyInfo.cpp index b18d04cc73db..e3824fe03343 100644 --- a/llvm/lib/Analysis/BlockFrequencyInfo.cpp +++ b/llvm/lib/Analysis/BlockFrequencyInfo.cpp @@ -78,14 +78,13 @@ cl::opt PGOViewCounts( clEnumValN(PGOVCT_Graph, "graph", "show a graph."), clEnumValN(PGOVCT_Text, "text", "show in text."))); -static cl::opt PrintBlockFreq( - "print-bfi", cl::init(false), cl::Hidden, - cl::desc("Print the block frequency info.")); - -cl::opt PrintBlockFreqFuncName( - "print-bfi-func-name", cl::Hidden, - cl::desc("The option to specify the name of the function " - "whose block frequency info is printed.")); +static cl::opt PrintBFI("print-bfi", cl::init(false), cl::Hidden, + cl::desc("Print the block frequency info.")); + +cl::opt + PrintBFIFuncName("print-bfi-func-name", cl::Hidden, + cl::desc("The option to specify the name of the function " + "whose block frequency info is printed.")); } // namespace llvm namespace llvm { @@ -193,9 +192,8 @@ void BlockFrequencyInfo::calculate(const Function &F, F.getName().equals(ViewBlockFreqFuncName))) { view(); } - if (PrintBlockFreq && - (PrintBlockFreqFuncName.empty() || - F.getName().equals(PrintBlockFreqFuncName))) { + if (PrintBFI && + (PrintBFIFuncName.empty() || F.getName().equals(PrintBFIFuncName))) { print(dbgs()); } } @@ -266,17 +264,6 @@ const BranchProbabilityInfo *BlockFrequencyInfo::getBPI() const { return BFI ? &BFI->getBPI() : nullptr; } -raw_ostream &BlockFrequencyInfo:: -printBlockFreq(raw_ostream &OS, const BlockFrequency Freq) const { - return BFI ? 
BFI->printBlockFreq(OS, Freq) : OS; -} - -raw_ostream & -BlockFrequencyInfo::printBlockFreq(raw_ostream &OS, - const BasicBlock *BB) const { - return BFI ? BFI->printBlockFreq(OS, BB) : OS; -} - uint64_t BlockFrequencyInfo::getEntryFreq() const { return BFI ? BFI->getEntryFreq() : 0; } @@ -293,6 +280,18 @@ void BlockFrequencyInfo::verifyMatch(BlockFrequencyInfo &Other) const { BFI->verifyMatch(*Other.BFI); } +Printable llvm::printBlockFreq(const BlockFrequencyInfo &BFI, + BlockFrequency Freq) { + return Printable([&BFI, Freq](raw_ostream &OS) { + printBlockFreqImpl(OS, BFI.getEntryFreq(), Freq); + }); +} + +Printable llvm::printBlockFreq(const BlockFrequencyInfo &BFI, + const BasicBlock &BB) { + return printBlockFreq(BFI, BFI.getBlockFreq(&BB)); +} + INITIALIZE_PASS_BEGIN(BlockFrequencyInfoWrapperPass, "block-freq", "Block Frequency Analysis", true, true) INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) diff --git a/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp b/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp index 82b1e3b9eede..12db1f720968 100644 --- a/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp +++ b/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp @@ -643,19 +643,19 @@ BlockFrequencyInfoImplBase::getLoopName(const LoopData &Loop) const { return getBlockName(Loop.getHeader()) + (Loop.isIrreducible() ? 
"**" : "*"); } -raw_ostream & -BlockFrequencyInfoImplBase::printBlockFreq(raw_ostream &OS, - const BlockNode &Node) const { - return OS << getFloatingBlockFreq(Node); -} - -raw_ostream & -BlockFrequencyInfoImplBase::printBlockFreq(raw_ostream &OS, - const BlockFrequency &Freq) const { +void llvm::printBlockFreqImpl(raw_ostream &OS, BlockFrequency EntryFreq, + BlockFrequency Freq) { + if (Freq == BlockFrequency(0)) { + OS << "0"; + return; + } + if (EntryFreq == BlockFrequency(0)) { + OS << ""; + return; + } Scaled64 Block(Freq.getFrequency(), 0); - Scaled64 Entry(getEntryFreq(), 0); - - return OS << Block / Entry; + Scaled64 Entry(EntryFreq.getFrequency(), 0); + OS << Block / Entry; } void IrreducibleGraph::addNodesInLoop(const BFIBase::LoopData &OuterLoop) { diff --git a/llvm/lib/CodeGen/MBFIWrapper.cpp b/llvm/lib/CodeGen/MBFIWrapper.cpp index 5b388be27839..6fe821e086ba 100644 --- a/llvm/lib/CodeGen/MBFIWrapper.cpp +++ b/llvm/lib/CodeGen/MBFIWrapper.cpp @@ -43,16 +43,6 @@ MBFIWrapper::getBlockProfileCount(const MachineBasicBlock *MBB) const { return MBFI.getBlockProfileCount(MBB); } -raw_ostream & MBFIWrapper::printBlockFreq(raw_ostream &OS, - const MachineBasicBlock *MBB) const { - return MBFI.printBlockFreq(OS, getBlockFreq(MBB)); -} - -raw_ostream & MBFIWrapper::printBlockFreq(raw_ostream &OS, - const BlockFrequency Freq) const { - return MBFI.printBlockFreq(OS, Freq); -} - void MBFIWrapper::view(const Twine &Name, bool isSimple) { MBFI.view(Name, isSimple); } diff --git a/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp index b1cbe525d7e6..46a1e6834f8c 100644 --- a/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp +++ b/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp @@ -75,7 +75,7 @@ static cl::opt PrintMachineBlockFreq( // Command line option to specify the name of the function for block frequency // dump. Defined in Analysis/BlockFrequencyInfo.cpp. 
-extern cl::opt PrintBlockFreqFuncName; +extern cl::opt PrintBFIFuncName; } // namespace llvm static GVDAGType getGVDT() { @@ -203,8 +203,7 @@ void MachineBlockFrequencyInfo::calculate( view("MachineBlockFrequencyDAGS." + F.getName()); } if (PrintMachineBlockFreq && - (PrintBlockFreqFuncName.empty() || - F.getName().equals(PrintBlockFreqFuncName))) { + (PrintBFIFuncName.empty() || F.getName().equals(PrintBFIFuncName))) { MBFI->print(dbgs()); } } @@ -274,18 +273,18 @@ const MachineBranchProbabilityInfo *MachineBlockFrequencyInfo::getMBPI() const { return MBFI ? &MBFI->getBPI() : nullptr; } -raw_ostream & -MachineBlockFrequencyInfo::printBlockFreq(raw_ostream &OS, - const BlockFrequency Freq) const { - return MBFI ? MBFI->printBlockFreq(OS, Freq) : OS; +uint64_t MachineBlockFrequencyInfo::getEntryFreq() const { + return MBFI ? MBFI->getEntryFreq() : 0; } -raw_ostream & -MachineBlockFrequencyInfo::printBlockFreq(raw_ostream &OS, - const MachineBasicBlock *MBB) const { - return MBFI ? MBFI->printBlockFreq(OS, MBB) : OS; +Printable llvm::printBlockFreq(const MachineBlockFrequencyInfo &MBFI, + BlockFrequency Freq) { + return Printable([&MBFI, Freq](raw_ostream &OS) { + printBlockFreqImpl(OS, MBFI.getEntryFreq(), Freq); + }); } -uint64_t MachineBlockFrequencyInfo::getEntryFreq() const { - return MBFI ? 
MBFI->getEntryFreq() : 0; +Printable llvm::printBlockFreq(const MachineBlockFrequencyInfo &MBFI, + const MachineBasicBlock &MBB) { + return printBlockFreq(MBFI, MBFI.getBlockFreq(&MBB)); } diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp index 912e9ec993e3..6913165add25 100644 --- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -1729,8 +1729,9 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock( "Found CFG-violating block"); BlockFrequency CandidateFreq = MBFI->getBlockFreq(MBB); - LLVM_DEBUG(dbgs() << " " << getBlockName(MBB) << " -> "; - MBFI->printBlockFreq(dbgs(), CandidateFreq) << " (freq)\n"); + LLVM_DEBUG(dbgs() << " " << getBlockName(MBB) << " -> " + << printBlockFreq(MBFI->getMBFI(), CandidateFreq) + << " (freq)\n"); // For ehpad, we layout the least probable first as to avoid jumping back // from least probable landingpads to more probable ones. @@ -2095,8 +2096,8 @@ MachineBlockPlacement::findBestLoopTopHelper( if (Pred == L.getHeader()) continue; LLVM_DEBUG(dbgs() << " old top pred: " << getBlockName(Pred) << ", has " - << Pred->succ_size() << " successors, "; - MBFI->printBlockFreq(dbgs(), Pred) << " freq\n"); + << Pred->succ_size() << " successors, " + << printBlockFreq(MBFI->getMBFI(), *Pred) << " freq\n"); if (Pred->succ_size() > 2) continue; @@ -2239,10 +2240,10 @@ MachineBlockPlacement::findBestLoopExit(const MachineLoop &L, } BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(MBB) * SuccProb; - LLVM_DEBUG(dbgs() << " exiting: " << getBlockName(MBB) << " -> " - << getBlockName(Succ) << " [L:" << SuccLoopDepth - << "] ("; - MBFI->printBlockFreq(dbgs(), ExitEdgeFreq) << ")\n"); + LLVM_DEBUG( + dbgs() << " exiting: " << getBlockName(MBB) << " -> " + << getBlockName(Succ) << " [L:" << SuccLoopDepth << "] (" + << printBlockFreq(MBFI->getMBFI(), ExitEdgeFreq) << ")\n"); // Note that we bias this toward an existing layout successor to 
retain // incoming order in the absence of better information. The exit must have // a frequency higher than the current exit before we consider breaking @@ -2537,8 +2538,8 @@ void MachineBlockPlacement::rotateLoopWithProfile( } LLVM_DEBUG(dbgs() << "The cost of loop rotation by making " - << getBlockName(*Iter) - << " to the top: " << Cost.getFrequency() << "\n"); + << getBlockName(*Iter) << " to the top: " + << printBlockFreq(MBFI->getMBFI(), Cost) << "\n"); if (Cost < SmallestRotationCost) { SmallestRotationCost = Cost; diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp index 48187e575494..a338365411e5 100644 --- a/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -1050,8 +1050,8 @@ MCRegister RAGreedy::tryRegionSplit(const LiveInterval &VirtReg, // No benefit from the compact region, our fallback will be per-block // splitting. Make sure we find a solution that is cheaper than spilling. BestCost = SpillCost; - LLVM_DEBUG(dbgs() << "Cost of isolating all blocks = "; - MBFI->printBlockFreq(dbgs(), BestCost) << '\n'); + LLVM_DEBUG(dbgs() << "Cost of isolating all blocks = " + << printBlockFreq(*MBFI, BestCost) << '\n'); } unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost, @@ -1106,8 +1106,8 @@ unsigned RAGreedy::calculateRegionSplitCost(const LiveInterval &VirtReg, LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tno positive bundles\n"); continue; } - LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tstatic = "; - MBFI->printBlockFreq(dbgs(), Cost)); + LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tstatic = " + << printBlockFreq(*MBFI, Cost)); if (Cost >= BestCost) { LLVM_DEBUG({ if (BestCand == NoCand) @@ -1133,8 +1133,8 @@ unsigned RAGreedy::calculateRegionSplitCost(const LiveInterval &VirtReg, Cost += calcGlobalSplitCost(Cand, Order); LLVM_DEBUG({ - dbgs() << ", total = "; - MBFI->printBlockFreq(dbgs(), Cost) << " with bundles"; + dbgs() << ", total = " + << 
printBlockFreq(*MBFI, Cost) << " with bundles"; for (int I : Cand.LiveBundles.set_bits()) dbgs() << " EB#" << I; dbgs() << ".\n"; @@ -2230,9 +2230,9 @@ void RAGreedy::tryHintRecoloring(const LiveInterval &VirtReg) { LLVM_DEBUG(dbgs() << "Checking profitability:\n"); BlockFrequency OldCopiesCost = getBrokenHintFreq(Info, CurrPhys); BlockFrequency NewCopiesCost = getBrokenHintFreq(Info, PhysReg); - LLVM_DEBUG(dbgs() << "Old Cost: " << OldCopiesCost.getFrequency() - << "\nNew Cost: " << NewCopiesCost.getFrequency() - << '\n'); + LLVM_DEBUG(dbgs() << "Old Cost: " << printBlockFreq(*MBFI, OldCopiesCost) + << "\nNew Cost: " + << printBlockFreq(*MBFI, NewCopiesCost) << '\n'); if (OldCopiesCost < NewCopiesCost) { LLVM_DEBUG(dbgs() << "=> Not profitable.\n"); continue; diff --git a/llvm/lib/CodeGen/ShrinkWrap.cpp b/llvm/lib/CodeGen/ShrinkWrap.cpp index 4b1d3637a746..56644864edba 100644 --- a/llvm/lib/CodeGen/ShrinkWrap.cpp +++ b/llvm/lib/CodeGen/ShrinkWrap.cpp @@ -886,9 +886,9 @@ bool ShrinkWrap::performShrinkWrapping( do { LLVM_DEBUG(dbgs() << "Shrink wrap candidates (#, Name, Freq):\nSave: " << printMBBReference(*Save) << ' ' - << MBFI->getBlockFreq(Save).getFrequency() + << printBlockFreq(*MBFI, *Save) << "\nRestore: " << printMBBReference(*Restore) << ' ' - << MBFI->getBlockFreq(Restore).getFrequency() << '\n'); + << printBlockFreq(*MBFI, *Restore) << '\n'); bool IsSaveCheap, TargetCanUseSaveAsPrologue = false; if (((IsSaveCheap = EntryFreq >= MBFI->getBlockFreq(Save).getFrequency()) && -- Gitee From c70f37389ec5f85ddde6df0e757997cfaf34e5ff Mon Sep 17 00:00:00 2001 From: Micah Weston Date: Tue, 27 Feb 2024 14:13:00 -0500 Subject: [PATCH 41/47] [SHT_LLVM_BB_ADDR_MAP] Adds pretty printing of BFI and BPI for PGO Analysis Map in tools. (#82292) Primary change is to add a flag `--pretty-pgo-analysis-map` to llvm-readobj and llvm-objdump that prints block frequencies and branch probabilities in the same manner as BFI and BPI respectively. 
This can be helpful if you are manually inspecting the outputs from the tools. In order to print, I moved the `printBlockFreqImpl` function from Analysis to Support and renamed it to `printRelativeBlockFreq`. --- llvm/docs/CommandGuide/llvm-objdump.rst | 16 +++++- llvm/docs/CommandGuide/llvm-readobj.rst | 11 ++++ .../llvm/Analysis/BlockFrequencyInfoImpl.h | 3 - llvm/include/llvm/Support/BlockFrequency.h | 4 ++ llvm/lib/Analysis/BlockFrequencyInfo.cpp | 2 +- llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp | 15 ----- .../lib/CodeGen/MachineBlockFrequencyInfo.cpp | 2 +- llvm/lib/Support/BlockFrequency.cpp | 17 ++++++ .../llvm-objdump/X86/elf-pgoanalysismap.yaml | 56 +++++++++++++------ .../ELF/bb-addr-map-pgo-analysis-map.test | 28 ++++++---- llvm/tools/llvm-objdump/ObjdumpOpts.td | 4 ++ llvm/tools/llvm-objdump/llvm-objdump.cpp | 29 +++++++--- llvm/tools/llvm-readobj/ELFDumper.cpp | 35 +++++++----- llvm/tools/llvm-readobj/ObjDumper.h | 4 +- llvm/tools/llvm-readobj/Opts.td | 1 + llvm/tools/llvm-readobj/llvm-readobj.cpp | 8 ++- 16 files changed, 164 insertions(+), 71 deletions(-) diff --git a/llvm/docs/CommandGuide/llvm-objdump.rst b/llvm/docs/CommandGuide/llvm-objdump.rst index cd3d14b41143..65e81e816575 100644 --- a/llvm/docs/CommandGuide/llvm-objdump.rst +++ b/llvm/docs/CommandGuide/llvm-objdump.rst @@ -258,7 +258,12 @@ OPTIONS When printing a PC-relative global symbol reference, print it as an offset from the leading symbol. - When a bb-address-map section is present (i.e., the object file is built with ``-fbasic-block-sections=labels``), labels are retrieved from that section instead. + When a bb-address-map section is present (i.e., the object file is built with + ``-fbasic-block-sections=labels``), labels are retrieved from that section + instead. If a pgo-analysis-map is present alongside the bb-address-map, any + available analyses are printed after the relevant block label. By default, + any analysis with a special representation (i.e. 
BlockFrequency, + BranchProbability, etc) are printed as raw hex values. Only works with PowerPC objects or X86 linked images. @@ -278,6 +283,15 @@ OPTIONS cmp eax, dword ptr jge +.. option:: --pretty-pgo-analysis-map + + When using :option:`--symbolize-operands` with bb-address-map and + pgo-analysis-map, print analyses using the same format as their analysis + passes would. An example of pretty format would be printing block frequencies + relative to the entry block, the same as BFI. + + Only works when :option:`--symbolize-operands` is enabled. + .. option:: --triple= Target triple to disassemble for, see ``--version`` for available targets. diff --git a/llvm/docs/CommandGuide/llvm-readobj.rst b/llvm/docs/CommandGuide/llvm-readobj.rst index cb9232ef5e56..7e4093c9fcfe 100644 --- a/llvm/docs/CommandGuide/llvm-readobj.rst +++ b/llvm/docs/CommandGuide/llvm-readobj.rst @@ -159,6 +159,17 @@ The following options are implemented only for the ELF file format. Display the contents of the basic block address map section(s), which contain the address of each function, along with the relative offset of each basic block. + When pgo analysis maps are present, all analyses are printed as their raw + value. + +.. option:: --pretty-pgo-analysis-map + + When pgo analysis maps are present in the basic block address map section(s), + analyses with special formats (i.e. BlockFrequency, BranchProbability, etc) + are printed using the same format as their respective analysis pass. + + Requires :option:`--bb-addr-map` to have an effect. + .. option:: --demangle, -C Display demangled symbol names in the output. 
diff --git a/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h b/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h index 2eb13a70b34f..40e27d73512e 100644 --- a/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h +++ b/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h @@ -539,9 +539,6 @@ public: } }; -void printBlockFreqImpl(raw_ostream &OS, BlockFrequency EntryFreq, - BlockFrequency Freq); - namespace bfi_detail { template struct TypeMap {}; diff --git a/llvm/include/llvm/Support/BlockFrequency.h b/llvm/include/llvm/Support/BlockFrequency.h index 6c624d7dad7d..832221de00be 100644 --- a/llvm/include/llvm/Support/BlockFrequency.h +++ b/llvm/include/llvm/Support/BlockFrequency.h @@ -18,6 +18,7 @@ namespace llvm { +class raw_ostream; class BranchProbability; // This class represents Block Frequency as a 64-bit value. @@ -110,6 +111,9 @@ public: } }; +void printRelativeBlockFreq(raw_ostream &OS, BlockFrequency EntryFreq, + BlockFrequency Freq); + } // namespace llvm #endif diff --git a/llvm/lib/Analysis/BlockFrequencyInfo.cpp b/llvm/lib/Analysis/BlockFrequencyInfo.cpp index e3824fe03343..a172a3ed55a6 100644 --- a/llvm/lib/Analysis/BlockFrequencyInfo.cpp +++ b/llvm/lib/Analysis/BlockFrequencyInfo.cpp @@ -283,7 +283,7 @@ void BlockFrequencyInfo::verifyMatch(BlockFrequencyInfo &Other) const { Printable llvm::printBlockFreq(const BlockFrequencyInfo &BFI, BlockFrequency Freq) { return Printable([&BFI, Freq](raw_ostream &OS) { - printBlockFreqImpl(OS, BFI.getEntryFreq(), Freq); + printRelativeBlockFreq(OS, BFI.getEntryFreq(), Freq); }); } diff --git a/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp b/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp index 12db1f720968..83d92294b37f 100644 --- a/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp +++ b/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp @@ -643,21 +643,6 @@ BlockFrequencyInfoImplBase::getLoopName(const LoopData &Loop) const { return getBlockName(Loop.getHeader()) + (Loop.isIrreducible() ? 
"**" : "*"); } -void llvm::printBlockFreqImpl(raw_ostream &OS, BlockFrequency EntryFreq, - BlockFrequency Freq) { - if (Freq == BlockFrequency(0)) { - OS << "0"; - return; - } - if (EntryFreq == BlockFrequency(0)) { - OS << ""; - return; - } - Scaled64 Block(Freq.getFrequency(), 0); - Scaled64 Entry(EntryFreq.getFrequency(), 0); - OS << Block / Entry; -} - void IrreducibleGraph::addNodesInLoop(const BFIBase::LoopData &OuterLoop) { Start = OuterLoop.getHeader(); Nodes.reserve(OuterLoop.Nodes.size()); diff --git a/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp index 46a1e6834f8c..c81135d8b905 100644 --- a/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp +++ b/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp @@ -280,7 +280,7 @@ uint64_t MachineBlockFrequencyInfo::getEntryFreq() const { Printable llvm::printBlockFreq(const MachineBlockFrequencyInfo &MBFI, BlockFrequency Freq) { return Printable([&MBFI, Freq](raw_ostream &OS) { - printBlockFreqImpl(OS, MBFI.getEntryFreq(), Freq); + printRelativeBlockFreq(OS, MBFI.getEntryFreq(), Freq); }); } diff --git a/llvm/lib/Support/BlockFrequency.cpp b/llvm/lib/Support/BlockFrequency.cpp index a4a1e477d940..ac8b5c25aec6 100644 --- a/llvm/lib/Support/BlockFrequency.cpp +++ b/llvm/lib/Support/BlockFrequency.cpp @@ -12,6 +12,8 @@ #include "llvm/Support/BlockFrequency.h" #include "llvm/Support/BranchProbability.h" +#include "llvm/Support/ScaledNumber.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -36,3 +38,18 @@ BlockFrequency BlockFrequency::operator/(BranchProbability Prob) const { Freq /= Prob; return Freq; } + +void llvm::printRelativeBlockFreq(raw_ostream &OS, BlockFrequency EntryFreq, + BlockFrequency Freq) { + if (Freq == BlockFrequency(0)) { + OS << "0"; + return; + } + if (EntryFreq == BlockFrequency(0)) { + OS << ""; + return; + } + ScaledNumber Block(Freq.getFrequency(), 0); + ScaledNumber Entry(EntryFreq.getFrequency(), 0); + OS << Block / Entry; +} diff 
--git a/llvm/test/tools/llvm-objdump/X86/elf-pgoanalysismap.yaml b/llvm/test/tools/llvm-objdump/X86/elf-pgoanalysismap.yaml index 732fab3e2a37..4d1e5408d86d 100644 --- a/llvm/test/tools/llvm-objdump/X86/elf-pgoanalysismap.yaml +++ b/llvm/test/tools/llvm-objdump/X86/elf-pgoanalysismap.yaml @@ -47,7 +47,9 @@ Symbols: # RUN: yaml2obj %s --docnum=2 -o %t2 # RUN: llvm-objdump %t2 -d --symbolize-operands --no-show-raw-insn --no-leading-addr | \ -# RUN: FileCheck %s --check-prefix=ENTRYCOUNT-BLOCKFREQ +# RUN: FileCheck --match-full-lines --strict-whitespace %s --check-prefix=ENTRYCOUNT-BLOCKFREQ +# RUN: llvm-objdump %t2 -d --symbolize-operands --pretty-pgo-analysis-map --no-show-raw-insn --no-leading-addr | \ +# RUN: FileCheck --match-full-lines --strict-whitespace %s --check-prefix=ENTRYCOUNT-BLOCKFREQ-PRETTY --- !ELF FileHeader: @@ -98,18 +100,28 @@ Symbols: Section: .text.foo Value: 0x0 -# ENTRYCOUNT-BLOCKFREQ: : -# ENTRYCOUNT-BLOCKFREQ: (Entry count: 1000, Frequency: 1000): -# ENTRYCOUNT-BLOCKFREQ: (Frequency: 133): -# ENTRYCOUNT-BLOCKFREQ: (Frequency: 18): -# ENTRYCOUNT-BLOCKFREQ: (Frequency: 1000): +# ENTRYCOUNT-BLOCKFREQ:: +# ENTRYCOUNT-BLOCKFREQ: (Entry count: 1000, Frequency: 1000): +# ENTRYCOUNT-BLOCKFREQ: (Frequency: 133): +# ENTRYCOUNT-BLOCKFREQ: (Frequency: 18): +# ENTRYCOUNT-BLOCKFREQ: (Frequency: 1000): + +# ENTRYCOUNT-BLOCKFREQ-PRETTY:: +# ENTRYCOUNT-BLOCKFREQ-PRETTY: (Entry count: 1000, Frequency: 1.0): +# ENTRYCOUNT-BLOCKFREQ-PRETTY: (Frequency: 0.133): +# ENTRYCOUNT-BLOCKFREQ-PRETTY: (Frequency: 0.018): +# ENTRYCOUNT-BLOCKFREQ-PRETTY: (Frequency: 1.0): ## Check the case where we have entry points, block frequency, and branch ## proabability information. 
# RUN: yaml2obj %s --docnum=3 -o %t3 # RUN: llvm-objdump %t3 -d --symbolize-operands --no-show-raw-insn --no-leading-addr | \ -# RUN: FileCheck %s --check-prefix=ENTRY-FREQ-PROB +# RUN: FileCheck --match-full-lines --strict-whitespace %s --check-prefix=ENTRY-FREQ-PROB +# RUN: llvm-objdump %t3 -d --symbolize-operands --pretty-pgo-analysis-map --no-show-raw-insn --no-leading-addr | \ +# RUN: FileCheck --match-full-lines --strict-whitespace %s --check-prefix=ENTRY-FREQ-PROB-PRETTY +# RUN: llvm-objdump %t3 -d --pretty-pgo-analysis-map --no-show-raw-insn --no-leading-addr 2>&1 | \ +# RUN: FileCheck %s --check-prefix=MISSING-SYMBOLIZE-OPERANDS --- !ELF FileHeader: @@ -154,21 +166,21 @@ Sections: - BBFreq: 1000 Successors: - ID: 1 - BrProb: 0x22222222 + BrProb: 0x10000000 - ID: 2 - BrProb: 0x33333333 + BrProb: 0x15000000 - ID: 3 - BrProb: 0xaaaaaaaa + BrProb: 0x50000000 - BBFreq: 133 Successors: - ID: 2 - BrProb: 0x11111111 + BrProb: 0x10000000 - ID: 3 - BrProb: 0xeeeeeeee + BrProb: 0x70000000 - BBFreq: 18 Successors: - ID: 3 - BrProb: 0xffffffff + BrProb: 0x80000000 - BBFreq: 1000 Successors: [] Symbols: @@ -176,8 +188,16 @@ Symbols: Section: .text.foo Value: 0x0 -# ENTRY-FREQ-PROB: : -# ENTRY-FREQ-PROB: (Entry count: 1000, Frequency: 1000, Successors: BB1:22222222, BB2:33333333, BB3:aaaaaaaa): -# ENTRY-FREQ-PROB: (Frequency: 133, Successors: BB2:11111111, BB3:eeeeeeee): -# ENTRY-FREQ-PROB: (Frequency: 18, Successors: BB3:ffffffff): -# ENTRY-FREQ-PROB: (Frequency: 1000): +# ENTRY-FREQ-PROB:: +# ENTRY-FREQ-PROB: (Entry count: 1000, Frequency: 1000, Successors: BB1:10000000, BB2:15000000, BB3:50000000): +# ENTRY-FREQ-PROB: (Frequency: 133, Successors: BB2:10000000, BB3:70000000): +# ENTRY-FREQ-PROB: (Frequency: 18, Successors: BB3:80000000): +# ENTRY-FREQ-PROB: (Frequency: 1000): + +# ENTRY-FREQ-PROB-PRETTY:: +# ENTRY-FREQ-PROB-PRETTY: (Entry count: 1000, Frequency: 1.0, Successors: BB1:[0x10000000 / 0x80000000 = 12.50%], BB2:[0x15000000 / 0x80000000 = 16.41%], 
BB3:[0x50000000 / 0x80000000 = 62.50%]): +# ENTRY-FREQ-PROB-PRETTY: (Frequency: 0.133, Successors: BB2:[0x10000000 / 0x80000000 = 12.50%], BB3:[0x70000000 / 0x80000000 = 87.50%]): +# ENTRY-FREQ-PROB-PRETTY: (Frequency: 0.018, Successors: BB3:[0x80000000 / 0x80000000 = 100.00%]): +# ENTRY-FREQ-PROB-PRETTY: (Frequency: 1.0): + +# MISSING-SYMBOLIZE-OPERANDS: warning: --symbolize-operands must be enabled for --pretty-pgo-analysis-map to have an effect diff --git a/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-pgo-analysis-map.test b/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-pgo-analysis-map.test index e5a9400c670c..5faafd4d83b2 100644 --- a/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-pgo-analysis-map.test +++ b/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-pgo-analysis-map.test @@ -3,17 +3,19 @@ ## Check 64-bit: # RUN: yaml2obj %s -DBITS=64 -DADDR=0x999999999 -o %t1.x64.o -# RUN: llvm-readobj %t1.x64.o --bb-addr-map 2>&1 | FileCheck %s -DADDR=0x999999999 -DFILE=%t1.x64.o --check-prefix=CHECK +# RUN: llvm-readobj %t1.x64.o --bb-addr-map 2>&1 | FileCheck --match-full-lines %s -DADDR=0x999999999 -DFILE=%t1.x64.o --check-prefixes=CHECK,RAW +# RUN: llvm-readobj %t1.x64.o --bb-addr-map --pretty-pgo-analysis-map 2>&1 | FileCheck --match-full-lines %s -DADDR=0x999999999 -DFILE=%t1.x64.o --check-prefixes=CHECK,PRETTY # RUN: llvm-readelf %t1.x64.o --bb-addr-map | FileCheck %s --check-prefix=GNU +# RUN: llvm-readobj %t1.x64.o --pretty-pgo-analysis-map 2>&1 | FileCheck %s --check-prefix=PRETTY-NO-BAM ## Check 32-bit: # RUN: yaml2obj %s -DBITS=32 -o %t1.x32.o -# RUN: llvm-readobj %t1.x32.o --bb-addr-map 2>&1 | FileCheck -DADDR=0x11111 %s -DFILE=%t1.x32.o --check-prefix=CHECK +# RUN: llvm-readobj %t1.x32.o --bb-addr-map 2>&1 | FileCheck --match-full-lines -DADDR=0x11111 %s -DFILE=%t1.x32.o --check-prefixes=CHECK,RAW # RUN: llvm-readelf %t1.x32.o --bb-addr-map | FileCheck %s --check-prefix=GNU ## Check that a malformed section can be handled. 
# RUN: yaml2obj %s -DBITS=32 -DSIZE=24 -o %t2.o -# RUN: llvm-readobj %t2.o --bb-addr-map 2>&1 | FileCheck %s -DOFFSET=0x00000018 -DFILE=%t2.o --check-prefix=TRUNCATED +# RUN: llvm-readobj %t2.o --bb-addr-map 2>&1 | FileCheck --match-full-lines %s -DOFFSET=0x00000018 -DFILE=%t2.o --check-prefix=TRUNCATED ## Check that missing features can be handled. # RUN: yaml2obj %s -DBITS=32 -DFEATURE=0x2 -o %t3.o @@ -22,7 +24,7 @@ # CHECK: BBAddrMap [ # CHECK-NEXT: Function { # CHECK-NEXT: At: [[ADDR]] -# CHECK-NEXT: warning: '[[FILE]]': could not identify function symbol for address ([[ADDR]]) in SHT_LLVM_BB_ADDR_MAP section with index 3 +# CHECK-NEXT: {{.*}}: warning: '[[FILE]]': could not identify function symbol for address ([[ADDR]]) in SHT_LLVM_BB_ADDR_MAP section with index 3 # CHECK-NEXT: Name: # CHECK-NEXT: BB Ranges [ # CHECK-NEXT: { @@ -55,16 +57,19 @@ # CHECK-NEXT: FuncEntryCount: 100 # CHECK-NEXT: PGO BB entries [ # CHECK-NEXT: { -# CHECK-NEXT: Frequency: 100 +# RAW-NEXT: Frequency: 100 +# PRETTY-NEXT: Frequency: 1.0 # CHECK-NEXT: Successors [ # CHECK-NEXT: { # CHECK-NEXT: ID: 2 -# CHECK-NEXT: Probability: 0xFFFFFFFF +# RAW-NEXT: Probability: 0x80000000 +# PRETTY-NEXT: Probability: 0x80000000 / 0x80000000 = 100.00% # CHECK-NEXT: } # CHECK-NEXT: ] # CHECK-NEXT: } # CHECK-NEXT: { -# CHECK-NEXT: Frequency: 100 +# RAW-NEXT: Frequency: 100 +# PRETTY-NEXT: Frequency: 1.0 # CHECK-NEXT: Successors [ # CHECK-NEXT: ] # CHECK-NEXT: } @@ -95,7 +100,8 @@ # CHECK-NEXT: FuncEntryCount: 8888 # CHECK-NEXT: PGO BB entries [ # CHECK-NEXT: { -# CHECK-NEXT: Frequency: 9000 +# RAW-NEXT: Frequency: 9000 +# PRETTY-NEXT: Frequency: 1.0 # CHECK-NEXT: } # CHECK-NEXT: ] # CHECK-NEXT: } @@ -104,8 +110,10 @@ # GNU: GNUStyle::printBBAddrMaps not implemented +# PRETTY-NO-BAM: warning: --bb-addr-map must be enabled for --pretty-pgo-analysis-map to have an effect + # TRUNCATED: BBAddrMap [ -# TRUNCATED-NEXT: warning: '[[FILE]]': unable to dump SHT_LLVM_BB_ADDR_MAP section with index 3: unable to 
decode LEB128 at offset [[OFFSET]]: malformed uleb128, extends past end +# TRUNCATED-NEXT: {{.*}}: warning: '[[FILE]]': unable to dump SHT_LLVM_BB_ADDR_MAP section with index 3: unable to decode LEB128 at offset [[OFFSET]]: malformed uleb128, extends past end # TRUNCATED-NEXT: ] ## Check that the other valid section is properly dumped. # TRUNCATED-NEXT: BBAddrMap [ @@ -192,7 +200,7 @@ Sections: - BBFreq: 100 Successors: - ID: 2 - BrProb: 0xFFFFFFFF + BrProb: 0x80000000 - BBFreq: 100 Successors: [] - FuncEntryCount: 8888 diff --git a/llvm/tools/llvm-objdump/ObjdumpOpts.td b/llvm/tools/llvm-objdump/ObjdumpOpts.td index e3e74762420d..c3dc5d668ce7 100644 --- a/llvm/tools/llvm-objdump/ObjdumpOpts.td +++ b/llvm/tools/llvm-objdump/ObjdumpOpts.td @@ -202,6 +202,10 @@ def : Flag<["-"], "t">, Alias, HelpText<"Alias for --syms">; def symbolize_operands : Flag<["--"], "symbolize-operands">, HelpText<"Symbolize instruction operands when disassembling">; +def pretty_pgo_analysis_map : Flag<["--"], "pretty-pgo-analysis-map">, + HelpText<"Display PGO analysis values with " + "formatting rather than raw numbers">; + def dynamic_syms : Flag<["--"], "dynamic-syms">, HelpText<"Display the contents of the dynamic symbol table">; def : Flag<["-"], "T">, Alias, diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp index 3926af60c1ee..1a450b9ff62b 100644 --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -198,8 +198,10 @@ public: const BBAddrMap &getAddrMap() const { return AddrMap; } // Returns the PGO string associated with the entry of index `PGOBBEntryIndex` - // in `PGOMap`. - std::string constructPGOLabelString(size_t PGOBBEntryIndex) const { + // in `PGOMap`. If PrettyPGOAnalysis is true, prints BFI as relative frequency + // and BPI as percentage. Otherwise raw values are displayed. 
+ std::string constructPGOLabelString(size_t PGOBBEntryIndex, + bool PrettyPGOAnalysis) const { if (!PGOMap.FeatEnable.hasPGOAnalysis()) return ""; std::string PGOString; @@ -221,7 +223,12 @@ public: PGOMap.BBEntries[PGOBBEntryIndex]; if (PGOMap.FeatEnable.BBFreq) { - PGOSS << "Frequency: " << Twine(PGOBBEntry.BlockFreq.getFrequency()); + PGOSS << "Frequency: "; + if (PrettyPGOAnalysis) + printRelativeBlockFreq(PGOSS, PGOMap.BBEntries.front().BlockFreq, + PGOBBEntry.BlockFreq); + else + PGOSS << Twine(PGOBBEntry.BlockFreq.getFrequency()); if (PGOMap.FeatEnable.BrProb && PGOBBEntry.Successors.size() > 0) { PGOSS << ", "; } @@ -230,9 +237,12 @@ public: PGOSS << "Successors: "; interleaveComma( PGOBBEntry.Successors, PGOSS, - [&PGOSS](const PGOAnalysisMap::PGOBBEntry::SuccessorEntry &SE) { + [&](const PGOAnalysisMap::PGOBBEntry::SuccessorEntry &SE) { PGOSS << "BB" << SE.ID << ":"; - PGOSS.write_hex(SE.Prob.getNumerator()); + if (PrettyPGOAnalysis) + PGOSS << "[" << SE.Prob << "]"; + else + PGOSS.write_hex(SE.Prob.getNumerator()); }); } } @@ -333,6 +343,7 @@ static bool HasStopAddressFlag; bool objdump::SymbolTable; static bool SymbolizeOperands; +static bool PrettyPGOAnalysisMap; static bool DynamicSymbolTable; std::string objdump::TripleName; bool objdump::UnwindInfo; @@ -1278,8 +1289,8 @@ static void collectBBAddrMapLabels( std::string LabelString = ("BB" + Twine(BBEntry.ID)).str(); Labels[BBAddress].push_back( - {LabelString, - FunctionMap->constructPGOLabelString(NumBBEntriesBeforeRange + I)}); + {LabelString, FunctionMap->constructPGOLabelString( + NumBBEntriesBeforeRange + I, PrettyPGOAnalysisMap)}); } } @@ -3187,6 +3198,10 @@ static void parseObjdumpOptions(const llvm::opt::InputArgList &InputArgs) { HasStopAddressFlag = InputArgs.hasArg(OBJDUMP_stop_address_EQ); SymbolTable = InputArgs.hasArg(OBJDUMP_syms); SymbolizeOperands = InputArgs.hasArg(OBJDUMP_symbolize_operands); + PrettyPGOAnalysisMap = InputArgs.hasArg(OBJDUMP_pretty_pgo_analysis_map); + if 
(PrettyPGOAnalysisMap && !SymbolizeOperands) + reportCmdLineWarning("--symbolize-operands must be enabled for " + "--pretty-pgo-analysis-map to have an effect"); DynamicSymbolTable = InputArgs.hasArg(OBJDUMP_dynamic_syms); TripleName = InputArgs.getLastArgValue(OBJDUMP_triple_EQ).str(); UnwindInfo = InputArgs.hasArg(OBJDUMP_unwind_info); diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index 13c19aab2d60..b7a2ff8dd5f3 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -588,7 +588,7 @@ public: void printVersionDefinitionSection(const Elf_Shdr *Sec) override; void printVersionDependencySection(const Elf_Shdr *Sec) override; void printCGProfile() override; - void printBBAddrMaps() override; + void printBBAddrMaps(bool PrettyPGOAnalysis) override; void printAddrsig() override; void printNotes() override; void printELFLinkerOptions() override; @@ -696,7 +696,7 @@ public: void printVersionDefinitionSection(const Elf_Shdr *Sec) override; void printVersionDependencySection(const Elf_Shdr *Sec) override; void printCGProfile() override; - void printBBAddrMaps() override; + void printBBAddrMaps(bool PrettyPGOAnalysis) override; void printAddrsig() override; void printNotes() override; void printELFLinkerOptions() override; @@ -4992,7 +4992,8 @@ template void GNUELFDumper::printCGProfile() { OS << "GNUStyle::printCGProfile not implemented\n"; } -template void GNUELFDumper::printBBAddrMaps() { +template +void GNUELFDumper::printBBAddrMaps(bool /*PrettyPGOAnalysis*/) { OS << "GNUStyle::printBBAddrMaps not implemented\n"; } @@ -7414,7 +7415,8 @@ template void LLVMELFDumper::printCGProfile() { } } -template void LLVMELFDumper::printBBAddrMaps() { +template +void LLVMELFDumper::printBBAddrMaps(bool PrettyPGOAnalysis) { bool IsRelocatable = this->Obj.getHeader().e_type == ELF::ET_REL; using Elf_Shdr = typename ELFT::Shdr; auto IsMatch = [](const Elf_Shdr &Sec) -> bool { @@ -7493,21 +7495,28 @@ 
template void LLVMELFDumper::printBBAddrMaps() { for (const PGOAnalysisMap::PGOBBEntry &PBBE : PAM.BBEntries) { DictScope L(W); - /// FIXME: currently we just emit the raw frequency, it may be - /// better to provide an option to scale it by the first entry - /// frequence using BlockFrequency::Scaled64 number - if (PAM.FeatEnable.BBFreq) - W.printNumber("Frequency", PBBE.BlockFreq.getFrequency()); + if (PAM.FeatEnable.BBFreq) { + if (PrettyPGOAnalysis) { + std::string BlockFreqStr; + raw_string_ostream SS(BlockFreqStr); + printRelativeBlockFreq(SS, PAM.BBEntries.front().BlockFreq, + PBBE.BlockFreq); + W.printString("Frequency", BlockFreqStr); + } else { + W.printNumber("Frequency", PBBE.BlockFreq.getFrequency()); + } + } if (PAM.FeatEnable.BrProb) { ListScope L(W, "Successors"); for (const auto &Succ : PBBE.Successors) { DictScope L(W); W.printNumber("ID", Succ.ID); - /// FIXME: currently we just emit the raw numerator of the - /// probably, it may be better to provide an option to emit it - /// as a percentage or other prettied representation - W.printHex("Probability", Succ.Prob.getNumerator()); + if (PrettyPGOAnalysis) { + W.printObject("Probability", Succ.Prob); + } else { + W.printHex("Probability", Succ.Prob.getNumerator()); + } } } } diff --git a/llvm/tools/llvm-readobj/ObjDumper.h b/llvm/tools/llvm-readobj/ObjDumper.h index 921792f886d0..adb1c5e1a5bf 100644 --- a/llvm/tools/llvm-readobj/ObjDumper.h +++ b/llvm/tools/llvm-readobj/ObjDumper.h @@ -129,7 +129,9 @@ public: virtual void printGroupSections() {} virtual void printHashHistograms() {} virtual void printCGProfile() {} - virtual void printBBAddrMaps() {} + // If PrettyPGOAnalysis is true, prints BFI as relative frequency and BPI as + // percentage. Otherwise raw values are displayed. 
+ virtual void printBBAddrMaps(bool PrettyPGOAnalysis) {} virtual void printAddrsig() {} virtual void printNotes() {} virtual void printELFLinkerOptions() {} diff --git a/llvm/tools/llvm-readobj/Opts.td b/llvm/tools/llvm-readobj/Opts.td index fec0adb5e6a6..1dba47d29ac9 100644 --- a/llvm/tools/llvm-readobj/Opts.td +++ b/llvm/tools/llvm-readobj/Opts.td @@ -19,6 +19,7 @@ def all : FF<"all", "Equivalent to setting: --file-header, --program-headers, -- "--section-groups and --histogram">; def arch_specific : FF<"arch-specific", "Display architecture-specific information">; def bb_addr_map : FF<"bb-addr-map", "Display the BB address map section">; +def pretty_pgo_analysis_map : FF<"pretty-pgo-analysis-map", "Display PGO analysis values with formatting rather than raw numbers">; def cg_profile : FF<"cg-profile", "Display call graph profile section">; defm demangle : BB<"demangle", "Demangle symbol names", "Do not demangle symbol names (default)">; def dependent_libraries : FF<"dependent-libraries", "Display the dependent libraries section">; diff --git a/llvm/tools/llvm-readobj/llvm-readobj.cpp b/llvm/tools/llvm-readobj/llvm-readobj.cpp index d72eec04d06a..24768c2b0a69 100644 --- a/llvm/tools/llvm-readobj/llvm-readobj.cpp +++ b/llvm/tools/llvm-readobj/llvm-readobj.cpp @@ -104,6 +104,7 @@ static bool Addrsig; static bool All; static bool ArchSpecificInfo; static bool BBAddrMap; +static bool PrettyPGOAnalysisMap; bool ExpandRelocs; static bool CGProfile; bool Demangle; @@ -219,6 +220,11 @@ static void parseOptions(const opt::InputArgList &Args) { opts::All = Args.hasArg(OPT_all); opts::ArchSpecificInfo = Args.hasArg(OPT_arch_specific); opts::BBAddrMap = Args.hasArg(OPT_bb_addr_map); + opts::PrettyPGOAnalysisMap = Args.hasArg(OPT_pretty_pgo_analysis_map); + if (opts::PrettyPGOAnalysisMap && !opts::BBAddrMap) + WithColor::warning(errs(), ToolName) + << "--bb-addr-map must be enabled for --pretty-pgo-analysis-map to " + "have an effect\n"; opts::CGProfile = 
Args.hasArg(OPT_cg_profile); opts::Demangle = Args.hasFlag(OPT_demangle, OPT_no_demangle, false); opts::DependentLibraries = Args.hasArg(OPT_dependent_libraries); @@ -470,7 +476,7 @@ static void dumpObject(ObjectFile &Obj, ScopedPrinter &Writer, if (opts::CGProfile) Dumper->printCGProfile(); if (opts::BBAddrMap) - Dumper->printBBAddrMaps(); + Dumper->printBBAddrMaps(opts::PrettyPGOAnalysisMap); if (opts::Addrsig) Dumper->printAddrsig(); if (opts::Notes) -- Gitee From 92525f9e718aa9e65690ba696d543591c8430527 Mon Sep 17 00:00:00 2001 From: Daniel Hoekwater Date: Wed, 28 Feb 2024 01:34:48 +0000 Subject: [PATCH 42/47] [Driver] Allow -fbasic-block-address-map for AArch64 ELF (#82662) Emitting the basic block address map with `-fbasic-block-sections=labels` is allowed for AArch64 ELF since 7eaf94fefa1250fc8a46982cea8ce99abacae11f. Allow doing so with `-fbasic-block-address-map`. --- clang/lib/Driver/ToolChains/Clang.cpp | 2 +- clang/test/Driver/basic-block-address-map.c | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 1d48d0d22949..46c3f70e21ef 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5963,7 +5963,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, if (Arg *A = Args.getLastArg(options::OPT_fbasic_block_address_map, options::OPT_fno_basic_block_address_map)) { - if (Triple.isX86() && Triple.isOSBinFormatELF()) { + if ((Triple.isX86() || Triple.isAArch64()) && Triple.isOSBinFormatELF()) { if (A->getOption().matches(options::OPT_fbasic_block_address_map)) A->render(Args, CmdArgs); } else { diff --git a/clang/test/Driver/basic-block-address-map.c b/clang/test/Driver/basic-block-address-map.c index 022f972b412d..12393e8ebfd5 100644 --- a/clang/test/Driver/basic-block-address-map.c +++ b/clang/test/Driver/basic-block-address-map.c @@ -1,8 +1,9 @@ -// RUN: %clang -### -target x86_64 
-fbasic-block-address-map %s -S 2>&1 | FileCheck -check-prefix=CHECK-PRESENT %s +// RUN: %clang -### --target=x86_64 -fbasic-block-address-map %s -S 2>&1 | FileCheck -check-prefix=CHECK-PRESENT %s +// RUN: %clang -### --target=aarch64 -fbasic-block-address-map %s -S 2>&1 | FileCheck -check-prefix=CHECK-PRESENT %s // CHECK-PRESENT: -fbasic-block-address-map -// RUN: %clang -### -target x86_64 -fno-basic-block-address-map %s -S 2>&1 | FileCheck %s --check-prefix=CHECK-ABSENT +// RUN: %clang -### --target=x86_64 -fno-basic-block-address-map %s -S 2>&1 | FileCheck %s --check-prefix=CHECK-ABSENT // CHECK-ABSENT-NOT: -fbasic-block-address-map -// RUN: not %clang -c -target x86_64-apple-darwin10 -fbasic-block-address-map %s -S 2>&1 | FileCheck -check-prefix=CHECK-TRIPLE %s +// RUN: not %clang -c --target=x86_64-apple-darwin10 -fbasic-block-address-map %s -S 2>&1 | FileCheck -check-prefix=CHECK-TRIPLE %s // CHECK-TRIPLE: error: unsupported option '-fbasic-block-address-map' for target -- Gitee From 1ab2098fa8b7eb1dd6ea62cfca92d06a100ba53e Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Mon, 3 Jun 2024 17:22:43 -0700 Subject: [PATCH 43/47] [Codegen, BasicBlockSections] Avoid cloning blocks which have their machine block address taken. (#94296) These blocks usually show up in the form of branches within inline assembly. Since it's hard to rewire them, we fully omit paths with such blocks from path cloning. 
--- llvm/lib/CodeGen/BasicBlockPathCloning.cpp | 10 +++++++ .../basic-block-sections-cloning-invalid.ll | 27 ++++++++++++++----- 2 files changed, 31 insertions(+), 6 deletions(-) diff --git a/llvm/lib/CodeGen/BasicBlockPathCloning.cpp b/llvm/lib/CodeGen/BasicBlockPathCloning.cpp index 901542e8507b..19f824850607 100644 --- a/llvm/lib/CodeGen/BasicBlockPathCloning.cpp +++ b/llvm/lib/CodeGen/BasicBlockPathCloning.cpp @@ -119,6 +119,16 @@ bool IsValidCloning(const MachineFunction &MF, return false; } } + if (PathBB->isMachineBlockAddressTaken()) { + // Avoid cloning blocks which have their address taken since we can't + // rewire branches to those blocks as easily (e.g., branches within + // inline assembly). + WithColor::warning() + << "block #" << BBID + << " has its machine block address taken in function " + << MF.getName() << "\n"; + return false; + } } if (I != ClonePath.size() - 1 && !PathBB->empty() && diff --git a/llvm/test/CodeGen/X86/basic-block-sections-cloning-invalid.ll b/llvm/test/CodeGen/X86/basic-block-sections-cloning-invalid.ll index 521ec43ef050..c316ef9f8f26 100644 --- a/llvm/test/CodeGen/X86/basic-block-sections-cloning-invalid.ll +++ b/llvm/test/CodeGen/X86/basic-block-sections-cloning-invalid.ll @@ -13,8 +13,8 @@ declare void @effect(i32 zeroext) ; RUN: echo 'v1' > %t2 ; RUN: echo 'f foo' >> %t2 ; RUN: echo 'p 0 2 3' >> %t2 -; RUN: echo 'p 0 1 3' >> %t2 -; RUN: echo 'c 0 1.1 3.2 2.1 3.1 1' >> %t2 +; RUN: echo 'p 0 1 2 3' >> %t2 +; RUN: echo 'c 0 1.1 2.2 3.2 2.1 3.1 1' >> %t2 ; RUN: llc < %s -mtriple=x86_64-pc-linux -O0 -function-sections -basic-block-sections=%t2 2> %t2.err | FileCheck %s --check-prefixes=PATH ; RUN: FileCheck %s --check-prefixes=WARN1 < %t2.err ; RUN: echo 'v1' > %t3 @@ -23,6 +23,14 @@ declare void @effect(i32 zeroext) ; RUN: echo 'c 0 100.1 1' >> %t3 ; RUN: llc < %s -mtriple=x86_64-pc-linux -O0 -function-sections -basic-block-sections=%t3 2> %t3.err | FileCheck %s ; RUN: FileCheck %s --check-prefixes=WARN2 < %t3.err +; RUN: 
echo 'v1' > %t4 +; RUN: echo 'f foo' >> %t4 +; RUN: echo 'p 1 6' >> %t4 +; RUN: echo 'c 0 1 6.1' >> %t4 +; RUN: llc < %s -mtriple=x86_64-pc-linux -O0 -function-sections -basic-block-sections=%t4 2> %t4.err | FileCheck %s +; RUN: FileCheck %s --check-prefixes=WARN3 < %t4.err + + define void @foo(i1 %a, i1 %b, i1 %c, i1 %d) { b0: @@ -31,23 +39,29 @@ b0: b1: ; preds = %b0 call void @effect(i32 1) - br i1 %b, label %b2, label %b3 + br i1 %b, label %b2, label %b6 b2: ; preds = %b1 call void @effect(i32 2) br label %b3 -b3: ; preds = %b0, %b1, %b2 +b3: ; preds = %b0, %b2 call void @effect(i32 3) br i1 %c, label %b4, label %b5 b4: ; preds = %b3 call void @effect(i32 4) - br i1 %d, label %b5, label %cold + callbr void asm sideeffect "je ${0:l}", "!i,~{dirflag},~{fpsr},~{flags}"() + to label %b5 [label %b6] b5: ; preds = %b3, %b4 call void @effect(i32 5) ret void + +b6: ; preds = %b1, %b4 + call void @effect(i32 6) + ret void + cold: call void @effect(i32 6) ; preds = %b4 ret void @@ -59,7 +73,7 @@ cold: ; CHECK: je .LBB0_3 ; PATH: # %bb.7: # %b1 -; PATH: # %bb.8: # %b3 +; PATH: # %bb.8: # %b2 ; PATH: jne .LBB0_4 ; CHECK: # %bb.1: # %b1 ; CHECK: jne foo.cold @@ -69,4 +83,5 @@ cold: ;; Check the warnings ; WARN1: warning: block #2 is not a successor of block #0 in function foo ; WARN2: warning: no block with id 100 in function foo +; WARN3: warning: block #6 has its machine block address taken in function foo -- Gitee From bc42d5950c8197aca5271ed08772e10dcf39bb85 Mon Sep 17 00:00:00 2001 From: Alexis Engelke Date: Mon, 1 Jul 2024 13:55:58 +0200 Subject: [PATCH 44/47] [AsmPrinter] Reduce AsmPrinterHandlers virt. fn calls (#96785) Currently, an AsmPrinterHandler has several methods that allow to dynamically hook in unwind or debug info emission, e.g. at begin/end of every function or instruction. 
The class hierarchy and the actually overridden functions are as follows: (SymSz=setSymbolSize, mFE=markFunctionEnd, BBS=BasicBlockSection, FL=Funclet; b=beginX, e=endX) SymSz Mod Fn mFE BBS FL Inst AsmPrinterHandler - - - - - - - ` PseudoProbeHandler - - - - - - - ` WinCFGuard - e e - - - - ` EHStreamer - - - - - - - ` DwarfCFIException - e be - be - - ` ARMException - - be e - - - ` AIXException - - e - - - - ` WinException - e be e - be - ` WasmException - e be - - - - ` DebugHandlerBase - b be - be - be ` BTFDebug - e - - - - b ` CodeViewDebug - be - - - - b ` DWARFDebug yes be - - - - b Doing virtual function calls per instruction is costly and useless when the called function does nothing. This commit performs the following clean-up/improvements: - PseudoProbeHandler is no longer an AsmPrinterHandler -- it used nothing of its functionality to hook in at the possible points. This avoids virtual function calls when a pseudo probe printer is present. - DebugHandlerBase is no longer an AsmPrinterHandler, but a separate base class. DebugHandlerBase is the only remaining "hook" for begin/end instruction and setSymbolSize (only used by DWARFDebug). begin/end for function and basic block sections are never overridden and therefore are no longer virtual. (Originally I intended there to be only one debug handler, but BPF as the only target supports two at the same time: DWARF and BTF.) - AsmPrinterHandler no longer has begin/end instruction and setSymbolSize hooks -- these were only used by DebugHandlerBase. This avoids iterating over handlers in every instruction.
AsmPrinterHandler Mod Fn mFE BBS FL ` WinCFGuard e e - - - ` EHStreamer - - - - - ` DwarfCFIException e be - be - ` ARMException - be e - - ` AIXException - e - - - ` WinException e be e - be ` WasmException e be - - - SymSz Mod Fn BBS Inst DebugHandlerBase - b be be be ` BTFDebug - e b ` CodeViewDebug - be b ` DWARFDebug yes be b PseudoProbeHandler (no shared methods) To continue allowing external users (e.g., Julia) to hook in at every instruction, a new method addDebugHandler is exposed. This results in a performance improvement, especially in the -O0 -g0 case with unwind information (e.g., JIT baseline). --- llvm/include/llvm/CodeGen/AsmPrinter.h | 22 +++-- llvm/include/llvm/CodeGen/AsmPrinterHandler.h | 10 -- llvm/include/llvm/CodeGen/DebugHandlerBase.h | 26 +++-- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 94 ++++++++++++------- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h | 2 - .../CodeGen/AsmPrinter/DebugHandlerBase.cpp | 2 + llvm/lib/CodeGen/AsmPrinter/EHStreamer.h | 5 - .../CodeGen/AsmPrinter/PseudoProbePrinter.cpp | 2 - .../CodeGen/AsmPrinter/PseudoProbePrinter.h | 13 +-- llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h | 8 -- llvm/lib/Target/BPF/BPFAsmPrinter.cpp | 5 +- llvm/lib/Target/BPF/BTFDebug.h | 2 - .../unittests/CodeGen/AsmPrinterDwarfTest.cpp | 63 ++++++++++++- 13 files changed, 157 insertions(+), 97 deletions(-) diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h index 571bc0118c69..88e88a4f63bf 100644 --- a/llvm/include/llvm/CodeGen/AsmPrinter.h +++ b/llvm/include/llvm/CodeGen/AsmPrinter.h @@ -21,6 +21,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/AsmPrinterHandler.h" +#include "llvm/CodeGen/DebugHandlerBase.h" #include "llvm/CodeGen/DwarfStringPoolEntry.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/StackMaps.h" @@ -144,14 +145,14 @@ public: /// struct HandlerInfo and Handlers permit users or target extended /// AsmPrinter to add 
their own handlers. - struct HandlerInfo { - std::unique_ptr Handler; + template struct HandlerInfo { + std::unique_ptr Handler; StringRef TimerName; StringRef TimerDescription; StringRef TimerGroupName; StringRef TimerGroupDescription; - HandlerInfo(std::unique_ptr Handler, StringRef TimerName, + HandlerInfo(std::unique_ptr Handler, StringRef TimerName, StringRef TimerDescription, StringRef TimerGroupName, StringRef TimerGroupDescription) : Handler(std::move(Handler)), TimerName(TimerName), @@ -204,9 +205,13 @@ protected: /// A vector of all debug/EH info emitters we should use. This vector /// maintains ownership of the emitters. - std::vector Handlers; + SmallVector, 2> Handlers; size_t NumUserHandlers = 0; + /// Debuginfo handler. Protected so that targets can add their own. + SmallVector, 1> DebugHandlers; + size_t NumUserDebugHandlers = 0; + StackMaps SM; private: @@ -221,7 +226,7 @@ private: /// A handler that supports pseudo probe emission with embedded inline /// context. - PseudoProbeHandler *PP = nullptr; + std::unique_ptr PP; /// CFISection type the module needs i.e. either .eh_frame or .debug_frame. CFISection ModuleCFISection = CFISection::None; @@ -530,11 +535,16 @@ public: // Overridable Hooks //===------------------------------------------------------------------===// - void addAsmPrinterHandler(HandlerInfo Handler) { + void addAsmPrinterHandler(HandlerInfo Handler) { Handlers.insert(Handlers.begin(), std::move(Handler)); NumUserHandlers++; } + void addDebugHandler(HandlerInfo Handler) { + DebugHandlers.insert(DebugHandlers.begin(), std::move(Handler)); + NumUserDebugHandlers++; + } + // Targets can, or in the case of EmitInstruction, must implement these to // customize output. 
diff --git a/llvm/include/llvm/CodeGen/AsmPrinterHandler.h b/llvm/include/llvm/CodeGen/AsmPrinterHandler.h index 5c06645f767e..ed73e618431d 100644 --- a/llvm/include/llvm/CodeGen/AsmPrinterHandler.h +++ b/llvm/include/llvm/CodeGen/AsmPrinterHandler.h @@ -34,10 +34,6 @@ class AsmPrinterHandler { public: virtual ~AsmPrinterHandler(); - /// For symbols that have a size designated (e.g. common symbols), - /// this tracks that size. - virtual void setSymbolSize(const MCSymbol *Sym, uint64_t Size) = 0; - virtual void beginModule(Module *M) {} /// Emit all sections that should come after the content. @@ -72,12 +68,6 @@ public: virtual void beginFunclet(const MachineBasicBlock &MBB, MCSymbol *Sym = nullptr) {} virtual void endFunclet() {} - - /// Process beginning of an instruction. - virtual void beginInstruction(const MachineInstr *MI) = 0; - - /// Process end of an instruction. - virtual void endInstruction() = 0; }; } // End of namespace llvm diff --git a/llvm/include/llvm/CodeGen/DebugHandlerBase.h b/llvm/include/llvm/CodeGen/DebugHandlerBase.h index af25f2544da7..36a844e7087f 100644 --- a/llvm/include/llvm/CodeGen/DebugHandlerBase.h +++ b/llvm/include/llvm/CodeGen/DebugHandlerBase.h @@ -50,10 +50,14 @@ struct DbgVariableLocation { /// Base class for debug information backends. Common functionality related to /// tracking which variables and scopes are alive at a given PC live here. -class DebugHandlerBase : public AsmPrinterHandler { +class DebugHandlerBase { protected: DebugHandlerBase(AsmPrinter *A); +public: + virtual ~DebugHandlerBase(); + +protected: /// Target of debug info emission. AsmPrinter *Asm = nullptr; @@ -116,18 +120,22 @@ protected: private: InstructionOrdering InstOrdering; - // AsmPrinterHandler overrides. public: - void beginModule(Module *M) override; + /// For symbols that have a size designated (e.g. common symbols), + /// this tracks that size. Only used by DWARF. 
+ virtual void setSymbolSize(const MCSymbol *Sym, uint64_t Size) {} + + virtual void beginModule(Module *M); + virtual void endModule() = 0; - void beginInstruction(const MachineInstr *MI) override; - void endInstruction() override; + virtual void beginInstruction(const MachineInstr *MI); + virtual void endInstruction(); - void beginFunction(const MachineFunction *MF) override; - void endFunction(const MachineFunction *MF) override; + void beginFunction(const MachineFunction *MF); + void endFunction(const MachineFunction *MF); - void beginBasicBlockSection(const MachineBasicBlock &MBB) override; - void endBasicBlockSection(const MachineBasicBlock &MBB) override; + void beginBasicBlockSection(const MachineBasicBlock &MBB); + void endBasicBlockSection(const MachineBasicBlock &MBB); /// Return Label preceding the instruction. MCSymbol *getLabelBeforeInsn(const MachineInstr *MI); diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 322ae67a1e8a..d0758ad262cc 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -163,10 +163,6 @@ const char CFGuardName[] = "Control Flow Guard"; const char CFGuardDescription[] = "Control Flow Guard"; const char CodeViewLineTablesGroupName[] = "linetables"; const char CodeViewLineTablesGroupDescription[] = "CodeView Line Tables"; -const char PPTimerName[] = "emit"; -const char PPTimerDescription[] = "Pseudo Probe Emission"; -const char PPGroupName[] = "pseudo probe"; -const char PPGroupDescription[] = "Pseudo Probe Emission"; STATISTIC(EmittedInsts, "Number of machine instrs printed"); @@ -526,26 +522,23 @@ bool AsmPrinter::doInitialization(Module &M) { if (MAI->doesSupportDebugInformation()) { bool EmitCodeView = M.getCodeViewFlag(); if (EmitCodeView && TM.getTargetTriple().isOSWindows()) { - Handlers.emplace_back(std::make_unique(this), - DbgTimerName, DbgTimerDescription, - CodeViewLineTablesGroupName, - 
CodeViewLineTablesGroupDescription); + DebugHandlers.emplace_back(std::make_unique(this), + DbgTimerName, DbgTimerDescription, + CodeViewLineTablesGroupName, + CodeViewLineTablesGroupDescription); } if (!EmitCodeView || M.getDwarfVersion()) { if (MMI->hasDebugInfo()) { DD = new DwarfDebug(this); - Handlers.emplace_back(std::unique_ptr(DD), DbgTimerName, - DbgTimerDescription, DWARFGroupName, - DWARFGroupDescription); + DebugHandlers.emplace_back(std::unique_ptr(DD), + DbgTimerName, DbgTimerDescription, + DWARFGroupName, DWARFGroupDescription); } } } - if (M.getNamedMetadata(PseudoProbeDescMetadataName)) { - PP = new PseudoProbeHandler(this); - Handlers.emplace_back(std::unique_ptr(PP), PPTimerName, - PPTimerDescription, PPGroupName, PPGroupDescription); - } + if (M.getNamedMetadata(PseudoProbeDescMetadataName)) + PP = std::make_unique(this); switch (MAI->getExceptionHandlingType()) { case ExceptionHandling::None: @@ -611,7 +604,12 @@ bool AsmPrinter::doInitialization(Module &M) { CFGuardDescription, DWARFGroupName, DWARFGroupDescription); - for (const HandlerInfo &HI : Handlers) { + for (const auto &HI : DebugHandlers) { + NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName, + HI.TimerGroupDescription, TimePassesIsEnabled); + HI.Handler->beginModule(&M); + } + for (const auto &HI : Handlers) { NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName, HI.TimerGroupDescription, TimePassesIsEnabled); HI.Handler->beginModule(&M); @@ -762,10 +760,9 @@ void AsmPrinter::emitGlobalVariable(const GlobalVariable *GV) { // sections and expected to be contiguous (e.g. ObjC metadata). 
const Align Alignment = getGVAlignment(GV, DL); - for (const HandlerInfo &HI : Handlers) { - NamedRegionTimer T(HI.TimerName, HI.TimerDescription, - HI.TimerGroupName, HI.TimerGroupDescription, - TimePassesIsEnabled); + for (auto &HI : DebugHandlers) { + NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName, + HI.TimerGroupDescription, TimePassesIsEnabled); HI.Handler->setSymbolSize(GVSym, Size); } @@ -1033,12 +1030,18 @@ void AsmPrinter::emitFunctionHeader() { } // Emit pre-function debug and/or EH information. - for (const HandlerInfo &HI : Handlers) { + for (const auto &HI : DebugHandlers) { NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName, HI.TimerGroupDescription, TimePassesIsEnabled); HI.Handler->beginFunction(MF); + HI.Handler->beginBasicBlockSection(MF->front()); } - for (const HandlerInfo &HI : Handlers) { + for (const auto &HI : Handlers) { + NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName, + HI.TimerGroupDescription, TimePassesIsEnabled); + HI.Handler->beginFunction(MF); + } + for (const auto &HI : Handlers) { NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName, HI.TimerGroupDescription, TimePassesIsEnabled); HI.Handler->beginBasicBlockSection(MF->front()); @@ -1738,7 +1741,7 @@ void AsmPrinter::emitFunctionBody() { if (MDNode *MD = MI.getPCSections()) emitPCSectionsLabel(*MF, *MD); - for (const HandlerInfo &HI : Handlers) { + for (const auto &HI : DebugHandlers) { NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName, HI.TimerGroupDescription, TimePassesIsEnabled); HI.Handler->beginInstruction(&MI); @@ -1832,7 +1835,7 @@ void AsmPrinter::emitFunctionBody() { if (MCSymbol *S = MI.getPostInstrSymbol()) OutStreamer->emitLabel(S); - for (const HandlerInfo &HI : Handlers) { + for (const auto &HI : DebugHandlers) { NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName, HI.TimerGroupDescription, TimePassesIsEnabled); 
HI.Handler->endInstruction(); @@ -1967,13 +1970,18 @@ void AsmPrinter::emitFunctionBody() { // Call endBasicBlockSection on the last block now, if it wasn't already // called. if (!MF->back().isEndSection()) { - for (const HandlerInfo &HI : Handlers) { + for (const auto &HI : DebugHandlers) { + NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName, + HI.TimerGroupDescription, TimePassesIsEnabled); + HI.Handler->endBasicBlockSection(MF->back()); + } + for (const auto &HI : Handlers) { NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName, HI.TimerGroupDescription, TimePassesIsEnabled); HI.Handler->endBasicBlockSection(MF->back()); } } - for (const HandlerInfo &HI : Handlers) { + for (const auto &HI : Handlers) { NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName, HI.TimerGroupDescription, TimePassesIsEnabled); HI.Handler->markFunctionEnd(); @@ -1986,7 +1994,12 @@ void AsmPrinter::emitFunctionBody() { emitJumpTableInfo(); // Emit post-function debug and/or EH information. - for (const HandlerInfo &HI : Handlers) { + for (const auto &HI : DebugHandlers) { + NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName, + HI.TimerGroupDescription, TimePassesIsEnabled); + HI.Handler->endFunction(MF); + } + for (const auto &HI : Handlers) { NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName, HI.TimerGroupDescription, TimePassesIsEnabled); HI.Handler->endFunction(MF); @@ -2346,7 +2359,12 @@ bool AsmPrinter::doFinalization(Module &M) { emitStackMaps(); // Finalize debug and EH information. 
- for (const HandlerInfo &HI : Handlers) { + for (const auto &HI : DebugHandlers) { + NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName, + HI.TimerGroupDescription, TimePassesIsEnabled); + HI.Handler->endModule(); + } + for (const auto &HI : Handlers) { NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName, HI.TimerGroupDescription, TimePassesIsEnabled); HI.Handler->endModule(); @@ -2356,6 +2374,8 @@ bool AsmPrinter::doFinalization(Module &M) { // keeping all the user-added handlers alive until the AsmPrinter is // destroyed. Handlers.erase(Handlers.begin() + NumUserHandlers, Handlers.end()); + DebugHandlers.erase(DebugHandlers.begin() + NumUserDebugHandlers, + DebugHandlers.end()); DD = nullptr; // If the target wants to know about weak references, print them all. @@ -3852,7 +3872,7 @@ static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB, void AsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) { // End the previous funclet and start a new one. if (MBB.isEHFuncletEntry()) { - for (const HandlerInfo &HI : Handlers) { + for (const auto &HI : Handlers) { HI.Handler->endFunclet(); HI.Handler->beginFunclet(MBB); } @@ -3924,17 +3944,23 @@ void AsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) { // With BB sections, each basic block must handle CFI information on its own // if it begins a section (Entry block call is handled separately, next to // beginFunction). - if (MBB.isBeginSection() && !MBB.isEntryBlock()) - for (const HandlerInfo &HI : Handlers) + if (MBB.isBeginSection() && !MBB.isEntryBlock()) { + for (const auto &HI : DebugHandlers) + HI.Handler->beginBasicBlockSection(MBB); + for (const auto &HI : Handlers) HI.Handler->beginBasicBlockSection(MBB); + } } void AsmPrinter::emitBasicBlockEnd(const MachineBasicBlock &MBB) { // Check if CFI information needs to be updated for this MBB with basic block // sections. 
- if (MBB.isEndSection()) - for (const HandlerInfo &HI : Handlers) + if (MBB.isEndSection()) { + for (const auto &HI : DebugHandlers) + HI.Handler->endBasicBlockSection(MBB); + for (const auto &HI : Handlers) HI.Handler->endBasicBlockSection(MBB); + } } void AsmPrinter::emitVisibility(MCSymbol *Sym, unsigned Visibility, diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h index 1455ac417824..1a42eaf33b50 100644 --- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h +++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h @@ -495,8 +495,6 @@ public: void beginModule(Module *M) override; - void setSymbolSize(const MCSymbol *, uint64_t) override {} - /// Emit the COFF section that holds the line table information. void endModule() override; diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp index eb2d992c7e75..059afdac3d9f 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp @@ -99,6 +99,8 @@ DbgVariableLocation::extractFromMachineInstruction( DebugHandlerBase::DebugHandlerBase(AsmPrinter *A) : Asm(A), MMI(Asm->MMI) {} +DebugHandlerBase::~DebugHandlerBase() = default; + void DebugHandlerBase::beginModule(Module *M) { if (M->debug_compile_units().empty()) Asm = nullptr; diff --git a/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h b/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h index 234e62506a56..705a61fb827f 100644 --- a/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h +++ b/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h @@ -150,11 +150,6 @@ public: EHStreamer(AsmPrinter *A); ~EHStreamer() override; - // Unused. - void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {} - void beginInstruction(const MachineInstr *MI) override {} - void endInstruction() override {} - /// Return `true' if this is a call to a function marked `nounwind'. Return /// `false' otherwise. 
static bool callToNoUnwindFunction(const MachineInstr *MI); diff --git a/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp index 59c3fa15885e..5dda38383a65 100644 --- a/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp @@ -20,8 +20,6 @@ using namespace llvm; -PseudoProbeHandler::~PseudoProbeHandler() = default; - void PseudoProbeHandler::emitPseudoProbe(uint64_t Guid, uint64_t Index, uint64_t Type, uint64_t Attr, const DILocation *DebugLoc) { diff --git a/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h b/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h index a92a89084cad..35461e53fbf1 100644 --- a/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h +++ b/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h @@ -21,26 +21,17 @@ namespace llvm { class AsmPrinter; class DILocation; -class PseudoProbeHandler : public AsmPrinterHandler { +class PseudoProbeHandler { // Target of pseudo probe emission. AsmPrinter *Asm; // Name to GUID map, used as caching/memoization for speed. DenseMap NameGuidMap; public: - PseudoProbeHandler(AsmPrinter *A) : Asm(A){}; - ~PseudoProbeHandler() override; + PseudoProbeHandler(AsmPrinter *A) : Asm(A) {}; void emitPseudoProbe(uint64_t Guid, uint64_t Index, uint64_t Type, uint64_t Attr, const DILocation *DebugLoc); - - // Unused. 
- void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {} - void endModule() override {} - void beginFunction(const MachineFunction *MF) override {} - void endFunction(const MachineFunction *MF) override {} - void beginInstruction(const MachineInstr *MI) override {} - void endInstruction() override {} }; } // namespace llvm diff --git a/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h b/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h index 0e472af52c8f..f94acc912483 100644 --- a/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h +++ b/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h @@ -30,8 +30,6 @@ public: WinCFGuard(AsmPrinter *A); ~WinCFGuard() override; - void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {} - /// Emit the Control Flow Guard function ID table. void endModule() override; @@ -44,12 +42,6 @@ public: /// Please note that some AsmPrinter implementations may not call /// beginFunction at all. void endFunction(const MachineFunction *MF) override; - - /// Process beginning of an instruction. - void beginInstruction(const MachineInstr *MI) override {} - - /// Process end of an instruction. - void endInstruction() override {} }; } // namespace llvm diff --git a/llvm/lib/Target/BPF/BPFAsmPrinter.cpp b/llvm/lib/Target/BPF/BPFAsmPrinter.cpp index c8849bd50464..8b929070ffef 100644 --- a/llvm/lib/Target/BPF/BPFAsmPrinter.cpp +++ b/llvm/lib/Target/BPF/BPFAsmPrinter.cpp @@ -61,9 +61,8 @@ bool BPFAsmPrinter::doInitialization(Module &M) { // Only emit BTF when debuginfo available. 
if (MAI->doesSupportDebugInformation() && !M.debug_compile_units().empty()) { BTF = new BTFDebug(this); - Handlers.push_back(HandlerInfo(std::unique_ptr(BTF), "emit", - "Debug Info Emission", "BTF", - "BTF Emission")); + DebugHandlers.emplace_back(std::unique_ptr(BTF), "emit", + "Debug Info Emission", "BTF", "BTF Emission"); } return false; diff --git a/llvm/lib/Target/BPF/BTFDebug.h b/llvm/lib/Target/BPF/BTFDebug.h index 7536006ed21c..411dea0a9d48 100644 --- a/llvm/lib/Target/BPF/BTFDebug.h +++ b/llvm/lib/Target/BPF/BTFDebug.h @@ -420,8 +420,6 @@ public: return DIToIdMap[Ty]; } - void setSymbolSize(const MCSymbol *Symbol, uint64_t Size) override {} - /// Process beginning of an instruction. void beginInstruction(const MachineInstr *MI) override; diff --git a/llvm/unittests/CodeGen/AsmPrinterDwarfTest.cpp b/llvm/unittests/CodeGen/AsmPrinterDwarfTest.cpp index b900e34fdd63..86f7706dad32 100644 --- a/llvm/unittests/CodeGen/AsmPrinterDwarfTest.cpp +++ b/llvm/unittests/CodeGen/AsmPrinterDwarfTest.cpp @@ -9,6 +9,8 @@ #include "TestAsmPrinter.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/AsmPrinterHandler.h" +#include "llvm/CodeGen/DebugHandlerBase.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" @@ -381,13 +383,10 @@ class AsmPrinterHandlerTest : public AsmPrinterFixtureBase { public: TestHandler(AsmPrinterHandlerTest &Test) : Test(Test) {} virtual ~TestHandler() {} - virtual void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {} virtual void beginModule(Module *M) override { Test.BeginCount++; } virtual void endModule() override { Test.EndCount++; } virtual void beginFunction(const MachineFunction *MF) override {} virtual void endFunction(const MachineFunction *MF) override {} - virtual void beginInstruction(const MachineInstr *MI) override {} - virtual void endInstruction() override {} }; protected: @@ -397,7 +396,7 @@ protected: return 
false; auto *AP = TestPrinter->getAP(); - AP->addAsmPrinterHandler(AsmPrinter::HandlerInfo( + AP->addAsmPrinterHandler(AsmPrinter::HandlerInfo( std::unique_ptr(new TestHandler(*this)), "TestTimerName", "TestTimerDesc", "TestGroupName", "TestGroupDesc")); LLVMTargetMachine *LLVMTM = static_cast(&AP->TM); @@ -409,7 +408,7 @@ protected: M->setDataLayout(LLVMTM->createDataLayout()); PM.run(*M); // Now check that we can run it twice. - AP->addAsmPrinterHandler(AsmPrinter::HandlerInfo( + AP->addAsmPrinterHandler(AsmPrinter::HandlerInfo( std::unique_ptr(new TestHandler(*this)), "TestTimerName", "TestTimerDesc", "TestGroupName", "TestGroupDesc")); PM.run(*M); @@ -428,4 +427,58 @@ TEST_F(AsmPrinterHandlerTest, Basic) { ASSERT_EQ(EndCount, 3); } +class AsmPrinterDebugHandlerTest : public AsmPrinterFixtureBase { + class TestDebugHandler : public DebugHandlerBase { + AsmPrinterDebugHandlerTest &Test; + + public: + TestDebugHandler(AsmPrinterDebugHandlerTest &Test, AsmPrinter *AP) + : DebugHandlerBase(AP), Test(Test) {} + virtual ~TestDebugHandler() {} + virtual void beginModule(Module *M) override { Test.BeginCount++; } + virtual void endModule() override { Test.EndCount++; } + virtual void beginFunctionImpl(const MachineFunction *MF) override {} + virtual void endFunctionImpl(const MachineFunction *MF) override {} + virtual void beginInstruction(const MachineInstr *MI) override {} + virtual void endInstruction() override {} + }; + +protected: + bool init(const std::string &TripleStr, unsigned DwarfVersion, + dwarf::DwarfFormat DwarfFormat) { + if (!AsmPrinterFixtureBase::init(TripleStr, DwarfVersion, DwarfFormat)) + return false; + + auto *AP = TestPrinter->getAP(); + AP->addDebugHandler(AsmPrinter::HandlerInfo( + std::make_unique(*this, AP), "TestTimerName", + "TestTimerDesc", "TestGroupName", "TestGroupDesc")); + LLVMTargetMachine *LLVMTM = static_cast(&AP->TM); + legacy::PassManager PM; + PM.add(new MachineModuleInfoWrapperPass(LLVMTM)); + PM.add(TestPrinter->releaseAP()); 
// Takes ownership of destroying AP + LLVMContext Context; + std::unique_ptr M(new Module("TestModule", Context)); + M->setDataLayout(LLVMTM->createDataLayout()); + PM.run(*M); + // Now check that we can run it twice. + AP->addDebugHandler(AsmPrinter::HandlerInfo( + std::make_unique(*this, AP), "TestTimerName", + "TestTimerDesc", "TestGroupName", "TestGroupDesc")); + PM.run(*M); + return true; + } + + int BeginCount = 0; + int EndCount = 0; +}; + +TEST_F(AsmPrinterDebugHandlerTest, Basic) { + if (!init("x86_64-pc-linux", /*DwarfVersion=*/4, dwarf::DWARF32)) + GTEST_SKIP(); + + ASSERT_EQ(BeginCount, 3); + ASSERT_EQ(EndCount, 3); +} + } // end namespace -- Gitee From 527807b51bc9ce5abebf60ba38597631e5ee85eb Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Wed, 19 Jun 2024 10:29:37 +0100 Subject: [PATCH 45/47] DenseMap: support enum class keys (#95972) Implemented using std::underlying_type. --- llvm/include/llvm/ADT/DenseMapInfo.h | 18 ++++++++++++++++++ llvm/unittests/ADT/DenseMapTest.cpp | 16 ++++++++++++++-- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/ADT/DenseMapInfo.h b/llvm/include/llvm/ADT/DenseMapInfo.h index 5b7dce7b53c6..c405f7446d5a 100644 --- a/llvm/include/llvm/ADT/DenseMapInfo.h +++ b/llvm/include/llvm/ADT/DenseMapInfo.h @@ -297,6 +297,24 @@ template struct DenseMapInfo> { } }; +// Provide DenseMapInfo for enum classes. 
+template +struct DenseMapInfo>> { + using UnderlyingType = std::underlying_type_t; + using Info = DenseMapInfo; + + static Enum getEmptyKey() { return static_cast(Info::getEmptyKey()); } + + static Enum getTombstoneKey() { + return static_cast(Info::getTombstoneKey()); + } + + static unsigned getHashValue(const Enum &Val) { + return Info::getHashValue(static_cast(Val)); + } + + static bool isEqual(const Enum &LHS, const Enum &RHS) { return LHS == RHS; } +}; } // end namespace llvm #endif // LLVM_ADT_DENSEMAPINFO_H diff --git a/llvm/unittests/ADT/DenseMapTest.cpp b/llvm/unittests/ADT/DenseMapTest.cpp index cc3244528f27..becea622ce39 100644 --- a/llvm/unittests/ADT/DenseMapTest.cpp +++ b/llvm/unittests/ADT/DenseMapTest.cpp @@ -20,7 +20,6 @@ using namespace llvm; namespace { - uint32_t getTestKey(int i, uint32_t *) { return i; } uint32_t getTestValue(int i, uint32_t *) { return 42 + i; } @@ -35,6 +34,14 @@ uint32_t *getTestValue(int i, uint32_t **) { return &dummy_arr1[i]; } +enum class EnumClass { Val }; + +EnumClass getTestKey(int i, EnumClass *) { + // We can't possibly support 100 values for the swap test, so just return an + // invalid EnumClass for testing. + return static_cast(i); +} + /// A test class that tries to check that construction and destruction /// occur correctly. class CtorTester { @@ -103,14 +110,19 @@ template typename T::mapped_type *const DenseMapTest::dummy_value_ptr = nullptr; // Register these types for testing. 
+// clang-format off typedef ::testing::Types, DenseMap, DenseMap, + DenseMap, SmallDenseMap, SmallDenseMap, SmallDenseMap + CtorTesterMapInfo>, + SmallDenseMap > DenseMapTestTypes; +// clang-format on + TYPED_TEST_SUITE(DenseMapTest, DenseMapTestTypes, ); // Empty map tests -- Gitee From 3d4dba09f8a7403c4bff7f14b2ffb256bacef1fb Mon Sep 17 00:00:00 2001 From: Haohai Wen Date: Thu, 4 Jul 2024 09:52:38 +0800 Subject: [PATCH 46/47] [BasicBlockSections] Using MBBSectionID as DenseMap key (#97295) getSectionIDNum may return same value for two different MBBSectionID. e.g. A Cold type MBBSectionID with number 0 and a Default type MBBSectionID with number 2 get same value 2 from getSectionIDNum. This may lead to overwrite of MBBSectionRanges. Using MBBSectionID itself as DenseMap key is better choice. --- llvm/include/llvm/CodeGen/AsmPrinter.h | 4 +-- llvm/include/llvm/CodeGen/MachineBasicBlock.h | 26 ++++++++++++++----- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 17 +++++++----- .../CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 2 +- llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 4 +-- llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp | 4 +-- 6 files changed, 37 insertions(+), 20 deletions(-) diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h index 88e88a4f63bf..2be38fb3bafd 100644 --- a/llvm/include/llvm/CodeGen/AsmPrinter.h +++ b/llvm/include/llvm/CodeGen/AsmPrinter.h @@ -136,7 +136,7 @@ public: MCSymbol *BeginLabel, *EndLabel; }; - MapVector MBBSectionRanges; + MapVector MBBSectionRanges; /// Map global GOT equivalent MCSymbols to GlobalVariables and keep track of /// its number of uses by other globals. @@ -173,7 +173,7 @@ private: /// Map a basic block section ID to the exception symbol associated with that /// section. Map entries are assigned and looked up via /// AsmPrinter::getMBBExceptionSym. 
- DenseMap MBBSectionExceptionSyms; + DenseMap MBBSectionExceptionSyms; // The symbol used to represent the start of the current BB section of the // function. This is used to calculate the size of the BB section. diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h index 1ee571f5e5a6..1d16aac7f445 100644 --- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h +++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h @@ -13,6 +13,7 @@ #ifndef LLVM_CODEGEN_MACHINEBASICBLOCK_H #define LLVM_CODEGEN_MACHINEBASICBLOCK_H +#include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/GraphTraits.h" #include "llvm/ADT/SparseBitVector.h" #include "llvm/ADT/ilist.h" @@ -77,6 +78,25 @@ private: MBBSectionID(SectionType T) : Type(T), Number(0) {} }; +template <> struct DenseMapInfo { + using TypeInfo = DenseMapInfo; + using NumberInfo = DenseMapInfo; + + static inline MBBSectionID getEmptyKey() { + return MBBSectionID(NumberInfo::getEmptyKey()); + } + static inline MBBSectionID getTombstoneKey() { + return MBBSectionID(NumberInfo::getTombstoneKey()); + } + static unsigned getHashValue(const MBBSectionID &SecID) { + return detail::combineHashValue(TypeInfo::getHashValue(SecID.Type), + NumberInfo::getHashValue(SecID.Number)); + } + static bool isEqual(const MBBSectionID &LHS, const MBBSectionID &RHS) { + return LHS == RHS; + } +}; + // This structure represents the information for a basic block pertaining to // the basic block sections profile. struct UniqueBBID { @@ -649,12 +669,6 @@ public: /// Returns the section ID of this basic block. MBBSectionID getSectionID() const { return SectionID; } - /// Returns the unique section ID number of this basic block. - unsigned getSectionIDNum() const { - return ((unsigned)MBBSectionID::SectionType::Cold) - - ((unsigned)SectionID.Type) + SectionID.Number; - } - /// Sets the fixed BBID of this basic block. 
void setBBID(const UniqueBBID &V) { assert(!BBID.has_value() && "Cannot change BBID."); diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index d0758ad262cc..fda2904216e1 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1381,7 +1381,7 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) { OutStreamer->emitULEB128IntValue(MBBSectionRanges.size()); } // Number of blocks in each MBB section. - MapVector MBBSectionNumBlocks; + MapVector MBBSectionNumBlocks; const MCSymbol *PrevMBBEndSymbol = nullptr; if (!Features.MultiBBRange) { OutStreamer->AddComment("function address"); @@ -1395,7 +1395,7 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) { BBCount++; if (MBB.isEndSection()) { // Store each section's basic block count when it ends. - MBBSectionNumBlocks[MBB.getSectionIDNum()] = BBCount; + MBBSectionNumBlocks[MBB.getSectionID()] = BBCount; // Reset the count for the next section. BBCount = 0; } @@ -1411,8 +1411,7 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) { OutStreamer->AddComment("base address"); OutStreamer->emitSymbolValue(MBBSymbol, getPointerSize()); OutStreamer->AddComment("number of basic blocks"); - OutStreamer->emitULEB128IntValue( - MBBSectionNumBlocks[MBB.getSectionIDNum()]); + OutStreamer->emitULEB128IntValue(MBBSectionNumBlocks[MBB.getSectionID()]); PrevMBBEndSymbol = MBBSymbol; } // TODO: Remove this check when version 1 is deprecated. 
@@ -1861,7 +1860,9 @@ void AsmPrinter::emitFunctionBody() { OutContext); OutStreamer->emitELFSize(CurrentSectionBeginSym, SizeExp); } - MBBSectionRanges[MBB.getSectionIDNum()] = + assert(!MBBSectionRanges.contains(MBB.getSectionID()) && + "Overwrite section range"); + MBBSectionRanges[MBB.getSectionID()] = MBBSectionRange{CurrentSectionBeginSym, MBB.getEndSymbol()}; } } @@ -1987,7 +1988,9 @@ void AsmPrinter::emitFunctionBody() { HI.Handler->markFunctionEnd(); } - MBBSectionRanges[MF->front().getSectionIDNum()] = + assert(!MBBSectionRanges.contains(MF->front().getSectionID()) && + "Overwrite section range"); + MBBSectionRanges[MF->front().getSectionID()] = MBBSectionRange{CurrentFnBegin, CurrentFnEnd}; // Print out jump tables referenced by the function. @@ -2503,7 +2506,7 @@ bool AsmPrinter::doFinalization(Module &M) { } MCSymbol *AsmPrinter::getMBBExceptionSym(const MachineBasicBlock &MBB) { - auto Res = MBBSectionExceptionSyms.try_emplace(MBB.getSectionIDNum()); + auto Res = MBBSectionExceptionSyms.try_emplace(MBB.getSectionID()); if (Res.second) Res.first->second = createTempSymbol("exception"); return Res.first->second; diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 4a70d1f07d6e..297d87315de4 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -676,7 +676,7 @@ void DwarfCompileUnit::attachRangesOrLowHighPC( // the order of blocks will be frozen beyond this point. do { if (MBB->sameSection(EndMBB) || MBB->isEndSection()) { - auto MBBSectionRange = Asm->MBBSectionRanges[MBB->getSectionIDNum()]; + auto MBBSectionRange = Asm->MBBSectionRanges[MBB->getSectionID()]; List.push_back( {MBB->sameSection(BeginMBB) ? 
BeginLabel : MBBSectionRange.BeginLabel, diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 1ae17ec9b874..c200c21f17c9 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -1713,7 +1713,7 @@ bool DwarfDebug::buildLocationList(SmallVectorImpl &DebugLoc, const MCSymbol *EndLabel; if (std::next(EI) == Entries.end()) { const MachineBasicBlock &EndMBB = Asm->MF->back(); - EndLabel = Asm->MBBSectionRanges[EndMBB.getSectionIDNum()].EndLabel; + EndLabel = Asm->MBBSectionRanges[EndMBB.getSectionID()].EndLabel; if (EI->isClobber()) EndMI = EI->getInstr(); } @@ -2064,7 +2064,7 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) { bool PrevInstInSameSection = (!PrevInstBB || - PrevInstBB->getSectionIDNum() == MI->getParent()->getSectionIDNum()); + PrevInstBB->getSectionID() == MI->getParent()->getSectionID()); if (DL == PrevInstLoc && PrevInstInSameSection) { // If we have an ongoing unspecified location, nothing to do here. if (!DL) diff --git a/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp index eef6b1d93f36..e6edb2b158d5 100644 --- a/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp @@ -254,8 +254,8 @@ void EHStreamer::computeCallSiteTable( // We start a call-site range upon function entry and at the beginning of // every basic block section. 
CallSiteRanges.push_back( - {Asm->MBBSectionRanges[MBB.getSectionIDNum()].BeginLabel, - Asm->MBBSectionRanges[MBB.getSectionIDNum()].EndLabel, + {Asm->MBBSectionRanges[MBB.getSectionID()].BeginLabel, + Asm->MBBSectionRanges[MBB.getSectionID()].EndLabel, Asm->getMBBExceptionSym(MBB), CallSites.size()}); PreviousIsInvoke = false; SawPotentiallyThrowing = false; -- Gitee From 2cf6ad65b51064557b4c43377317d887ebde219d Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Tue, 22 Aug 2023 23:29:31 +0000 Subject: [PATCH 47/47] Remove checking stats from -gc-empty-basic-blocks test. The test does not require asserts. So it can't check the stats. --- llvm/test/CodeGen/X86/gc-empty-basic-blocks.ll | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/llvm/test/CodeGen/X86/gc-empty-basic-blocks.ll b/llvm/test/CodeGen/X86/gc-empty-basic-blocks.ll index bac885a71b4c..f737e43d3b81 100644 --- a/llvm/test/CodeGen/X86/gc-empty-basic-blocks.ll +++ b/llvm/test/CodeGen/X86/gc-empty-basic-blocks.ll @@ -1,8 +1,5 @@ ;; This test verifies that -gc-empty-basic-blocks removes empty blocks. -; RUN: llc < %s -mtriple=x86_64 -O0 -gc-empty-basic-blocks | FileCheck -check-prefix=CHECK %s -; RUN: llc < %s -mtriple=x86_64 -stats -O0 -gc-empty-basic-blocks 2>&1 | FileCheck -check-prefix=STAT %s - -; STAT: 1 gc-empty-basic-blocks - Number of empty blocks removed +; RUN: llc < %s -mtriple=x86_64 -O0 -gc-empty-basic-blocks | FileCheck %s define void @foo(i1 zeroext %0) nounwind { br i1 %0, label %2, label %empty_block -- Gitee