diff --git a/bolt/CMakeLists.txt b/bolt/CMakeLists.txt index 74907ad118d12f22266adc328e41ad61879cf744..acd98f906e45c663af7678f8f57ced68d2eab7c0 100644 --- a/bolt/CMakeLists.txt +++ b/bolt/CMakeLists.txt @@ -89,7 +89,7 @@ if (LLVM_INCLUDE_TESTS) endif() if (BOLT_ENABLE_RUNTIME) - message(STATUS "Building BOLT runtime libraries for X86") + message(STATUS "Building BOLT runtime libraries") set(extra_args "") if(CMAKE_SYSROOT) list(APPEND extra_args -DCMAKE_SYSROOT=${CMAKE_SYSROOT}) diff --git a/bolt/docs/CommandLineArgumentReference.md b/bolt/docs/CommandLineArgumentReference.md index 0c8935457366dbb702a633190d9a6958af0e7951..3dcbdf0bdb20dad7200462ddb427a780a035e20e 100644 --- a/bolt/docs/CommandLineArgumentReference.md +++ b/bolt/docs/CommandLineArgumentReference.md @@ -56,14 +56,6 @@ Allow processing of stripped binaries -- `--alt-inst-feature-size=` - - Size of feature field in .altinstructions - -- `--alt-inst-has-padlen` - - Specify that .altinstructions has padlen field - - `--asm-dump[=]` Dump function into assembly @@ -250,10 +242,6 @@ Redirect journaling to a file instead of stdout/stderr -- `--long-jump-labels` - - Always use long jumps/nops for Linux kernel static keys - - `--match-profile-with-function-hash` Match profile with function hash diff --git a/bolt/include/bolt/Core/BinaryBasicBlock.h b/bolt/include/bolt/Core/BinaryBasicBlock.h index 9a9d7b8735d714d0d116807d6efdab8ad5cab542..db2d7b7e976d5fd308de52be326b6c46657aa1eb 100644 --- a/bolt/include/bolt/Core/BinaryBasicBlock.h +++ b/bolt/include/bolt/Core/BinaryBasicBlock.h @@ -689,10 +689,16 @@ public: void setCanOutline(const bool Flag) { CanOutline = Flag; } + void undefineLabels() { + for (const MCInst &Inst : Instructions) + undefineInstLabel(Inst); + } + /// Erase pseudo instruction at a given iterator. /// Return iterator following the removed instruction. iterator erasePseudoInstruction(iterator II) { --NumPseudos; + undefineInstLabel(*II); return Instructions.erase(II); } @@ -700,6 +706,7 @@ public: /// Return iterator following the removed instruction. iterator eraseInstruction(iterator II) { adjustNumPseudos(*II, -1); + undefineInstLabel(*II); return Instructions.erase(II); } @@ -717,6 +724,7 @@ public: /// Erase all instructions. void clear() { + undefineLabels(); Instructions.clear(); NumPseudos = 0; } @@ -741,6 +749,7 @@ public: adjustNumPseudos(Begin, End, 1); auto I = II - Instructions.begin(); + undefineInstLabel(*II); Instructions.insert(Instructions.erase(II), Begin, End); return I + Instructions.begin(); } @@ -913,6 +922,8 @@ public: uint64_t getHash() const { return Hash; } private: + void undefineInstLabel(const llvm::MCInst &Inst); + void adjustNumPseudos(const MCInst &Inst, int Sign); template void adjustNumPseudos(Itr Begin, Itr End, int Sign) { diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h index b3cf9f834cc0839e2d9a1e6e4c3962ed214b9745..6c997269cff5934e0afc8ef229b8675736973be1 100644 --- a/bolt/include/bolt/Core/BinaryContext.h +++ b/bolt/include/bolt/Core/BinaryContext.h @@ -426,6 +426,13 @@ public: Address); } + bool isInRange(StringRef NameStart, StringRef NameEnd, + uint64_t Address) const { + ErrorOr Start = getSymbolValue(NameStart); + ErrorOr End = getSymbolValue(NameEnd); + return Start && End && *Start <= Address && Address < *End; + } + /// Return size of an entry for the given jump table \p Type. uint64_t getJumpTableEntrySize(JumpTable::JumpTableType Type) const { return Type == JumpTable::JTT_PIC ? 
4 : AsmInfo->getCodePointerSize(); @@ -538,6 +545,11 @@ public: /// binary and functions created by BOLT. std::vector getAllBinaryFunctions(); + void undefineInstLabel(const MCInst &Inst) { + if (MCSymbol *const Label = MIB->getInstLabel(Inst)) + UndefinedSymbols.insert(Label); + } + /// Construct a jump table for \p Function at \p Address or return an existing /// one at that location. /// @@ -606,6 +618,9 @@ public: /// Addresses reserved for kernel on x86_64 start at this location. static constexpr uint64_t KernelStartX86_64 = 0xFFFF'FFFF'8000'0000; + /// Addresses reserved for kernel on aarch64 start at this location. + static constexpr uint64_t KernelStartAArch64 = 0xFFFF'0000'0000'0000; + /// Map address to a constant island owner (constant data in code section) std::map AddressToConstantIslandMap; @@ -749,6 +764,8 @@ public: /// Area in the input binary reserved for BOLT. AddressRange BOLTReserved; + AddressRange BOLTReservedRW; + /// Address of the code/function that is executed before any other code in /// the binary. std::optional StartFunctionAddress; @@ -884,7 +901,11 @@ public: /// Return a value of the global \p Symbol or an error if the value /// was not set. ErrorOr getSymbolValue(const MCSymbol &Symbol) const { - const BinaryData *BD = getBinaryDataByName(Symbol.getName()); + return getSymbolValue(Symbol.getName()); + } + + ErrorOr getSymbolValue(StringRef Name) const { + const BinaryData *BD = getBinaryDataByName(Name); if (!BD) return std::make_error_code(std::errc::bad_address); return BD->getAddress(); @@ -1202,6 +1223,13 @@ public: return const_cast(this)->getSectionForAddress(Address); } + ErrorOr getSectionForOutputAddress(uint64_t Address); + ErrorOr + getSectionForOutputAddress(uint64_t Address) const { + return const_cast(this)->getSectionForOutputAddress( + Address); + } + /// Return internal section representation for a section in a file. BinarySection *getSectionForSectionRef(SectionRef Section) const { return SectionRefToBinarySection.lookup(Section); diff --git a/bolt/include/bolt/Core/BinaryData.h b/bolt/include/bolt/Core/BinaryData.h index 8a67b3e73b802d2a2a620f13bb59b58c7689aa9f..fe3365f36beda728e037d884a17616613d49c7eb 100644 --- a/bolt/include/bolt/Core/BinaryData.h +++ b/bolt/include/bolt/Core/BinaryData.h @@ -169,6 +169,11 @@ public: return Parent && (Parent == BD || Parent->isAncestorOf(BD)); } + void updateSize(uint64_t N) { + if (N > Size) + Size = N; + } + void setIsMoveable(bool Flag) { IsMoveable = Flag; } void setSection(BinarySection &NewSection); void setOutputSection(BinarySection &NewSection) { diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h index da3fc433b7a3b1fe9ecd9924bff0fde54479d4a4..5fe31214721b9b7c0ed8ad9833a963b50da0e438 100644 --- a/bolt/include/bolt/Core/BinaryFunction.h +++ b/bolt/include/bolt/Core/BinaryFunction.h @@ -295,6 +295,12 @@ private: /// Pseudo functions should not be disassembled or emitted. bool IsPseudo{false}; + // True if address of this function can not be changed + bool KeepAddress{false}; + + // True if code of this function might be changed at run time + bool MayChange{false}; + /// True if the original function code has all necessary relocations to track /// addresses of functions emitted to new locations. Typically set for /// functions that we are not going to emit. @@ -1176,6 +1182,21 @@ public: /// Return true if all callbacks returned true, false otherwise. 
bool forEachEntryPoint(EntryPointCallbackTy Callback) const; + void undefineLabels() { + for (std::pair &LI : Labels) + BC.UndefinedSymbols.insert(LI.second); + + for (MCSymbol *const EndLabel : FunctionEndLabels) + if (EndLabel) + BC.UndefinedSymbols.insert(EndLabel); + + for (const std::pair &II : Instructions) + BC.undefineInstLabel(II.second); + + for (BinaryBasicBlock *BB : BasicBlocks) + BB->undefineLabels(); + } + /// Return MC symbol associated with the end of the function. MCSymbol * getFunctionEndLabel(const FragmentNum Fragment = FragmentNum::main()) const { @@ -1221,6 +1242,17 @@ public: return Islands->FunctionColdConstantIslandLabel; } + const FunctionFragment * + getFunctionFragmentForOutputAddress(uint64_t OutputAddress) const { + for (const FunctionFragment &FF : Layout.fragments()) { + uint64_t Address = FF.getAddress(); + uint64_t Size = FF.getImageSize(); + if (Address <= OutputAddress && OutputAddress < Address + Size) + return &FF; + } + return nullptr; + } + /// Return true if this is a function representing a PLT entry. bool isPLTFunction() const { return PLTSymbol != nullptr; } @@ -1296,6 +1328,12 @@ public: /// otherwise processed. bool isPseudo() const { return IsPseudo; } + /// Return true if address of this function can not be changed + bool mustKeepAddress() const { return KeepAddress; } + + /// Return true if code of this function might be changed at run time + bool mayChange() const { return MayChange; } + /// Return true if the function contains explicit or implicit indirect branch /// to its split fragments, e.g., split jump table, landing pad in split /// fragment. @@ -1723,6 +1761,8 @@ public: /// Mark the function as using ORC format for stack unwinding. void setHasORC(bool V) { HasORC = V; } + void setMayChange() { MayChange = true; } + BinaryFunction &setPersonalityFunction(uint64_t Addr) { assert(!PersonalityFunction && "can't set personality function twice"); PersonalityFunction = BC.getOrCreateGlobalSymbol(Addr, "FUNCat"); diff --git a/bolt/include/bolt/Core/BinarySection.h b/bolt/include/bolt/Core/BinarySection.h index d362961176b3262d259110562373eee30316f703..471c477c01f44da817d0c99fb428458ff33dff4e 100644 --- a/bolt/include/bolt/Core/BinarySection.h +++ b/bolt/include/bolt/Core/BinarySection.h @@ -390,6 +390,10 @@ public: Patches.emplace_back(BinaryPatch(Offset, Bytes)); } + void addPatch(uint64_t Offset, StringRef Bytes) { + addPatch(Offset, SmallVector(Bytes.begin(), Bytes.end())); + } + /// Register patcher for this section. 
void registerPatcher(std::unique_ptr BPatcher) { Patcher = std::move(BPatcher); diff --git a/bolt/include/bolt/Core/FunctionLayout.h b/bolt/include/bolt/Core/FunctionLayout.h index 6a13cbec69fee7f55534d8c1df7de1aeabc45f5d..92c94e53d8e434e16817dfb5a0028c3286e1a27d 100644 --- a/bolt/include/bolt/Core/FunctionLayout.h +++ b/bolt/include/bolt/Core/FunctionLayout.h @@ -117,6 +117,10 @@ public: uint64_t getFileOffset() const { return FileOffset; } void setFileOffset(uint64_t Offset) { FileOffset = Offset; } + uint8_t *getOutputData() const { + return reinterpret_cast(getImageAddress()); + } + unsigned size() const { return Size; }; bool empty() const { return size() == 0; }; iterator begin(); diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h index 32eda0b283b883bc91487eb3472d9b2a44219e88..5a8a4f6e391c9dcac77e53d033f4184f1e4f0258 100644 --- a/bolt/include/bolt/Core/MCPlusBuilder.h +++ b/bolt/include/bolt/Core/MCPlusBuilder.h @@ -672,6 +672,12 @@ public: return StringRef(); } + /// Used to fill the executable space with undefined instructions. + virtual StringRef getUndefFillValue() const { + llvm_unreachable("not implemented"); + return StringRef(); + } + /// Interface and basic functionality of a MCInstMatcher. The idea is to make /// it easy to match one or more MCInsts against a tree-like pattern and /// extract the fragment operands. Example: diff --git a/bolt/include/bolt/Core/Relocation.h b/bolt/include/bolt/Core/Relocation.h index 933f62a31f8fd7357f62fca5a34b1334b6a2da07..5bb8e2c569c999fc3e67f55237352ec672c047d1 100644 --- a/bolt/include/bolt/Core/Relocation.h +++ b/bolt/include/bolt/Core/Relocation.h @@ -92,6 +92,9 @@ struct Relocation { /// Return true if relocation type is RELATIVE static bool isRelative(uint64_t Type); + /// Return true if relocation type is GLOB_DAT + static bool isGlobDat(uint64_t Type); + /// Return true if relocation type is IRELATIVE static bool isIRelative(uint64_t Type); @@ -124,6 +127,10 @@ struct Relocation { /// otherwise. bool isRelative() const { return isRelative(Type); } + /// Return true if this relocation is R_*_GLOB_DAT type. Return false + /// otherwise. + bool isGlobDat() const { return isGlobDat(Type); } + /// Return true if this relocation is R_*_IRELATIVE type. Return false /// otherwise. bool isIRelative() const { return isIRelative(Type); } diff --git a/bolt/include/bolt/Passes/ADRRelaxationPass.h b/bolt/include/bolt/Passes/ADRRelaxationPass.h index 1d35a335c0250c4a38e84d814d2f0107e9c3e2e3..b9f92dec7f03b66bdef3b4b2cc31ac5ef3da36d2 100644 --- a/bolt/include/bolt/Passes/ADRRelaxationPass.h +++ b/bolt/include/bolt/Passes/ADRRelaxationPass.h @@ -25,7 +25,8 @@ namespace bolt { class ADRRelaxationPass : public BinaryFunctionPass { public: - explicit ADRRelaxationPass() : BinaryFunctionPass(false) {} + explicit ADRRelaxationPass(const cl::opt &PrintPass) + : BinaryFunctionPass(PrintPass) {} const char *getName() const override { return "adr-relaxation"; } diff --git a/bolt/include/bolt/Passes/PatchEntries.h b/bolt/include/bolt/Passes/PatchEntries.h index fa6b5811a4c3b1956be22df2a59b6064ab6c8325..e4982b5c6529cc3e95290030ac85a9250dd49763 100644 --- a/bolt/include/bolt/Passes/PatchEntries.h +++ b/bolt/include/bolt/Passes/PatchEntries.h @@ -33,6 +33,17 @@ class PatchEntries : public BinaryFunctionPass { public: explicit PatchEntries() : BinaryFunctionPass(false) {} + // Calculate the size of the patch. 
+ static size_t getPatchSize(const BinaryContext &BC) { + static size_t PatchSize = 0; + if (!PatchSize) { + InstructionListType Seq; + BC.MIB->createLongTailCall(Seq, BC.Ctx->createTempSymbol(), BC.Ctx.get()); + PatchSize = BC.computeCodeSize(Seq.begin(), Seq.end()); + } + return PatchSize; + } + const char *getName() const override { return "patch-entries"; } Error runOnFunctions(BinaryContext &BC) override; }; diff --git a/bolt/include/bolt/Rewrite/MetadataRewriter.h b/bolt/include/bolt/Rewrite/MetadataRewriter.h index 6ff8f0af7a8e67e19ff7769f76de10f531979f3e..6988e5de4e6bda4aadd7fe06c2a17815574832d0 100644 --- a/bolt/include/bolt/Rewrite/MetadataRewriter.h +++ b/bolt/include/bolt/Rewrite/MetadataRewriter.h @@ -19,6 +19,8 @@ namespace llvm { namespace bolt { +class RewriteInstance; + /// Base class for handling file sections with metadata. In this context, /// metadata encompasses a wide range of data that references code and other /// data. Such metadata may or may not have an impact on program execution. @@ -34,10 +36,14 @@ class MetadataRewriter { StringRef Name; protected: + RewriteInstance &RI; + /// Provides access to the binary context. BinaryContext &BC; - MetadataRewriter(StringRef Name, BinaryContext &BC) : Name(Name), BC(BC) {} + MetadataRewriter(StringRef Name, RewriteInstance &RI); + + std::optional lookupSymbol(const StringRef Name); public: virtual ~MetadataRewriter() = default; diff --git a/bolt/include/bolt/Rewrite/MetadataRewriters.h b/bolt/include/bolt/Rewrite/MetadataRewriters.h index b71bd6cad2505272634f58db66606fbcec7dea96..76face9888235fd5e6c8487288718ae3708137e2 100644 --- a/bolt/include/bolt/Rewrite/MetadataRewriters.h +++ b/bolt/include/bolt/Rewrite/MetadataRewriters.h @@ -19,13 +19,13 @@ class BinaryContext; // The list of rewriter build functions. -std::unique_ptr createLinuxKernelRewriter(BinaryContext &); +std::unique_ptr createLinuxKernelRewriter(RewriteInstance &); -std::unique_ptr createBuildIDRewriter(BinaryContext &); +std::unique_ptr createBuildIDRewriter(RewriteInstance &); -std::unique_ptr createPseudoProbeRewriter(BinaryContext &); +std::unique_ptr createPseudoProbeRewriter(RewriteInstance &); -std::unique_ptr createSDTRewriter(BinaryContext &); +std::unique_ptr createSDTRewriter(RewriteInstance &); } // namespace bolt } // namespace llvm diff --git a/bolt/include/bolt/Rewrite/RewriteInstance.h b/bolt/include/bolt/Rewrite/RewriteInstance.h index 16a82d5687de94fdca10a9d3c34c0c95c08ae49f..4878a33d78a57c62ed3da0ef44b45669977ea445 100644 --- a/bolt/include/bolt/Rewrite/RewriteInstance.h +++ b/bolt/include/bolt/Rewrite/RewriteInstance.h @@ -42,6 +42,8 @@ class ProfileReaderBase; /// optimizations) and rewriting. It also has the logic to coordinate such /// events. class RewriteInstance { + friend class MetadataRewriter; + public: // This constructor has complex initialization that can fail during // construction. Constructors can’t return errors, so clients must test \p Err @@ -420,6 +422,11 @@ private: static StringRef getBOLTReservedStart() { return "__bolt_reserved_start"; } static StringRef getBOLTReservedEnd() { return "__bolt_reserved_end"; } + static StringRef getBOLTReservedRWStart() { + return "__bolt_reserved_rw_start"; + } + static StringRef getBOLTReservedRWEnd() { return "__bolt_reserved_rw_end"; } + /// Common section names. 
static StringRef getEHFrameSectionName() { return ".eh_frame"; } static StringRef getEHFrameHdrSectionName() { return ".eh_frame_hdr"; } @@ -468,6 +475,12 @@ private: /// Track next available address for new allocatable sections. uint64_t NextAvailableAddress{0}; + uint64_t BOLTReservedStartAddress{0}; + uint64_t BOLTReservedEndAddress{0}; + + uint64_t BOLTReservedRWStartAddress{0}; + uint64_t BOLTReservedRWEndAddress{0}; + /// Location and size of dynamic relocations. std::optional DynamicRelocationsAddress; uint64_t DynamicRelocationsSize{0}; diff --git a/bolt/lib/Core/BinaryBasicBlock.cpp b/bolt/lib/Core/BinaryBasicBlock.cpp index 2a2192b79bb4bf90bbc2c6d7771264c9972f1b70..7da836a0bfffb4dc968e1edd562292f1d8a8d664 100644 --- a/bolt/lib/Core/BinaryBasicBlock.cpp +++ b/bolt/lib/Core/BinaryBasicBlock.cpp @@ -44,6 +44,11 @@ const JumpTable *BinaryBasicBlock::getJumpTable() const { return JT; } +void BinaryBasicBlock::undefineInstLabel(const llvm::MCInst &Inst) { + BinaryContext &BC = Function->getBinaryContext(); + BC.undefineInstLabel(Inst); +} + void BinaryBasicBlock::adjustNumPseudos(const MCInst &Inst, int Sign) { BinaryContext &BC = Function->getBinaryContext(); if (BC.MIB->isPseudo(Inst)) diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp index 6a1106f23e48578db939511c74176caf1ff3a699..7c3b4a87cfac8b63f4ca98e0dcf76bcdeecf7351 100644 --- a/bolt/lib/Core/BinaryContext.cpp +++ b/bolt/lib/Core/BinaryContext.cpp @@ -318,7 +318,7 @@ bool BinaryContext::forceSymbolRelocations(StringRef SymbolName) const { (SymbolName == "__hot_data_start" || SymbolName == "__hot_data_end")) return true; - if (SymbolName == "_end") + if (SymbolName == "_end" && !IsLinuxKernel) return true; return false; @@ -1073,6 +1073,7 @@ MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address, BD = GAI->second; if (!BD->hasName(Name)) { GlobalSymbols[Name] = BD; + BD->updateSize(Size); BD->Symbols.push_back(Symbol); } } @@ -2059,6 +2060,23 @@ ErrorOr BinaryContext::getSectionForAddress(uint64_t Address) { return std::make_error_code(std::errc::bad_address); } +ErrorOr +BinaryContext::getSectionForOutputAddress(uint64_t Address) { + for (auto &Sec : allocatableSections()) { + // Skip pseudo sections that serve a purpose of creating a corresponding + // entry in section header table + if (Sec.getOutputContents().empty()) + continue; + + uint64_t OutputAddress = Sec.getOutputAddress(); + uint64_t OutputSize = Sec.getOutputSize(); + if (OutputAddress && OutputAddress <= Address && + Address < OutputAddress + OutputSize) + return Sec; + } + return std::make_error_code(std::errc::bad_address); +} + ErrorOr BinaryContext::getSectionNameForAddress(uint64_t Address) const { if (ErrorOr Section = getSectionForAddress(Address)) diff --git a/bolt/lib/Core/BinaryEmitter.cpp b/bolt/lib/Core/BinaryEmitter.cpp index f6dfa249f9a9f54217b39eb399b184dcc2197acc..99484c6b038d04d4f05e49f9baad18a704403b00 100644 --- a/bolt/lib/Core/BinaryEmitter.cpp +++ b/bolt/lib/Core/BinaryEmitter.cpp @@ -274,6 +274,10 @@ void BinaryEmitter::emitFunctions() { // Emit functions added by BOLT. emit(BC.getInjectedBinaryFunctions()); + for (BinaryFunction *BF : SortedFunctions) + if (!BF->isEmitted()) + BF->undefineLabels(); + // Mark the end of hot text. 
if (opts::HotText) { if (BC.HasWarmSection) @@ -359,11 +363,11 @@ bool BinaryEmitter::emitFunction(BinaryFunction &Function, assert((Function.empty() || !(*Function.begin()).isCold()) && "first basic block should never be cold"); - // Emit UD2 at the beginning if requested by user. + // Emit undefined instruction at the beginning if requested by user. if (!opts::BreakFunctionNames.empty()) { for (std::string &Name : opts::BreakFunctionNames) { if (Function.hasNameRegex(Name)) { - Streamer.emitIntValue(0x0B0F, 2); // UD2: 0F 0B + Streamer.emitBytes(BC.MIB->getUndefFillValue()); break; } } diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index ea09371b57e8a6c27058b5aef9ff5bdfecfa2a82..193b8a5404ab16d9f18748424cd2283330ce93e6 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -2994,17 +2994,11 @@ uint64_t BinaryFunction::getInstructionCount() const { } void BinaryFunction::clearDisasmState() { + undefineLabels(); + clearList(Instructions); clearList(IgnoredBranches); clearList(TakenBranches); - - if (BC.HasRelocations) { - for (std::pair &LI : Labels) - BC.UndefinedSymbols.insert(LI.second); - for (MCSymbol *const EndLabel : FunctionEndLabels) - if (EndLabel) - BC.UndefinedSymbols.insert(EndLabel); - } } void BinaryFunction::setTrapOnEntry() { diff --git a/bolt/lib/Core/BinarySection.cpp b/bolt/lib/Core/BinarySection.cpp index 9ad49ca1b3a03832a31d77aad54e89dfa850e3dc..818ebb1c1ef5357e71f1cd615b18729e2aa7bf35 100644 --- a/bolt/lib/Core/BinarySection.cpp +++ b/bolt/lib/Core/BinarySection.cpp @@ -130,6 +130,18 @@ void BinarySection::emitAsData(MCStreamer &Streamer, } #endif + if (!BC.isRISCV() && std::distance(ROI, ROE) > 1) { + errs() << "BOLT-WARNING: multiple relocations at the same offset:\n"; + for (const auto &Relocation : make_range(ROI, ROE)) { + errs() << " " + << (Relocation.Symbol ? Relocation.Symbol->getName() + : StringRef("")) + << " at offset 0x" << Twine::utohexstr(Relocation.Offset) + << " with type " << Relocation.Type << '\n'; + } + ROI = std::prev(ROE); + } + size_t RelocationSize = Relocation::emit(ROI, ROE, &Streamer); SectionOffset += RelocationSize; } diff --git a/bolt/lib/Core/FunctionLayout.cpp b/bolt/lib/Core/FunctionLayout.cpp index 15e6127ad2e9e82878a0393853ea5fb7329340b5..5055aa5a37482d5a53b5002234481f910da9be39 100644 --- a/bolt/lib/Core/FunctionLayout.cpp +++ b/bolt/lib/Core/FunctionLayout.cpp @@ -148,6 +148,10 @@ void FunctionLayout::eraseBasicBlocks( FF.StartIndex -= TotalErased; TotalErased += Erased; } + for (BinaryBasicBlock *BB : Blocks) { + if (IsErased(BB)) + BB->undefineLabels(); + } llvm::erase_if(Blocks, IsErased); // Remove empty fragments at the end diff --git a/bolt/lib/Core/JumpTable.cpp b/bolt/lib/Core/JumpTable.cpp index 65e1032c579b5a9842a3f792346df720eb413e20..d3ca951d7e453d726d7a8dd23d86b0308e7a788b 100644 --- a/bolt/lib/Core/JumpTable.cpp +++ b/bolt/lib/Core/JumpTable.cpp @@ -85,7 +85,7 @@ void bolt::JumpTable::updateOriginal() { uint64_t EntryOffset = BaseOffset; for (MCSymbol *Entry : Entries) { const uint64_t RelType = - Type == JTT_NORMAL ? ELF::R_X86_64_64 : ELF::R_X86_64_PC32; + Type == JTT_NORMAL ? Relocation::getAbs64() : Relocation::getPC32(); const uint64_t RelAddend = Type == JTT_NORMAL ? 
0 : EntryOffset - BaseOffset; // Replace existing relocation with the new one to allow any modifications diff --git a/bolt/lib/Core/MCPlusBuilder.cpp b/bolt/lib/Core/MCPlusBuilder.cpp index 7ff7a2288451c844f360f9b24878d6d335be85ee..5d25a514862af4e182c5adb83612241a213ca748 100644 --- a/bolt/lib/Core/MCPlusBuilder.cpp +++ b/bolt/lib/Core/MCPlusBuilder.cpp @@ -21,6 +21,7 @@ #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/FormatVariadic.h" #include #define DEBUG_TYPE "mcplus" @@ -293,7 +294,9 @@ MCSymbol *MCPlusBuilder::getOrCreateInstLabel(MCInst &Inst, const Twine &Name, if (Label) return Label; - Label = Ctx->createNamedTempSymbol(Name); + static uint64_t ID = 0; + Label = Ctx->createLocalSymbol(formatv("__bolt.{0}_{1}", Name, ++ID).str()); + setAnnotationOpValue(Inst, MCAnnotation::kLabel, reinterpret_cast(Label)); return Label; diff --git a/bolt/lib/Core/Relocation.cpp b/bolt/lib/Core/Relocation.cpp index 4e888a5b147aca41eb24a0b2237e21f80c38c0a6..203f702aa209219cd6e9687d3b940d9cd0d2ba26 100644 --- a/bolt/lib/Core/Relocation.cpp +++ b/bolt/lib/Core/Relocation.cpp @@ -892,6 +892,19 @@ bool Relocation::isRelative(uint64_t Type) { } } +bool Relocation::isGlobDat(uint64_t Type) { + switch (Arch) { + default: + llvm_unreachable("Unsupported architecture"); + case Triple::aarch64: + return Type == ELF::R_AARCH64_GLOB_DAT; + case Triple::riscv64: + return Type == ELF::R_RISCV_64; + case Triple::x86_64: + return Type == ELF::R_X86_64_GLOB_DAT; + } +} + bool Relocation::isIRelative(uint64_t Type) { switch (Arch) { default: diff --git a/bolt/lib/Passes/Instrumentation.cpp b/bolt/lib/Passes/Instrumentation.cpp index ebb3925749b4d2775818937e0e7be83ebcd32fb9..64c1bf16bc3d93bc8b11ad4b55875bd4f90b3cdc 100644 --- a/bolt/lib/Passes/Instrumentation.cpp +++ b/bolt/lib/Passes/Instrumentation.cpp @@ -300,6 +300,9 @@ void Instrumentation::instrumentIndirectTarget(BinaryBasicBlock &BB, createIndCallDescription(FromFunction, From); BinaryContext &BC = FromFunction.getBinaryContext(); + if (BC.IsLinuxKernel) + return; + bool IsTailCall = BC.MIB->isTailCall(*Iter); InstructionListType CounterInstrs = BC.MIB->createInstrumentedIndirectCall( std::move(*Iter), @@ -381,6 +384,17 @@ void Instrumentation::instrumentFunction(BinaryFunction &Function, if (BC.isAArch64() && hasAArch64ExclusiveMemop(Function, BBToSkip)) return; + if (BC.IsLinuxKernel && BC.isAArch64()) { + // Do not instrument these functions, since they might be called before page + // table is initialized + for (const std::string &Name : std::vector{ + "strrchr", "strchr", "strcmp", "strncmp", "strlen", "strnlen", + "memcmp", "memchr", "memcpy", "memmove", "memset"}) { + if (Function.hasNameRegex(Name)) + return; + } + } + SplitWorklistTy SplitWorklist; SplitInstrsTy SplitInstrs; @@ -732,6 +746,10 @@ void Instrumentation::createAuxiliaryFunctions(BinaryContext &BC) { BC.MIB->createInstrNumFuncsGetter(BC.Ctx.get())); if (BC.isELF()) { + if (BC.IsLinuxKernel) + assert(!BC.StartFunctionAddress && !BC.FiniFunctionAddress && + "Linux kernel should not have entry/fini function"); + if (BC.StartFunctionAddress) { BinaryFunction *Start = BC.getBinaryFunctionAtAddress(*BC.StartFunctionAddress); @@ -788,8 +806,9 @@ void Instrumentation::setupRuntimeLibrary(BinaryContext &BC) { Summary->IndCallTargetDescriptions.size() * sizeof(IndCallTargetDescription)) << " bytes in file\n"; - BC.outs() << "BOLT-INSTRUMENTER: Profile will be saved to file " - << opts::InstrumentationFilename << "\n"; 
+ if (!BC.IsLinuxKernel) + BC.outs() << "BOLT-INSTRUMENTER: Profile will be saved to file " + << opts::InstrumentationFilename << "\n"; InstrumentationRuntimeLibrary *RtLibrary = static_cast(BC.getRuntimeLibrary()); diff --git a/bolt/lib/Passes/PatchEntries.cpp b/bolt/lib/Passes/PatchEntries.cpp index 981d1b70af90702b4b352ba09503e7f699264494..68e34783ff99a17690ea1c85017d87e879a4e3c7 100644 --- a/bolt/lib/Passes/PatchEntries.cpp +++ b/bolt/lib/Passes/PatchEntries.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "bolt/Passes/PatchEntries.h" +#include "bolt/Utils/CommandLineOpts.h" #include "bolt/Utils/NameResolver.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/CommandLine.h" @@ -32,7 +33,7 @@ namespace llvm { namespace bolt { Error PatchEntries::runOnFunctions(BinaryContext &BC) { - if (!opts::ForcePatch) { + if (!opts::ForcePatch && !BC.IsLinuxKernel) { // Mark the binary for patching if we did not create external references // for original code in any of functions we are not going to emit. bool NeedsPatching = llvm::any_of( @@ -48,13 +49,9 @@ Error PatchEntries::runOnFunctions(BinaryContext &BC) { if (opts::Verbosity >= 1) BC.outs() << "BOLT-INFO: patching entries in original code\n"; - // Calculate the size of the patch. - static size_t PatchSize = 0; - if (!PatchSize) { - InstructionListType Seq; - BC.MIB->createLongTailCall(Seq, BC.Ctx->createTempSymbol(), BC.Ctx.get()); - PatchSize = BC.computeCodeSize(Seq.begin(), Seq.end()); - } + static size_t PatchSize = getPatchSize(BC); + if (opts::Verbosity >= 1) + BC.outs() << "BOLT-INFO: patch size is " << PatchSize << "\n"; for (auto &BFI : BC.getBinaryFunctions()) { BinaryFunction &Function = BFI.second; @@ -63,8 +60,16 @@ Error PatchEntries::runOnFunctions(BinaryContext &BC) { if (!BC.shouldEmit(Function)) continue; + bool MustPatch = opts::ForcePatch; + + // In relocation mode, a copy will be created and only the copy can be + // changed. To avoid undefined behaviors, we must make the original function + // jump to the copy. + if (BC.HasRelocations && Function.mayChange()) + MustPatch = true; + // Check if we can skip patching the function. 
- if (!opts::ForcePatch && !Function.hasEHRanges() && + if (!MustPatch && !Function.hasEHRanges() && !opts::Instrument && Function.getSize() < PatchThreshold) continue; @@ -100,7 +105,7 @@ Error PatchEntries::runOnFunctions(BinaryContext &BC) { if (!Success) { // We can't change output layout for AArch64 due to LongJmp pass if (BC.isAArch64()) { - if (opts::ForcePatch) { + if (MustPatch) { BC.errs() << "BOLT-ERROR: unable to patch entries in " << Function << "\n"; return createFatalBOLTError(""); diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp index 5dfef0b71cc79f5bbeafc182eba62fff9717b8df..9ea382ad246c7f7a3cb4348e9c6736f8a8a860c1 100644 --- a/bolt/lib/Rewrite/BinaryPassManager.cpp +++ b/bolt/lib/Rewrite/BinaryPassManager.cpp @@ -125,6 +125,11 @@ static cl::opt PrintJTFootprintReduction( cl::desc("print function after jt-footprint-reduction pass"), cl::Hidden, cl::cat(BoltOptCategory)); +static cl::opt + PrintAdrRelaxation("print-adr-relaxation", + cl::desc("print functions after ADR Relaxation pass"), + cl::Hidden, cl::cat(BoltOptCategory)); + static cl::opt PrintLongJmp("print-longjmp", cl::desc("print functions after longjmp pass"), cl::Hidden, @@ -377,6 +382,44 @@ Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) { Manager.registerPass(std::make_unique(NeverPrint)); + if (BC.IsLinuxKernel && opts::Instrument && BC.isX86()) { + if (opts::Instrument) + Manager.registerPass(std::make_unique(NeverPrint)); + + Manager.registerPass(std::make_unique(PrintNormalized)); + // This pass syncs local branches with CFG. If any of the following + // passes breaks the sync - they either need to re-run the pass or + // fix branches consistency internally. + Manager.registerPass( + std::make_unique(PrintAfterBranchFixup)); + // Print final dyno stats right while CFG and instruction analysis are + // intact. + Manager.registerPass(std::make_unique( + "after all optimizations before SCTC and FOP"), + opts::PrintDynoStats || opts::DynoStatsAll); + + // This pass should always run last.* + Manager.registerPass(std::make_unique(PrintFinalized)); + // Assign each function an output section. + Manager.registerPass(std::make_unique()); + + // Patch original function entries + if (BC.HasRelocations) + Manager.registerPass(std::make_unique()); + + // In non-relocation mode, mark functions that do not fit into their + // original space as non-simple if we have to (e.g. for correct debug info + // update). NOTE: this pass depends on finalized code. + if (!BC.HasRelocations) + Manager.registerPass(std::make_unique(NeverPrint)); + + Manager.registerPass(std::make_unique(NeverPrint)); + // Check for dirty state of MCSymbols caused by running calculateEmittedSize + // in parallel and restore them + Manager.registerPass(std::make_unique(NeverPrint)); + return Manager.runPasses(); + } + if (opts::Instrument) Manager.registerPass(std::make_unique(NeverPrint)); else if (opts::Hugify) @@ -490,7 +533,8 @@ Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) { Manager.registerPass(std::make_unique()); if (BC.isAArch64()) { - Manager.registerPass(std::make_unique()); + Manager.registerPass( + std::make_unique(PrintAdrRelaxation)); // Tighten branches according to offset differences between branch and // targets. 
No extra instructions after this pass, otherwise we may have diff --git a/bolt/lib/Rewrite/BuildIDRewriter.cpp b/bolt/lib/Rewrite/BuildIDRewriter.cpp index 83d0c9bfe182aeb91dd7d897c62f200dc95790b6..8a9c32619f6a9a9251d014b22dfea9c155aa422f 100644 --- a/bolt/lib/Rewrite/BuildIDRewriter.cpp +++ b/bolt/lib/Rewrite/BuildIDRewriter.cpp @@ -39,8 +39,8 @@ class BuildIDRewriter final : public MetadataRewriter { std::optional BuildIDSize; public: - BuildIDRewriter(StringRef Name, BinaryContext &BC) - : MetadataRewriter(Name, BC) {} + BuildIDRewriter(StringRef Name, RewriteInstance &RI) + : MetadataRewriter(Name, RI) {} Error sectionInitializer() override; @@ -108,6 +108,6 @@ Error BuildIDRewriter::postEmitFinalizer() { } // namespace std::unique_ptr -llvm::bolt::createBuildIDRewriter(BinaryContext &BC) { - return std::make_unique("build-id-rewriter", BC); +llvm::bolt::createBuildIDRewriter(RewriteInstance &RI) { + return std::make_unique("build-id-rewriter", RI); } diff --git a/bolt/lib/Rewrite/CMakeLists.txt b/bolt/lib/Rewrite/CMakeLists.txt index 34993af2623bfb897242c59cdbf134ba44edcb7f..0aba7319664ecc71b9639a706f21f265af25b64c 100644 --- a/bolt/lib/Rewrite/CMakeLists.txt +++ b/bolt/lib/Rewrite/CMakeLists.txt @@ -21,6 +21,7 @@ add_llvm_library(LLVMBOLTRewrite LinuxKernelRewriter.cpp MachORewriteInstance.cpp MetadataManager.cpp + MetadataRewriter.cpp BuildIDRewriter.cpp PseudoProbeRewriter.cpp RewriteInstance.cpp diff --git a/bolt/lib/Rewrite/LinuxKernelRewriter.cpp b/bolt/lib/Rewrite/LinuxKernelRewriter.cpp index 03b414b71caca77ace98c99c11440241db258a99..91a10434b2b221bac945e1a1ce459cc3605286e9 100644 --- a/bolt/lib/Rewrite/LinuxKernelRewriter.cpp +++ b/bolt/lib/Rewrite/LinuxKernelRewriter.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "bolt/Core/BinaryFunction.h" +#include "bolt/Passes/PatchEntries.h" #include "bolt/Rewrite/MetadataRewriter.h" #include "bolt/Rewrite/MetadataRewriters.h" #include "bolt/Utils/CommandLineOpts.h" @@ -21,6 +22,8 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Errc.h" +#include "llvm/Support/ErrorOr.h" +#include #define DEBUG_TYPE "bolt-linux" @@ -29,16 +32,6 @@ using namespace bolt; namespace opts { -static cl::opt - AltInstHasPadLen("alt-inst-has-padlen", - cl::desc("specify that .altinstructions has padlen field"), - cl::init(false), cl::Hidden, cl::cat(BoltCategory)); - -static cl::opt - AltInstFeatureSize("alt-inst-feature-size", - cl::desc("size of feature field in .altinstructions"), - cl::init(2), cl::Hidden, cl::cat(BoltCategory)); - static cl::opt DumpAltInstructions("dump-alt-instructions", cl::desc("dump Linux alternative instructions info"), @@ -77,11 +70,6 @@ static cl::opt cl::desc("dump Linux kernel static keys jump table"), cl::init(false), cl::Hidden, cl::cat(BoltCategory)); -static cl::opt LongJumpLabels( - "long-jump-labels", - cl::desc("always use long jumps/nops for Linux kernel static keys"), - cl::init(false), cl::Hidden, cl::cat(BoltCategory)); - static cl::opt PrintORC("print-orc", cl::desc("print ORC unwind information for instructions"), @@ -89,6 +77,34 @@ static cl::opt } // namespace opts +/// Linux kernel version +struct LKVersion { + LKVersion() {} + LKVersion(unsigned Major, unsigned Minor, unsigned Rev = 0) + : Major(Major), Minor(Minor), Rev(Rev) {} + + bool operator<(const LKVersion &Other) const { + return std::make_tuple(Major, Minor, Rev) < + std::make_tuple(Other.Major, Other.Minor, Other.Rev); + } + + bool 
operator>(const LKVersion &Other) const { return Other < *this; } + + bool operator<=(const LKVersion &Other) const { return !(*this > Other); } + + bool operator>=(const LKVersion &Other) const { return !(*this < Other); } + + bool operator==(const LKVersion &Other) const { + return Major == Other.Major && Minor == Other.Minor && Rev == Other.Rev; + } + + bool operator!=(const LKVersion &Other) const { return !(*this == Other); } + + unsigned Major{0}; + unsigned Minor{0}; + unsigned Rev{0}; +}; + /// Linux Kernel supports stack unwinding using ORC (oops rewind capability). /// ORC state at every IP can be described by the following data structure. struct ORCState { @@ -123,7 +139,33 @@ inline raw_ostream &operator<<(raw_ostream &OS, const ORCState &E) { namespace { +/// Extension to DataExtractor that supports reading addresses stored in +/// PC-relative format. +class AddressExtractor : public DataExtractor { + uint64_t DataAddress; + +public: + AddressExtractor(StringRef Data, uint64_t DataAddress, bool IsLittleEndian, + uint8_t AddressSize) + : DataExtractor(Data, IsLittleEndian, AddressSize), + DataAddress(DataAddress) {} + + /// Extract 32-bit PC-relative address/pointer. + uint64_t getPCRelAddress32(Cursor &C) { + const uint64_t Base = DataAddress + C.tell(); + return Base + (int32_t)getU32(C); + } + + /// Extract 64-bit PC-relative address/pointer. + uint64_t getPCRelAddress64(Cursor &C) { + const uint64_t Base = DataAddress + C.tell(); + return Base + (int64_t)getU64(C); + } +}; + class LinuxKernelRewriter final : public MetadataRewriter { + LKVersion LinuxKernelVersion; + /// Information required for updating metadata referencing an instruction. struct InstructionFixup { BinarySection &Section; // Section referencing the instruction. @@ -173,13 +215,18 @@ class LinuxKernelRewriter final : public MetadataRewriter { static constexpr size_t STATIC_KEYS_JUMP_ENTRY_SIZE = 8; struct JumpInfoEntry { - bool Likely; - bool InitValue; + bool Likely{false}; + bool InitValue{false}; + bool Nop{false}; + MCSymbol *JumpInstLabel{nullptr}; + BinarySection *Sec{nullptr}; + uint64_t JumpAddress{0}; + BinaryFunction *BF{nullptr}; }; - SmallVector JumpInfo; + std::vector JumpInfo; - /// Static key entries that need nop conversion. - DenseSet NopIDs; + // Use long jumps/nops for Linux kernel static keys + bool LongJumpLabels{false}; /// Section containing static call table. ErrorOr StaticCallSection = std::errc::bad_address; @@ -193,14 +240,25 @@ class LinuxKernelRewriter final : public MetadataRewriter { }; using StaticCallListType = std::vector; StaticCallListType StaticCallEntries; - - /// Section containing the Linux exception table. - ErrorOr ExceptionsSection = std::errc::bad_address; - static constexpr size_t EXCEPTION_TABLE_ENTRY_SIZE = 12; - /// Functions with exception handling code. DenseSet FunctionsWithExceptions; + struct RetpolineSiteInfo { + uint64_t Offset{0}; + MCSymbol *Dest{nullptr}; + }; + std::vector RetpolineSites; + + ErrorOr RetpolineSiteSec = std::errc::bad_address; + + struct ReturnSiteInfo { + uint64_t Offset{0}; + MCSymbol *Dest{nullptr}; + }; + std::vector ReturnSites; + + ErrorOr ReturnSiteSec = std::errc::bad_address; + /// Section with paravirtual patch sites. ErrorOr ParavirtualPatchSection = std::errc::bad_address; @@ -210,6 +268,15 @@ class LinuxKernelRewriter final : public MetadataRewriter { /// .altinstructions section. 
ErrorOr AltInstrSection = std::errc::bad_address; + struct AltInstrEntry { + uint64_t Offset{0}; + uint64_t OrgInstrAddr{0}; + uint64_t AltInstrAddr{0}; + uint8_t Instrlen{0}; + uint8_t Replacementlen{0}; + }; + std::vector AltInstrEntries; + /// Section containing Linux bug table. ErrorOr BugTableSection = std::errc::bad_address; @@ -225,6 +292,8 @@ class LinuxKernelRewriter final : public MetadataRewriter { ErrorOr PCIFixupSection = std::errc::bad_address; static constexpr size_t PCI_FIXUP_ENTRY_SIZE = 16; + Error detectLinuxKernelVersion(); + /// Process linux kernel special sections and their relocations. void processLKSections(); @@ -256,7 +325,13 @@ class LinuxKernelRewriter final : public MetadataRewriter { Error readStaticCalls(); Error rewriteStaticCalls(); - Error readExceptionTable(); + Error readRetpolineSites(); + Error rewriteRetpolineSites(); + + Error readReturnSites(); + Error rewriteReturnSites(); + + Error readExceptionTable(StringRef SectionName); Error rewriteExceptionTable(); /// Paravirtual instruction patch sites. @@ -274,8 +349,6 @@ class LinuxKernelRewriter final : public MetadataRewriter { /// Handle alternative instruction info from .altinstructions. Error readAltInstructions(); void processAltInstructionsPostCFG(); - Error tryReadAltInstructions(uint32_t AltInstFeatureSize, - bool AltInstHasPadLen, bool ParseOnly); /// Read .pci_fixup Error readPCIFixupTable(); @@ -286,10 +359,45 @@ class LinuxKernelRewriter final : public MetadataRewriter { Error updateStaticKeysJumpTablePostEmit(); public: - LinuxKernelRewriter(BinaryContext &BC) - : MetadataRewriter("linux-kernel-rewriter", BC) {} + LinuxKernelRewriter(RewriteInstance &RI) + : MetadataRewriter("linux-kernel-rewriter", RI) {} Error preCFGInitializer() override { + if (Error E = detectLinuxKernelVersion()) + return E; + + auto ShouldIgnore = [this](const BinaryFunction &Function) { + std::optional SectionName = Function.getOriginSectionName(); + if (!SectionName || *SectionName != ".text") + return true; + + uint64_t Address = Function.getAddress(); + StringRef Name = Function.getOneName(); + + if (BC.isX86()) { + // Ignore CFI symbols + if (Name.starts_with("__pfx_") || Name.starts_with("__cfi_")) + return true; + + BinaryData *BDStart = BC.getBinaryDataByName("irq_entries_start"); + if (BDStart && BDStart->containsAddress(Address)) + return true; + + if (BC.isInRange("__static_call_text_start", "__static_call_text_end", + Address)) + return true; + } + + if (BC.isInRange("__noinstr_text_start", "__noinstr_text_end", Address)) + return true; + + return false; + }; + + for (BinaryFunction *Function : BC.getAllBinaryFunctions()) + if (ShouldIgnore(*Function)) + Function->setIgnored(); + processLKSections(); if (Error E = processSMPLocks()) @@ -298,7 +406,16 @@ public: if (Error E = readStaticCalls()) return E; - if (Error E = readExceptionTable()) + if (Error E = readRetpolineSites()) + return E; + + if (Error E = readReturnSites()) + return E; + + if (Error E = readExceptionTable("__ex_table")) + return E; + + if (Error E = readExceptionTable("__kvm_ex_table")) return E; if (Error E = readParaInstructions()) @@ -348,6 +465,12 @@ public: if (Error E = rewriteStaticCalls()) return E; + if (Error E = rewriteRetpolineSites()) + return E; + + if (Error E = rewriteReturnSites()) + return E; + if (Error E = rewriteStaticKeysJumpTable()) return E; @@ -370,6 +493,30 @@ public: } }; +Error LinuxKernelRewriter::detectLinuxKernelVersion() { + if (BinaryData *BD = BC.getBinaryDataByName("linux_banner")) { + const 
BinarySection &Section = BD->getSection(); + const std::string S = + Section.getContents().substr(BD->getOffset(), BD->getSize()).str(); + + const std::regex Re(R"---(Linux version ((\d+)\.(\d+)(\.(\d+))?))---"); + std::smatch Match; + if (std::regex_search(S, Match, Re)) { + const unsigned Major = std::stoi(Match[2].str()); + const unsigned Minor = std::stoi(Match[3].str()); + const unsigned Rev = Match[5].matched ? std::stoi(Match[5].str()) : 0; + LinuxKernelVersion = LKVersion(Major, Minor, Rev); + BC.outs() << "BOLT-INFO: Linux kernel version is " << Match[1].str() + << "\n"; + if (LinuxKernelVersion < LKVersion(5, 0)) + return createStringError("Unsupported Linux kernel version"); + return Error::success(); + } + } + return createStringError(errc::executable_format_error, + "Linux kernel version is unknown"); +} + void LinuxKernelRewriter::processLKSections() { processLKKSymtab(); processLKKSymtab(true); @@ -423,13 +570,13 @@ Error LinuxKernelRewriter::processSMPLocks() { return createStringError(errc::executable_format_error, "bad size of .smp_locks section"); - DataExtractor DE = DataExtractor(SMPLocksSection->getContents(), - BC.AsmInfo->isLittleEndian(), - BC.AsmInfo->getCodePointerSize()); - DataExtractor::Cursor Cursor(0); + AddressExtractor AE(SMPLocksSection->getContents(), SectionAddress, + BC.AsmInfo->isLittleEndian(), + BC.AsmInfo->getCodePointerSize()); + AddressExtractor::Cursor Cursor(0); while (Cursor && Cursor.tell() < SectionSize) { const uint64_t Offset = Cursor.tell(); - const uint64_t IP = SectionAddress + Offset + (int32_t)DE.getU32(Cursor); + const uint64_t IP = AE.getPCRelAddress32(Cursor); // Consume the status of the cursor. if (!Cursor) @@ -474,8 +621,8 @@ void LinuxKernelRewriter::processInstructionFixups() { continue; Fixup.Section.addRelocation(Fixup.Offset, &Fixup.Label, - Fixup.IsPCRelative ? ELF::R_X86_64_PC32 - : ELF::R_X86_64_64, + Fixup.IsPCRelative ? Relocation::getPC32() + : Relocation::getAbs64(), /*Addend*/ 0); } } @@ -499,20 +646,17 @@ Error LinuxKernelRewriter::readORCTables() { return createStringError(errc::executable_format_error, "ORC entries number mismatch detected"); - const uint64_t IPSectionAddress = ORCUnwindIPSection->getAddress(); - DataExtractor OrcDE = DataExtractor(ORCUnwindSection->getContents(), - BC.AsmInfo->isLittleEndian(), - BC.AsmInfo->getCodePointerSize()); - DataExtractor IPDE = DataExtractor(ORCUnwindIPSection->getContents(), - BC.AsmInfo->isLittleEndian(), - BC.AsmInfo->getCodePointerSize()); + DataExtractor OrcDE(ORCUnwindSection->getContents(), + BC.AsmInfo->isLittleEndian(), + BC.AsmInfo->getCodePointerSize()); + AddressExtractor IPAE( + ORCUnwindIPSection->getContents(), ORCUnwindIPSection->getAddress(), + BC.AsmInfo->isLittleEndian(), BC.AsmInfo->getCodePointerSize()); DataExtractor::Cursor ORCCursor(0); DataExtractor::Cursor IPCursor(0); uint64_t PrevIP = 0; for (uint32_t Index = 0; Index < NumORCEntries; ++Index) { - const uint64_t IP = - IPSectionAddress + IPCursor.tell() + (int32_t)IPDE.getU32(IPCursor); - + const uint64_t IP = IPAE.getPCRelAddress32(IPCursor); // Consume the status of the cursor. 
if (!IPCursor) return createStringError(errc::executable_format_error, @@ -856,21 +1000,20 @@ Error LinuxKernelRewriter::validateORCTables() { if (!ORCUnwindIPSection) return Error::success(); - const uint64_t IPSectionAddress = ORCUnwindIPSection->getAddress(); - DataExtractor IPDE = DataExtractor(ORCUnwindIPSection->getOutputContents(), - BC.AsmInfo->isLittleEndian(), - BC.AsmInfo->getCodePointerSize()); - DataExtractor::Cursor IPCursor(0); + AddressExtractor IPAE( + ORCUnwindIPSection->getOutputContents(), ORCUnwindIPSection->getAddress(), + BC.AsmInfo->isLittleEndian(), BC.AsmInfo->getCodePointerSize()); + AddressExtractor::Cursor IPCursor(0); uint64_t PrevIP = 0; for (uint32_t Index = 0; Index < NumORCEntries; ++Index) { - const uint64_t IP = - IPSectionAddress + IPCursor.tell() + (int32_t)IPDE.getU32(IPCursor); + const uint64_t IP = IPAE.getPCRelAddress32(IPCursor); if (!IPCursor) return createStringError(errc::executable_format_error, "out of bounds while reading ORC IP table: %s", toString(IPCursor.takeError()).c_str()); - assert(IP >= PrevIP && "Unsorted ORC table detected"); + if (!BC.HasRelocations) + assert(IP >= PrevIP && "Unsorted ORC table detected"); (void)PrevIP; PrevIP = IP; } @@ -916,16 +1059,14 @@ Error LinuxKernelRewriter::readStaticCalls() { "static call table size error"); const uint64_t SectionAddress = StaticCallSection->getAddress(); - DataExtractor DE(StaticCallSection->getContents(), - BC.AsmInfo->isLittleEndian(), - BC.AsmInfo->getCodePointerSize()); - DataExtractor::Cursor Cursor(StaticCallTableAddress - SectionAddress); + AddressExtractor AE(StaticCallSection->getContents(), SectionAddress, + BC.AsmInfo->isLittleEndian(), + BC.AsmInfo->getCodePointerSize()); + AddressExtractor::Cursor Cursor(StaticCallTableAddress - SectionAddress); uint32_t EntryID = 0; while (Cursor && Cursor.tell() < Stop->getAddress() - SectionAddress) { - const uint64_t CallAddress = - SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor); - const uint64_t KeyAddress = - SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor); + const uint64_t CallAddress = AE.getPCRelAddress32(Cursor); + const uint64_t KeyAddress = AE.getPCRelAddress32(Cursor); // Consume the status of the cursor. if (!Cursor) @@ -998,9 +1139,106 @@ Error LinuxKernelRewriter::rewriteStaticCalls() { StaticCallSection->getAddress() + (Entry.ID - 1) * STATIC_CALL_ENTRY_SIZE; StaticCallSection->addRelocation(EntryOffset, Entry.Label, - ELF::R_X86_64_PC32, /*Addend*/ 0); + Relocation::getPC32(), /*Addend*/ 0); + } + + return Error::success(); +} + +Error LinuxKernelRewriter::readRetpolineSites() { + RetpolineSiteSec = BC.getUniqueSectionByName(".retpoline_sites"); + if (!RetpolineSiteSec) + return Error::success(); + + AddressExtractor AE( + RetpolineSiteSec->getContents(), RetpolineSiteSec->getAddress(), + BC.AsmInfo->isLittleEndian(), BC.AsmInfo->getCodePointerSize()); + AddressExtractor::Cursor Cursor(0); + while (Cursor.tell() < RetpolineSiteSec->getSize()) { + RetpolineSites.push_back(RetpolineSiteInfo()); + RetpolineSiteInfo &RetpolineSite = RetpolineSites.back(); + RetpolineSite.Offset = Cursor.tell(); + + uint64_t DestAddr = AE.getPCRelAddress32(Cursor); + + // Consume the status of the cursor. 
+ if (!Cursor) + return createStringError( + errc::executable_format_error, + "out of bounds while reading .retpoline_sites: %s", + toString(Cursor.takeError()).c_str()); + + BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(DestAddr); + if (!BF || !BC.shouldEmit(*BF) || !BF->hasInstructions()) + continue; + + MCInst *Inst = BF->getInstructionAtOffset(DestAddr - BF->getAddress()); + if (!Inst) + return createStringError(errc::executable_format_error, + "no instruction at call site address 0x%" PRIx64, + DestAddr); + RetpolineSite.Dest = + BC.MIB->getOrCreateInstLabel(*Inst, "__retpoline_", BC.Ctx.get()); + } + return Error::success(); +} + +Error LinuxKernelRewriter::rewriteRetpolineSites() { + if (!RetpolineSiteSec) + return Error::success(); + for (const RetpolineSiteInfo &RetpolineSite : RetpolineSites) { + if (RetpolineSite.Dest) + RetpolineSiteSec->addRelocation(RetpolineSite.Offset, RetpolineSite.Dest, + Relocation::getPC32(), /*Addend*/ 0); + } + return Error::success(); +} + +Error LinuxKernelRewriter::readReturnSites() { + ReturnSiteSec = BC.getUniqueSectionByName(".return_sites"); + if (!ReturnSiteSec) + return Error::success(); + + AddressExtractor AE(ReturnSiteSec->getContents(), ReturnSiteSec->getAddress(), + BC.AsmInfo->isLittleEndian(), + BC.AsmInfo->getCodePointerSize()); + AddressExtractor::Cursor Cursor(0); + while (Cursor.tell() < ReturnSiteSec->getSize()) { + ReturnSites.push_back(ReturnSiteInfo()); + ReturnSiteInfo &ReturnSite = ReturnSites.back(); + ReturnSite.Offset = Cursor.tell(); + + uint64_t DestAddr = AE.getPCRelAddress32(Cursor); + + // Consume the status of the cursor. + if (!Cursor) + return createStringError(errc::executable_format_error, + "out of bounds while reading .return_sites: %s", + toString(Cursor.takeError()).c_str()); + + BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(DestAddr); + if (!BF || !BC.shouldEmit(*BF) || !BF->hasInstructions()) + continue; + + MCInst *Inst = BF->getInstructionAtOffset(DestAddr - BF->getAddress()); + if (!Inst) + return createStringError(errc::executable_format_error, + "no instruction at call site address 0x%" PRIx64, + DestAddr); + ReturnSite.Dest = + BC.MIB->getOrCreateInstLabel(*Inst, "__return_", BC.Ctx.get()); } + return Error::success(); +} +Error LinuxKernelRewriter::rewriteReturnSites() { + if (!ReturnSiteSec) + return Error::success(); + for (const ReturnSiteInfo &ReturnSite : ReturnSites) { + if (ReturnSite.Dest) + ReturnSiteSec->addRelocation(ReturnSite.Offset, ReturnSite.Dest, + Relocation::getPC32(), /*Addend*/ 0); + } return Error::success(); } @@ -1018,27 +1256,43 @@ Error LinuxKernelRewriter::rewriteStaticCalls() { /// /// More info at: /// https://www.kernel.org/doc/Documentation/x86/exception-tables.txt -Error LinuxKernelRewriter::readExceptionTable() { - ExceptionsSection = BC.getUniqueSectionByName("__ex_table"); +Error LinuxKernelRewriter::readExceptionTable(StringRef SectionName) { + ErrorOr ExceptionsSection = + BC.getUniqueSectionByName(SectionName); if (!ExceptionsSection) return Error::success(); - if (ExceptionsSection->getSize() % EXCEPTION_TABLE_ENTRY_SIZE) + size_t ExceptionTableEntrySize = 0; + switch (BC.TheTriple->getArch()) { + case llvm::Triple::x86_64: + ExceptionTableEntrySize = 12; + break; + + case llvm::Triple::aarch64: + if (LinuxKernelVersion >= LKVersion(5, 16)) + ExceptionTableEntrySize = 12; + else + ExceptionTableEntrySize = 8; + break; + + default: + llvm_unreachable("Unsupported architecture"); + } + assert(ExceptionTableEntrySize && "exception table 
entry size is unknown"); + + if (ExceptionsSection->getSize() % ExceptionTableEntrySize) return createStringError(errc::executable_format_error, "exception table size error"); - const uint64_t SectionAddress = ExceptionsSection->getAddress(); - DataExtractor DE(ExceptionsSection->getContents(), - BC.AsmInfo->isLittleEndian(), - BC.AsmInfo->getCodePointerSize()); - DataExtractor::Cursor Cursor(0); + AddressExtractor AE( + ExceptionsSection->getContents(), ExceptionsSection->getAddress(), + BC.AsmInfo->isLittleEndian(), BC.AsmInfo->getCodePointerSize()); + AddressExtractor::Cursor Cursor(0); uint32_t EntryID = 0; while (Cursor && Cursor.tell() < ExceptionsSection->getSize()) { - const uint64_t InstAddress = - SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor); - const uint64_t FixupAddress = - SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor); - const uint64_t Data = DE.getU32(Cursor); + const uint64_t InstAddress = AE.getPCRelAddress32(Cursor); + const uint64_t FixupAddress = AE.getPCRelAddress32(Cursor); + Cursor.seek(Cursor.tell() + ExceptionTableEntrySize - 8); // Consume the status of the cursor. if (!Cursor) @@ -1052,8 +1306,7 @@ Error LinuxKernelRewriter::readExceptionTable() { if (opts::DumpExceptions) { BC.outs() << "Exception Entry: " << EntryID << '\n'; BC.outs() << "\tInsn: 0x" << Twine::utohexstr(InstAddress) << '\n' - << "\tFixup: 0x" << Twine::utohexstr(FixupAddress) << '\n' - << "\tData: 0x" << Twine::utohexstr(Data) << '\n'; + << "\tFixup: 0x" << Twine::utohexstr(FixupAddress) << '\n'; } MCInst *Inst = nullptr; @@ -1101,24 +1354,22 @@ Error LinuxKernelRewriter::readExceptionTable() { } BC.outs() << "BOLT-INFO: parsed " - << ExceptionsSection->getSize() / EXCEPTION_TABLE_ENTRY_SIZE + << ExceptionsSection->getSize() / ExceptionTableEntrySize << " exception table entries\n"; - return Error::success(); -} - -/// Depending on the value of CONFIG_BUILDTIME_TABLE_SORT, the kernel expects -/// the exception table to be sorted. Hence we have to sort it after code -/// reordering. -Error LinuxKernelRewriter::rewriteExceptionTable() { // Disable output of functions with exceptions before rewrite support is // added. for (BinaryFunction *BF : FunctionsWithExceptions) - BF->setSimple(false); + BF->setIgnored(); return Error::success(); } +/// Depending on the value of CONFIG_BUILDTIME_TABLE_SORT, the kernel expects +/// the exception table to be sorted. Hence we have to sort it after code +/// reordering. +Error LinuxKernelRewriter::rewriteExceptionTable() { return Error::success(); } + /// .parainsrtuctions section contains information for patching parvirtual call /// instructions during runtime. The entries in the section are in the form: /// @@ -1134,9 +1385,9 @@ Error LinuxKernelRewriter::readParaInstructions() { if (!ParavirtualPatchSection) return Error::success(); - DataExtractor DE = DataExtractor(ParavirtualPatchSection->getContents(), - BC.AsmInfo->isLittleEndian(), - BC.AsmInfo->getCodePointerSize()); + DataExtractor DE(ParavirtualPatchSection->getContents(), + BC.AsmInfo->isLittleEndian(), + BC.AsmInfo->getCodePointerSize()); uint32_t EntryID = 0; DataExtractor::Cursor Cursor(0); while (Cursor && !DE.eof(Cursor)) { @@ -1184,6 +1435,10 @@ Error LinuxKernelRewriter::readParaInstructions() { } } + // Disable output of functions with paravirtual instructions before the + // rewrite support is complete. 
+ skipFunctionsWithAnnotation("ParaSite"); + BC.outs() << "BOLT-INFO: parsed " << EntryID << " paravirtual patch sites\n"; return Error::success(); @@ -1199,7 +1454,7 @@ void LinuxKernelRewriter::skipFunctionsWithAnnotation( return BC.MIB->hasAnnotation(Inst, Annotation); }); if (HasAnnotation) { - BF.setSimple(false); + BF.setIgnored(); break; } } @@ -1207,10 +1462,6 @@ void LinuxKernelRewriter::skipFunctionsWithAnnotation( } Error LinuxKernelRewriter::rewriteParaInstructions() { - // Disable output of functions with paravirtual instructions before the - // rewrite support is complete. - skipFunctionsWithAnnotation("ParaSite"); - return Error::success(); } @@ -1235,15 +1486,14 @@ Error LinuxKernelRewriter::readBugTable() { return createStringError(errc::executable_format_error, "bug table size error"); - const uint64_t SectionAddress = BugTableSection->getAddress(); - DataExtractor DE(BugTableSection->getContents(), BC.AsmInfo->isLittleEndian(), - BC.AsmInfo->getCodePointerSize()); - DataExtractor::Cursor Cursor(0); + AddressExtractor AE( + BugTableSection->getContents(), BugTableSection->getAddress(), + BC.AsmInfo->isLittleEndian(), BC.AsmInfo->getCodePointerSize()); + AddressExtractor::Cursor Cursor(0); uint32_t EntryID = 0; while (Cursor && Cursor.tell() < BugTableSection->getSize()) { const uint64_t Pos = Cursor.tell(); - const uint64_t InstAddress = - SectionAddress + Pos + (int32_t)DE.getU32(Cursor); + const uint64_t InstAddress = AE.getPCRelAddress32(Cursor); Cursor.seek(Pos + BUG_TABLE_ENTRY_SIZE); if (!Cursor) @@ -1305,7 +1555,8 @@ Error LinuxKernelRewriter::rewriteBugTable() { MCSymbol *Label = BC.MIB->getOrCreateInstLabel(Inst, "__BUG_", BC.Ctx.get()); const uint64_t EntryOffset = (ID - 1) * BUG_TABLE_ENTRY_SIZE; - BugTableSection->addRelocation(EntryOffset, Label, ELF::R_X86_64_PC32, + BugTableSection->addRelocation(EntryOffset, Label, + Relocation::getPC32(), /*Addend*/ 0); } } @@ -1313,9 +1564,10 @@ Error LinuxKernelRewriter::rewriteBugTable() { // Clear bug entries that were not emitted for this function, e.g. as a // result of DCE, but setting their instruction address to zero. for (const uint32_t ID : FunctionBugList[&BF]) { - if (!EmittedIDs.count(ID)) { + if (!BC.HasRelocations && !EmittedIDs.count(ID)) { const uint64_t EntryOffset = (ID - 1) * BUG_TABLE_ENTRY_SIZE; - BugTableSection->addRelocation(EntryOffset, nullptr, ELF::R_X86_64_PC32, + BugTableSection->addRelocation(EntryOffset, nullptr, + Relocation::getPC32(), /*Addend*/ 0); } } @@ -1327,98 +1579,73 @@ Error LinuxKernelRewriter::rewriteBugTable() { /// The kernel can replace certain instruction sequences depending on hardware /// it is running on and features specified during boot time. The information /// about alternative instruction sequences is stored in .altinstructions -/// section. The format of entries in this section is defined in -/// arch/x86/include/asm/alternative.h: -/// +/// section. The format of entries in this section is defined as /// struct alt_instr { /// s32 instr_offset; /// s32 repl_offset; -/// uXX feature; +/// ... /// u8 instrlen; /// u8 replacementlen; -/// u8 padlen; // present in older kernels +/// ... /// } __packed; /// -/// Note that the structure is packed. +/// Note that the structure is packed and field names may not be exactly the +/// same. /// -/// Since the size of the "feature" field could be either u16 or u32, and -/// "padlen" presence is unknown, we attempt to parse .altinstructions section -/// using all possible combinations (four at this time). 
Since we validate the -/// contents of the section and its size, the detection works quite well. -/// Still, we leave the user the opportunity to specify these features on the -/// command line and skip the guesswork. +/// To parse entries we only need to know the entry size and offset of +/// the field 'instrlen'. Error LinuxKernelRewriter::readAltInstructions() { AltInstrSection = BC.getUniqueSectionByName(".altinstructions"); if (!AltInstrSection) return Error::success(); - // Presence of "padlen" field. - std::vector PadLenVariants; - if (opts::AltInstHasPadLen.getNumOccurrences()) - PadLenVariants.push_back(opts::AltInstHasPadLen); - else - PadLenVariants = {false, true}; - - // Size (in bytes) variants of "feature" field. - std::vector FeatureSizeVariants; - if (opts::AltInstFeatureSize.getNumOccurrences()) - FeatureSizeVariants.push_back(opts::AltInstFeatureSize); - else - FeatureSizeVariants = {2, 4}; - - for (bool AltInstHasPadLen : PadLenVariants) { - for (uint32_t AltInstFeatureSize : FeatureSizeVariants) { - LLVM_DEBUG({ - dbgs() << "BOLT-DEBUG: trying AltInstHasPadLen = " << AltInstHasPadLen - << "; AltInstFeatureSize = " << AltInstFeatureSize << ";\n"; - }); - if (Error E = tryReadAltInstructions(AltInstFeatureSize, AltInstHasPadLen, - /*ParseOnly*/ true)) { - consumeError(std::move(E)); - continue; - } - - LLVM_DEBUG(dbgs() << "Matched .altinstructions format\n"); - - if (!opts::AltInstHasPadLen.getNumOccurrences()) - BC.outs() << "BOLT-INFO: setting --" << opts::AltInstHasPadLen.ArgStr - << '=' << AltInstHasPadLen << '\n'; - - if (!opts::AltInstFeatureSize.getNumOccurrences()) - BC.outs() << "BOLT-INFO: setting --" << opts::AltInstFeatureSize.ArgStr - << '=' << AltInstFeatureSize << '\n'; - - return tryReadAltInstructions(AltInstFeatureSize, AltInstHasPadLen, - /*ParseOnly*/ false); + unsigned AltInstrEntrySize{0}; + unsigned AltInstrEntryInstrlenOffset{0}; + + switch (BC.TheTriple->getArch()) { + case llvm::Triple::x86_64: + if (LinuxKernelVersion >= LKVersion(6, 3)) { + AltInstrEntrySize = 14; + AltInstrEntryInstrlenOffset = 12; + } else if (LinuxKernelVersion >= LKVersion(5, 10, 133)) { + AltInstrEntrySize = 12; + AltInstrEntryInstrlenOffset = 10; + } else { + AltInstrEntrySize = 13; + AltInstrEntryInstrlenOffset = 10; } + break; + case llvm::Triple::aarch64: + AltInstrEntrySize = 12; + AltInstrEntryInstrlenOffset = 10; + break; + default: + llvm_unreachable("Unsupported architecture"); } - // We couldn't match the format. Read again to properly propagate the error - // to the user. 
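// A standalone sketch of the per-version layout selection performed above for
// .altinstructions: only the entry size and the offset of the instrlen byte
// are needed to walk the table. The Arch enum and the version-triple
// comparison below are illustrative stand-ins for BOLT's Triple and LKVersion
// types.
#include <tuple>

enum class Arch { X86_64, AArch64 };

struct AltLayout {
  unsigned EntrySize;      // sizeof(struct alt_instr) for this kernel
  unsigned InstrlenOffset; // offset of the u8 instrlen field in the entry
};

static AltLayout selectAltInstrLayout(Arch A, unsigned Major, unsigned Minor,
                                      unsigned Patch) {
  if (A == Arch::AArch64)
    return {12, 10};
  // x86_64: the feature field width and the padlen byte changed over time.
  const auto Ver = std::make_tuple(Major, Minor, Patch);
  if (Ver >= std::make_tuple(6u, 3u, 0u))
    return {14, 12};
  if (Ver >= std::make_tuple(5u, 10u, 133u))
    return {12, 10};
  return {13, 10};
}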
- return tryReadAltInstructions(opts::AltInstFeatureSize, - opts::AltInstHasPadLen, /*ParseOnly*/ false); -} + BC.outs() << "BOLT-INFO: AltInstrEntrySize = " << AltInstrEntrySize + << ", AltInstrEntryInstrlenOffset = " << AltInstrEntryInstrlenOffset + << "\n"; -Error LinuxKernelRewriter::tryReadAltInstructions(uint32_t AltInstFeatureSize, - bool AltInstHasPadLen, - bool ParseOnly) { - const uint64_t Address = AltInstrSection->getAddress(); - DataExtractor DE = DataExtractor(AltInstrSection->getContents(), - BC.AsmInfo->isLittleEndian(), - BC.AsmInfo->getCodePointerSize()); + AddressExtractor AE( + AltInstrSection->getContents(), AltInstrSection->getAddress(), + BC.AsmInfo->isLittleEndian(), BC.AsmInfo->getCodePointerSize()); + AddressExtractor::Cursor Cursor(0); uint64_t EntryID = 0; - DataExtractor::Cursor Cursor(0); - while (Cursor && !DE.eof(Cursor)) { - const uint64_t OrgInstAddress = - Address + Cursor.tell() + (int32_t)DE.getU32(Cursor); - const uint64_t AltInstAddress = - Address + Cursor.tell() + (int32_t)DE.getU32(Cursor); - const uint64_t Feature = DE.getUnsigned(Cursor, AltInstFeatureSize); - const uint8_t OrgSize = DE.getU8(Cursor); - const uint8_t AltSize = DE.getU8(Cursor); + while (Cursor && !AE.eof(Cursor)) { + ++EntryID; + AltInstrEntries.push_back(AltInstrEntry()); + AltInstrEntry &Entry = AltInstrEntries.back(); - // Older kernels may have the padlen field. - const uint8_t PadLen = AltInstHasPadLen ? DE.getU8(Cursor) : 0; + Entry.Offset = Cursor.tell(); + Entry.OrgInstrAddr = AE.getPCRelAddress32(Cursor); + Entry.AltInstrAddr = AE.getPCRelAddress32(Cursor); + Cursor.seek(Cursor.tell() + AltInstrEntryInstrlenOffset - 8); + + Entry.Instrlen = AE.getU8(Cursor); + Entry.Replacementlen = AE.getU8(Cursor); + Cursor.seek(Cursor.tell() + AltInstrEntrySize - + (AltInstrEntryInstrlenOffset + 2)); if (!Cursor) return createStringError( @@ -1426,57 +1653,51 @@ Error LinuxKernelRewriter::tryReadAltInstructions(uint32_t AltInstFeatureSize, "out of bounds while reading .altinstructions: %s", toString(Cursor.takeError()).c_str()); - ++EntryID; - if (opts::DumpAltInstructions) { BC.outs() << "Alternative instruction entry: " << EntryID - << "\n\tOrg: 0x" << Twine::utohexstr(OrgInstAddress) - << "\n\tAlt: 0x" << Twine::utohexstr(AltInstAddress) - << "\n\tFeature: 0x" << Twine::utohexstr(Feature) - << "\n\tOrgSize: " << (int)OrgSize - << "\n\tAltSize: " << (int)AltSize << '\n'; - if (AltInstHasPadLen) - BC.outs() << "\tPadLen: " << (int)PadLen << '\n'; + << "\n\tOrg: 0x" << Twine::utohexstr(Entry.OrgInstrAddr) + << "\n\tAlt: 0x" << Twine::utohexstr(Entry.AltInstrAddr) + << "\n\tInstrlen: " << (int)Entry.Instrlen + << "\n\tReplacementlen: " << (int)Entry.Replacementlen << '\n'; } - if (AltSize > OrgSize) + if (Entry.Replacementlen > Entry.Instrlen) return createStringError(errc::executable_format_error, "error reading .altinstructions"); - BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(OrgInstAddress); + BinaryFunction *BF = + BC.getBinaryFunctionContainingAddress(Entry.OrgInstrAddr); if (!BF && opts::Verbosity) { BC.outs() << "BOLT-INFO: no function matches address 0x" - << Twine::utohexstr(OrgInstAddress) + << Twine::utohexstr(Entry.OrgInstrAddr) << " of instruction from .altinstructions\n"; } BinaryFunction *AltBF = - BC.getBinaryFunctionContainingAddress(AltInstAddress); - if (!ParseOnly && AltBF && BC.shouldEmit(*AltBF)) { - BC.errs() - << "BOLT-WARNING: alternative instruction sequence found in function " - << *AltBF << '\n'; + 
BC.getBinaryFunctionContainingAddress(Entry.AltInstrAddr); + if (AltBF) { + if (BC.isX86() && + !AltBF->getOneName().starts_with(".altinstr_replacement")) + BC.errs() << "BOLT-WARNING: alternative instruction sequence found in " + "function " + << *AltBF << '\n'; AltBF->setIgnored(); } if (!BF || !BF->hasInstructions()) continue; - if (OrgInstAddress + OrgSize > BF->getAddress() + BF->getSize()) + if (Entry.OrgInstrAddr + Entry.Instrlen > BF->getAddress() + BF->getSize()) return createStringError(errc::executable_format_error, "error reading .altinstructions"); MCInst *Inst = - BF->getInstructionAtOffset(OrgInstAddress - BF->getAddress()); + BF->getInstructionAtOffset(Entry.OrgInstrAddr - BF->getAddress()); if (!Inst) return createStringError(errc::executable_format_error, "no instruction at address 0x%" PRIx64 " referenced by .altinstructions entry %d", - OrgInstAddress, EntryID); - - if (ParseOnly) - continue; - + Entry.OrgInstrAddr, EntryID); // There could be more than one alternative instruction sequences for the // same original instruction. Annotate each alternative separately. std::string AnnotationName = "AltInst"; @@ -1489,18 +1710,15 @@ Error LinuxKernelRewriter::tryReadAltInstructions(uint32_t AltInstFeatureSize, // Annotate all instructions from the original sequence. Note that it's not // the most efficient way to look for instructions in the address range, // but since alternative instructions are uncommon, it will do for now. - for (uint32_t Offset = 1; Offset < OrgSize; ++Offset) { - Inst = BF->getInstructionAtOffset(OrgInstAddress + Offset - + for (uint32_t Offset = 1; Offset < Entry.Instrlen; ++Offset) { + Inst = BF->getInstructionAtOffset(Entry.OrgInstrAddr + Offset - BF->getAddress()); if (Inst) BC.MIB->addAnnotation(*Inst, AnnotationName, EntryID); } } - - if (!ParseOnly) - BC.outs() << "BOLT-INFO: parsed " << EntryID - << " alternative instruction entries\n"; - + BC.outs() << "BOLT-INFO: parsed " << EntryID + << " alternative instruction entries\n"; return Error::success(); } @@ -1537,19 +1755,17 @@ Error LinuxKernelRewriter::readPCIFixupTable() { return createStringError(errc::executable_format_error, "PCI fixup table size error"); - const uint64_t Address = PCIFixupSection->getAddress(); - DataExtractor DE = DataExtractor(PCIFixupSection->getContents(), - BC.AsmInfo->isLittleEndian(), - BC.AsmInfo->getCodePointerSize()); + AddressExtractor AE( + PCIFixupSection->getContents(), PCIFixupSection->getAddress(), + BC.AsmInfo->isLittleEndian(), BC.AsmInfo->getCodePointerSize()); + AddressExtractor::Cursor Cursor(0); uint64_t EntryID = 0; - DataExtractor::Cursor Cursor(0); - while (Cursor && !DE.eof(Cursor)) { - const uint16_t Vendor = DE.getU16(Cursor); - const uint16_t Device = DE.getU16(Cursor); - const uint32_t Class = DE.getU32(Cursor); - const uint32_t ClassShift = DE.getU32(Cursor); - const uint64_t HookAddress = - Address + Cursor.tell() + (int32_t)DE.getU32(Cursor); + while (Cursor && !AE.eof(Cursor)) { + const uint16_t Vendor = AE.getU16(Cursor); + const uint16_t Device = AE.getU16(Cursor); + const uint32_t Class = AE.getU32(Cursor); + const uint32_t ClassShift = AE.getU32(Cursor); + const uint64_t HookAddress = AE.getPCRelAddress32(Cursor); if (!Cursor) return createStringError(errc::executable_format_error, @@ -1580,7 +1796,7 @@ Error LinuxKernelRewriter::readPCIFixupTable() { if (const uint64_t Offset = HookAddress - BF->getAddress()) { BC.errs() << "BOLT-WARNING: PCI fixup detected in the middle of function " << *BF << " at offset 0x" << 
Twine::utohexstr(Offset) << '\n'; - BF->setSimple(false); + BF->setIgnored(); } } @@ -1624,6 +1840,8 @@ Error LinuxKernelRewriter::readPCIFixupTable() { /// byte of the sequence with int3 before proceeding with actual code /// replacement. Error LinuxKernelRewriter::readStaticKeysJumpTable() { + LongJumpLabels = BC.isX86() && LinuxKernelVersion < LKVersion(5, 14); + const BinaryData *StaticKeysJumpTable = BC.getBinaryDataByName("__start___jump_table"); if (!StaticKeysJumpTable) @@ -1654,18 +1872,15 @@ Error LinuxKernelRewriter::readStaticKeysJumpTable() { "static keys jump table size error"); const uint64_t SectionAddress = StaticKeysJumpSection->getAddress(); - DataExtractor DE(StaticKeysJumpSection->getContents(), - BC.AsmInfo->isLittleEndian(), - BC.AsmInfo->getCodePointerSize()); - DataExtractor::Cursor Cursor(StaticKeysJumpTableAddress - SectionAddress); + AddressExtractor AE(StaticKeysJumpSection->getContents(), SectionAddress, + BC.AsmInfo->isLittleEndian(), + BC.AsmInfo->getCodePointerSize()); + AddressExtractor::Cursor Cursor(StaticKeysJumpTableAddress - SectionAddress); uint32_t EntryID = 0; while (Cursor && Cursor.tell() < Stop->getAddress() - SectionAddress) { - const uint64_t JumpAddress = - SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor); - const uint64_t TargetAddress = - SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor); - const uint64_t KeyAddress = - SectionAddress + Cursor.tell() + (int64_t)DE.getU64(Cursor); + const uint64_t JumpAddress = AE.getPCRelAddress32(Cursor); + const uint64_t TargetAddress = AE.getPCRelAddress32(Cursor); + const uint64_t KeyAddress = AE.getPCRelAddress64(Cursor); // Consume the status of the cursor. if (!Cursor) @@ -1679,6 +1894,7 @@ Error LinuxKernelRewriter::readStaticKeysJumpTable() { JumpInfo.push_back(JumpInfoEntry()); JumpInfoEntry &Info = JumpInfo.back(); Info.Likely = KeyAddress & 1; + Info.JumpAddress = JumpAddress; if (opts::DumpStaticKeys) { BC.outs() << "Static key jump entry: " << EntryID @@ -1698,6 +1914,12 @@ Error LinuxKernelRewriter::readStaticKeysJumpTable() { if (!BF || !BC.shouldEmit(*BF)) continue; + assert(BF->getOriginSection() && + "the function did not originate from the file"); + Info.BF = BF; + Info.Sec = BF->getOriginSection(); + + BF->setMayChange(); MCInst *Inst = BF->getInstructionAtOffset(JumpAddress - BF->getAddress()); if (!Inst) @@ -1719,7 +1941,21 @@ Error LinuxKernelRewriter::readStaticKeysJumpTable() { JumpAddress); const uint64_t Size = BC.computeInstructionSize(*Inst); - if (Size != 2 && Size != 5) { + + auto checkSize = [this, Size]() { + switch (BC.TheTriple->getArch()) { + case llvm::Triple::x86_64: + if (LongJumpLabels) + return Size == 5; + return Size == 2 || Size == 5; + case llvm::Triple::aarch64: + return Size == 4; + default: + return false; + } + }; + + if (!checkSize()) { return createStringError( errc::executable_format_error, "unexpected static keys jump size at address 0x%" PRIx64, @@ -1741,7 +1977,7 @@ Error LinuxKernelRewriter::readStaticKeysJumpTable() { // by the kernel patching code. Newer kernels can work with both short // and long branches. The code for long conditional branch is larger // than unconditional one, so we are pessimistic in our estimations. 
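// A standalone sketch (illustrative names, not BOLT API) of decoding one
// 16-byte __jump_table entry as read above: the code and target fields are
// PC-relative s32 values, the key field is a PC-relative s64, and bit 0 of
// the resolved key address carries the branch-likely flag.
#include <cstdint>
#include <cstring>

struct JumpEntry {
  uint64_t JumpAddress;   // location of the nop/branch in kernel text
  uint64_t TargetAddress; // destination taken when the key is toggled
  uint64_t KeyAddress;    // static_key address, low bit still holding the flag
  bool Likely;
};

static JumpEntry decodeJumpEntry(const uint8_t *Data, uint64_t EntryAddr) {
  int32_t CodeRel, TargetRel;
  int64_t KeyRel;
  std::memcpy(&CodeRel, Data, 4); // assumes little-endian input
  std::memcpy(&TargetRel, Data + 4, 4);
  std::memcpy(&KeyRel, Data + 8, 8);
  JumpEntry E;
  E.JumpAddress = EntryAddr + CodeRel;
  E.TargetAddress = EntryAddr + 4 + TargetRel;
  E.KeyAddress = EntryAddr + 8 + KeyRel;
  E.Likely = E.KeyAddress & 1;
  return E;
}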
- if (opts::LongJumpLabels) + if (LongJumpLabels) BC.MIB->createLongCondBranch(StaticKeyBranch, Target, 0, BC.Ctx.get()); else BC.MIB->createCondBranch(StaticKeyBranch, Target, 0, BC.Ctx.get()); @@ -1768,7 +2004,7 @@ Error LinuxKernelRewriter::readStaticKeysJumpTable() { if (!BC.MIB->getOffset(*Inst)) BC.MIB->setOffset(*Inst, JumpAddress - BF->getAddress()); - if (opts::LongJumpLabels) + if (LongJumpLabels) BC.MIB->setSize(*Inst, 5); } @@ -1801,21 +2037,33 @@ Error LinuxKernelRewriter::rewriteStaticKeysJumpTable() { const_cast(BC.MIB->getTargetSymbol(Inst)); assert(Target && "Target symbol should be set."); - const JumpInfoEntry &Info = JumpInfo[EntryID - 1]; + JumpInfoEntry &Info = JumpInfo[EntryID - 1]; const bool IsBranch = Info.Likely ^ Info.InitValue; uint32_t Size = *BC.MIB->getSize(Inst); - if (Size == 2) - ++NumShort; - else if (Size == 5) - ++NumLong; - else - llvm_unreachable("Wrong size for static keys jump instruction."); + switch (BC.TheTriple->getArch()) { + case llvm::Triple::x86_64: + if (Size == 2) + ++NumShort; + else if (Size == 5) + ++NumLong; + else + llvm_unreachable("Wrong size for static keys jump instruction."); + break; + case llvm::Triple::aarch64: + if (Size == 4) + ++NumLong; + else + llvm_unreachable("Wrong size for static keys jump instruction."); + break; + default: + llvm_unreachable("Unsupported architecture"); + } MCInst NewInst; // Replace the instruction with unconditional jump even if it needs to // be nop in the binary. - if (opts::LongJumpLabels) { + if (LongJumpLabels) { BC.MIB->createLongUncondBranch(NewInst, Target, BC.Ctx.get()); } else { // Newer kernels can handle short and long jumps for static keys. @@ -1829,20 +2077,20 @@ Error LinuxKernelRewriter::rewriteStaticKeysJumpTable() { // Mark the instruction for nop conversion. if (!IsBranch) - NopIDs.insert(EntryID); + Info.Nop = true; - MCSymbol *Label = + Info.JumpInstLabel = BC.MIB->getOrCreateInstLabel(Inst, "__SK_", BC.Ctx.get()); // Create a relocation against the label. const uint64_t EntryOffset = StaticKeysJumpTableAddress - StaticKeysJumpSection->getAddress() + (EntryID - 1) * 16; - StaticKeysJumpSection->addRelocation(EntryOffset, Label, - ELF::R_X86_64_PC32, + StaticKeysJumpSection->addRelocation(EntryOffset, Info.JumpInstLabel, + Relocation::getPC32(), /*Addend*/ 0); - StaticKeysJumpSection->addRelocation(EntryOffset + 4, Target, - ELF::R_X86_64_PC32, /*Addend*/ 0); + StaticKeysJumpSection->addRelocation( + EntryOffset + 4, Target, Relocation::getPC32(), /*Addend*/ 0); } } } @@ -1858,72 +2106,98 @@ Error LinuxKernelRewriter::updateStaticKeysJumpTablePostEmit() { if (!StaticKeysJumpSection || !StaticKeysJumpSection->isFinalized()) return Error::success(); - const uint64_t SectionAddress = StaticKeysJumpSection->getAddress(); - DataExtractor DE(StaticKeysJumpSection->getOutputContents(), - BC.AsmInfo->isLittleEndian(), - BC.AsmInfo->getCodePointerSize()); - DataExtractor::Cursor Cursor(StaticKeysJumpTableAddress - SectionAddress); - const BinaryData *Stop = BC.getBinaryDataByName("__stop___jump_table"); - uint32_t EntryID = 0; uint64_t NumShort = 0; uint64_t NumLong = 0; - while (Cursor && Cursor.tell() < Stop->getAddress() - SectionAddress) { - const uint64_t JumpAddress = - SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor); - const uint64_t TargetAddress = - SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor); - const uint64_t KeyAddress = - SectionAddress + Cursor.tell() + (int64_t)DE.getU64(Cursor); - - // Consume the status of the cursor. 
- if (!Cursor) - return createStringError(errc::executable_format_error, - "out of bounds while updating static keys: %s", - toString(Cursor.takeError()).c_str()); - - ++EntryID; - - LLVM_DEBUG({ - dbgs() << "\n\tJumpAddress: 0x" << Twine::utohexstr(JumpAddress) - << "\n\tTargetAddress: 0x" << Twine::utohexstr(TargetAddress) - << "\n\tKeyAddress: 0x" << Twine::utohexstr(KeyAddress) << '\n'; - }); - (void)TargetAddress; - (void)KeyAddress; - - BinaryFunction *BF = - BC.getBinaryFunctionContainingAddress(JumpAddress, - /*CheckPastEnd*/ false, - /*UseMaxSize*/ true); - assert(BF && "Cannot get function for modified static key."); + for (JumpInfoEntry &Info : JumpInfo) { + MCSymbol *Label = Info.JumpInstLabel; + if (!Label) + continue; - if (!BF->isEmitted()) + BinaryFunction *BF = Info.BF; + if (!BF || !BF->isEmitted()) continue; - // Disassemble instruction to collect stats even if nop-conversion is - // unnecessary. - MutableArrayRef Contents = MutableArrayRef( - reinterpret_cast(BF->getImageAddress()), BF->getImageSize()); - assert(Contents.size() && "Non-empty function image expected."); + std::optional JumpAddress = lookupSymbol(Label->getName()); + assert(JumpAddress && "missing static key jump instruction label"); + + uint64_t ContentsAddress{0}; + uint64_t ContentsSize{0}; + MutableArrayRef Contents; + + if (!BC.HasRelocations) { + const FunctionFragment *FF = + BF->getFunctionFragmentForOutputAddress(*JumpAddress); + assert(FF && "Can not get fragment for jump address"); + + ContentsAddress = FF->getAddress(); + ContentsSize = FF->getImageSize(); + Contents = MutableArrayRef(FF->getOutputData(), ContentsSize); + } else { + ErrorOr Sec = + BC.getSectionForOutputAddress(*JumpAddress); + assert(Sec && "Can not get section for jump address."); + + ContentsAddress = Sec->getOutputAddress(); + ContentsSize = Sec->getOutputSize(); + Contents = MutableArrayRef(Sec->getOutputData(), ContentsSize); + } MCInst Inst; uint64_t Size; - const uint64_t JumpOffset = JumpAddress - BF->getAddress(); + const uint64_t JumpOffset = *JumpAddress - ContentsAddress; if (!BC.DisAsm->getInstruction(Inst, Size, Contents.slice(JumpOffset), 0, nulls())) { llvm_unreachable("Unable to disassemble jump instruction."); } assert(BC.MIB->isBranch(Inst) && "Branch instruction expected."); + assert(JumpOffset + Size <= ContentsAddress + ContentsSize); + + switch (BC.TheTriple->getArch()) { + case llvm::Triple::x86_64: + if (Size == 2) + ++NumShort; + else if (Size == 5) + ++NumLong; + else + llvm_unreachable("Unexpected size for static keys jump instruction."); + break; + case llvm::Triple::aarch64: + if (Size == 4) + ++NumLong; + else + llvm_unreachable("Unexpected size for static keys jump instruction."); + break; + default: + llvm_unreachable("Unsupported architecture"); + } - if (Size == 2) - ++NumShort; - else if (Size == 5) - ++NumLong; - else - llvm_unreachable("Unexpected size for static keys jump instruction."); + if (BC.HasRelocations) { + // To avoid undefined behaviors, fill the jump address with Undef + + size_t PatchSize = PatchEntries::getPatchSize(BC); + assert(BF->isPatched()); + assert(Info.JumpAddress != JumpAddress); + + bool NotOverlap = + BF->forEachEntryPoint([&](uint64_t EntryOffset, const MCSymbol *) { + uint64_t EntryAddress = EntryOffset + BF->getAddress(); + return Info.JumpAddress >= EntryAddress + PatchSize || + Info.JumpAddress + Size <= EntryAddress; + }); + + if (NotOverlap) + Info.Sec->addPatch(Info.JumpAddress - Info.Sec->getAddress(), + BC.MIB->getUndefFillValue()); + else + BC.errs() 
+ << "BOLT-WARNING: Skip writing an undefined instruction at static " + "key jump address 0x" + << Twine::utohexstr(Info.JumpAddress) + << " since that address is overlapping an entry point patch\n"; + } // Check if we need to convert jump instruction into a nop. - if (!NopIDs.contains(EntryID)) + if (!Info.Nop) continue; SmallString<15> NopCode; @@ -1942,6 +2216,6 @@ Error LinuxKernelRewriter::updateStaticKeysJumpTablePostEmit() { } // namespace std::unique_ptr -llvm::bolt::createLinuxKernelRewriter(BinaryContext &BC) { - return std::make_unique(BC); +llvm::bolt::createLinuxKernelRewriter(RewriteInstance &RI) { + return std::make_unique(RI); } diff --git a/bolt/lib/Rewrite/MachORewriteInstance.cpp b/bolt/lib/Rewrite/MachORewriteInstance.cpp index 172cb640bf911a96e11c3112b007cd1636479867..2d41b0de1daaea0e6b8ef5e88d79e1c5c1a71abc 100644 --- a/bolt/lib/Rewrite/MachORewriteInstance.cpp +++ b/bolt/lib/Rewrite/MachORewriteInstance.cpp @@ -553,7 +553,8 @@ void MachORewriteInstance::adjustCommandLineOptions() { opts::ForcePatch = true; opts::JumpTables = JTS_MOVE; opts::InstrumentCalls = false; - opts::RuntimeInstrumentationLib = "libbolt_rt_instr_osx.a"; + if (opts::RuntimeInstrumentationLib.empty()) + opts::RuntimeInstrumentationLib = "libbolt_rt_instr_osx.a"; } void MachORewriteInstance::run() { diff --git a/bolt/lib/Rewrite/MetadataRewriter.cpp b/bolt/lib/Rewrite/MetadataRewriter.cpp new file mode 100644 index 0000000000000000000000000000000000000000..962e7704167b960da1e5756a090bdcacd0b3d4e0 --- /dev/null +++ b/bolt/lib/Rewrite/MetadataRewriter.cpp @@ -0,0 +1,20 @@ +//===------------ bolt/Rewrite/MetadataRewriter.cpp -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "bolt/Rewrite/MetadataRewriter.h" +#include "bolt/Rewrite/RewriteInstance.h" + +using namespace llvm; +using namespace bolt; + +MetadataRewriter::MetadataRewriter(StringRef Name, RewriteInstance &RI) + : Name(Name), RI(RI), BC(*RI.BC) {} + +std::optional MetadataRewriter::lookupSymbol(const StringRef Name) { + return RI.Linker->lookupSymbol(Name); +} diff --git a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp index 3704a9ba452b99e939bf239f5370b67c1f83ce01..8d6283608d148e359e272cabcb0c289d3df5296a 100644 --- a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp +++ b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp @@ -76,8 +76,8 @@ class PseudoProbeRewriter final : public MetadataRewriter { std::shared_ptr ProbeDecoderPtr; public: - PseudoProbeRewriter(BinaryContext &BC) - : MetadataRewriter("pseudo-probe-rewriter", BC), + PseudoProbeRewriter(RewriteInstance &RI) + : MetadataRewriter("pseudo-probe-rewriter", RI), ProbeDecoderPtr(std::make_shared()) { BC.setPseudoProbeDecoder(ProbeDecoderPtr); } @@ -419,6 +419,6 @@ void PseudoProbeRewriter::encodePseudoProbes() { } // namespace std::unique_ptr -llvm::bolt::createPseudoProbeRewriter(BinaryContext &BC) { - return std::make_unique(BC); +llvm::bolt::createPseudoProbeRewriter(RewriteInstance &RI) { + return std::make_unique(RI); } diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index 3d24936271bf8fef2e2ef1c3551c454e7db51de7..43d5c0cbcdf7e28efd20170631712284fbe7013f 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -133,6 +133,16 @@ static cl::opt FunctionNamesFileNR( cl::desc("file with list of functions to optimize (non-regex)"), cl::Hidden, cl::cat(BoltCategory)); +static cl::list KeepAddressFunctionNamesNR( + "keep-address-funcs-no-regex", cl::CommaSeparated, + cl::desc("KeepAddress functions from the list (non-regex)"), + cl::value_desc("func1,func2,func3,..."), cl::Hidden, cl::cat(BoltCategory)); + +static cl::opt KeepAddressFunctionNamesFileNR( + "keep-address-funcs-file-no-regex", + cl::desc("file with list of KeepAddress functions to optimize (non-regex)"), + cl::Hidden, cl::cat(BoltCategory)); + cl::opt KeepTmp("keep-tmp", cl::desc("preserve intermediate .o file"), @@ -506,7 +516,8 @@ Error RewriteInstance::discoverStorage() { auto ELF64LEFile = cast(InputFile); const ELFFile &Obj = ELF64LEFile->getELFFile(); - BC->StartFunctionAddress = Obj.getHeader().e_entry; + if (!BC->IsLinuxKernel) + BC->StartFunctionAddress = Obj.getHeader().e_entry; NextAvailableAddress = 0; uint64_t NextAvailableOffset = 0; @@ -530,9 +541,17 @@ Error RewriteInstance::discoverStorage() { Phdr.p_offset, Phdr.p_filesz, Phdr.p_align}; - if (BC->TheTriple->getArch() == llvm::Triple::x86_64 && - Phdr.p_vaddr >= BinaryContext::KernelStartX86_64) - BC->IsLinuxKernel = true; + switch (BC->TheTriple->getArch()) { + case llvm::Triple::x86_64: + if (Phdr.p_vaddr >= BinaryContext::KernelStartX86_64) + BC->IsLinuxKernel = true; + break; + case llvm::Triple::aarch64: + if (Phdr.p_vaddr >= BinaryContext::KernelStartAArch64) + BC->IsLinuxKernel = true; + break; + default:; + } break; case ELF::PT_INTERP: BC->HasInterpHeader = true; @@ -540,8 +559,10 @@ Error RewriteInstance::discoverStorage() { } } - if (BC->IsLinuxKernel) + if (BC->IsLinuxKernel) { + BC->StartFunctionAddress.reset(); BC->outs() << "BOLT-INFO: Linux 
kernel binary detected\n"; + } for (const SectionRef &Section : InputFile->sections()) { Expected SectionNameOrErr = Section.getName(); @@ -977,7 +998,23 @@ void RewriteInstance::discoverFileObjects() { continue; } - if (SymName == getBOLTReservedStart() || SymName == getBOLTReservedEnd()) { + if (SymName == getBOLTReservedStart()) { + BOLTReservedStartAddress = SymbolAddress; + registerName(SymbolSize); + continue; + } + if (SymName == getBOLTReservedEnd()) { + BOLTReservedEndAddress = SymbolAddress; + registerName(SymbolSize); + continue; + } + if (SymName == getBOLTReservedRWStart()) { + BOLTReservedRWStartAddress = SymbolAddress; + registerName(SymbolSize); + continue; + } + if (SymName == getBOLTReservedRWEnd()) { + BOLTReservedRWEndAddress = SymbolAddress; registerName(SymbolSize); continue; } @@ -985,12 +1022,11 @@ void RewriteInstance::discoverFileObjects() { LLVM_DEBUG(dbgs() << "BOLT-DEBUG: considering symbol " << UniqueName << " for function\n"); - if (SymbolAddress == Section->getAddress() + Section->getSize()) { + if (SymbolAddress >= Section->getAddress() + Section->getSize()) { assert(SymbolSize == 0 && - "unexpect non-zero sized symbol at end of section"); + "unexpect non-zero sized symbol outside section"); LLVM_DEBUG( - dbgs() - << "BOLT-DEBUG: rejecting as symbol points to end of its section\n"); + dbgs() << "BOLT-DEBUG: rejecting as symbol is outside its section\n"); registerName(SymbolSize); continue; } @@ -1248,7 +1284,8 @@ void RewriteInstance::discoverFileObjects() { /*CheckPastEnd*/ false, /*UseMaxSize*/ true); if (BF) { - assert(Rel.isRelative() && "Expected relative relocation for island"); + assert((Rel.isRelative() || Rel.isGlobDat()) && + "Unexpected relocation for island"); BC->logBOLTErrorsAndQuitOnFatal( BF->markIslandDynamicRelocationAtAddress(RelAddress)); } @@ -1256,10 +1293,8 @@ void RewriteInstance::discoverFileObjects() { } } - if (!BC->IsLinuxKernel) { - // Read all relocations now that we have binary functions mapped. - processRelocations(); - } + // Read all relocations now that we have binary functions mapped. 
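// The reserved-region boundary addresses recorded above are validated as a
// pair: both symbols must be present or both absent, and the start must
// precede the end. A minimal standalone sketch of that rule, with
// std::optional standing in for the recorded addresses and all names being
// illustrative only.
#include <cstdint>
#include <optional>
#include <stdexcept>
#include <string>

struct Range {
  uint64_t Start;
  uint64_t End;
};

static std::optional<Range>
makeReservedRange(std::optional<uint64_t> Start, std::optional<uint64_t> End,
                  const std::string &Name) {
  if (Start.has_value() != End.has_value())
    throw std::runtime_error("one boundary symbol of " + Name + " is missing");
  if (!Start)
    return std::nullopt; // the binary does not reserve this region
  if (*Start >= *End)
    throw std::runtime_error("invalid " + Name + " boundaries");
  return Range{*Start, *End};
}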
+ processRelocations(); registerFragments(); FileSymbols.clear(); @@ -1269,30 +1304,66 @@ void RewriteInstance::discoverFileObjects() { } void RewriteInstance::discoverBOLTReserved() { - BinaryData *StartBD = BC->getBinaryDataByName(getBOLTReservedStart()); - BinaryData *EndBD = BC->getBinaryDataByName(getBOLTReservedEnd()); - if (!StartBD != !EndBD) { + if (!BOLTReservedStartAddress != !BOLTReservedEndAddress) { BC->errs() << "BOLT-ERROR: one of the symbols is missing from the binary: " << getBOLTReservedStart() << ", " << getBOLTReservedEnd() << '\n'; exit(1); } - if (!StartBD) - return; + if (BC->IsLinuxKernel && BC->HasRelocations && !BOLTReservedStartAddress) { + BC->errs() << "BOLT-ERROR: BOLT for Linux in relocation mode requires BOLT " + "reserved space\n"; + exit(1); + } + + if (BOLTReservedStartAddress) { + if (BOLTReservedStartAddress >= BOLTReservedEndAddress) { + BC->errs() << "BOLT-ERROR: invalid reserved space boundaries\n"; + exit(1); + } + + BC->BOLTReserved = + AddressRange(BOLTReservedStartAddress, BOLTReservedEndAddress); + BC->outs() + << "BOLT-INFO: using reserved space for allocating new sections\n"; + + PHDRTableOffset = 0; + PHDRTableAddress = 0; + NewTextSegmentAddress = 0; + NewTextSegmentOffset = 0; + NextAvailableAddress = BC->BOLTReserved.start(); + } + + if (!BOLTReservedRWStartAddress != !BOLTReservedRWEndAddress) { + BC->errs() << "BOLT-ERROR: one of the symbols is missing from the binary: " + << getBOLTReservedRWStart() << ", " << getBOLTReservedRWEnd() + << '\n'; + exit(1); + } + + if (BOLTReservedRWStartAddress && !BOLTReservedStartAddress) { + BC->errs() << "BOLT-ERROR: BOLT reserved RW space needs to be used " + "together with BOLT reserved space\n"; + exit(1); + } - if (StartBD->getAddress() >= EndBD->getAddress()) { - BC->errs() << "BOLT-ERROR: invalid reserved space boundaries\n"; + if (BC->IsLinuxKernel && opts::Instrument && !BOLTReservedRWStartAddress) { + BC->errs() << "BOLT-ERROR: Linux kernel instrumentation requires BOLT " + "reserved RW space\n"; exit(1); } - BC->BOLTReserved = AddressRange(StartBD->getAddress(), EndBD->getAddress()); - BC->outs() << "BOLT-INFO: using reserved space for allocating new sections\n"; - PHDRTableOffset = 0; - PHDRTableAddress = 0; - NewTextSegmentAddress = 0; - NewTextSegmentOffset = 0; - NextAvailableAddress = BC->BOLTReserved.start(); + if (BOLTReservedRWStartAddress) { + if (BOLTReservedRWStartAddress >= BOLTReservedRWEndAddress) { + BC->errs() << "BOLT-ERROR: invalid reserved RW space boundaries\n"; + exit(1); + } + BC->BOLTReservedRW = + AddressRange(BOLTReservedRWStartAddress, BOLTReservedRWEndAddress); + BC->outs() << "BOLT-INFO: using reserved RW space for allocating new RW " + "sections\n"; + } } Error RewriteInstance::discoverRtFiniAddress() { @@ -1745,6 +1816,10 @@ void RewriteInstance::adjustFunctionBoundaries() { BFE = BC->getBinaryFunctions().end(); BFI != BFE; ++BFI) { BinaryFunction &Function = BFI->second; + + if (Function.getAddress() == BOLTReservedStartAddress) + continue; + const BinaryFunction *NextFunction = nullptr; if (std::next(BFI) != BFE) NextFunction = &std::next(BFI)->second; @@ -1936,11 +2011,6 @@ Error RewriteInstance::readSpecialSections() { BC->HasRelocations = HasTextRelocations && (opts::RelocationMode != cl::BOU_FALSE); - if (BC->IsLinuxKernel && BC->HasRelocations) { - BC->outs() << "BOLT-INFO: disabling relocation mode for Linux kernel\n"; - BC->HasRelocations = false; - } - BC->IsStripped = !HasSymbolTable; if (BC->IsStripped && !opts::AllowStripped) { @@ -2188,6 +2258,19 
@@ bool RewriteInstance::analyzeRelocation( SymbolAddress = BD ? BD->getAddress() : 0; } } + + if (BC->IsLinuxKernel) { + if (BC->isX86()) { + if (StringSwitch(SymbolName) + .Cases(".data..percpu", "fixed_percpu_data", true) + .Default(false) || + SymbolName.find("__per_cpu_") != std::string::npos) { + Skip = true; + return true; + } + } + } + // For PIE or dynamic libs, the linker may choose not to put the relocation // result at the address if it is a X86_64_64 one because it will emit a // dynamic relocation (X86_RELATIVE) for the dynamic linker and loader to @@ -2486,6 +2569,22 @@ void RewriteInstance::readRelocations(const SectionRef &Section) { SectionRef RelocatedSection = *SecIter; StringRef RelocatedSectionName = cantFail(RelocatedSection.getName()); + + if (BC->IsLinuxKernel) { + if (BC->isX86()) { + if (StringSwitch(RelocatedSectionName) + .Cases(".data..percpu", ".smp_locks", ".orc_unwind", + ".orc_unwind_ip", true) + .Default(false)) + return; + } + if (StringSwitch(RelocatedSectionName) + .Cases("__ksymtab", "__ksymtab_gpl", "__bug_table", + ".altinstructions", true) + .Default(false)) + return; + } + LLVM_DEBUG(dbgs() << "BOLT-DEBUG: relocated section is " << RelocatedSectionName << '\n'); @@ -2519,6 +2618,12 @@ void RewriteInstance::handleRelocation(const SectionRef &RelocatedSection, if (Relocation::skipRelocationType(RType)) return; + if (BC->IsLinuxKernel) { + if (BC->isInRange("__start___jump_table", "__stop___jump_table", + Rel.getOffset())) + return; + } + // Adjust the relocation type as the linker might have skewed it. if (BC->isX86() && (RType & ELF::R_X86_64_converted_reloc_bit)) { if (opts::Verbosity >= 1) @@ -2595,8 +2700,8 @@ void RewriteInstance::handleRelocation(const SectionRef &RelocatedSection, MCSymbol *ReferencedSymbol = nullptr; if (!IsSectionRelocation) { - if (BinaryData *BD = BC->getBinaryDataByName(SymbolName)) - ReferencedSymbol = BD->getSymbol(); + if (BC->getBinaryDataByName(SymbolName)) + ReferencedSymbol = BC->Ctx->getOrCreateSymbol(SymbolName); else if (BC->isGOTSymbol(SymbolName)) if (BinaryData *BD = BC->getGOTSymbol()) ReferencedSymbol = BD->getSymbol(); @@ -2736,7 +2841,8 @@ void RewriteInstance::handleRelocation(const SectionRef &RelocatedSection, Addend = Address; LLVM_DEBUG(dbgs() << "BOLT-DEBUG: forcing relocation against symbol " << SymbolName << " with addend " << Addend << '\n'); - } else if (ReferencedBF) { + } else if (ReferencedBF && ReferencedSection && + *ReferencedBF->getOriginSection() == *ReferencedSection) { ReferencedSymbol = ReferencedBF->getSymbol(); uint64_t RefFunctionOffset = 0; @@ -2785,7 +2891,7 @@ void RewriteInstance::handleRelocation(const SectionRef &RelocatedSection, dbgs() << formatv(" at offset {0:x}", RefFunctionOffset); dbgs() << '\n'; }); - } else { + } else if (!ReferencedBF) { if (IsToCode && SymbolAddress) { // This can happen e.g. with PIC-style jump tables. LLVM_DEBUG(dbgs() << "BOLT-DEBUG: no corresponding function for " @@ -2907,6 +3013,8 @@ void RewriteInstance::selectFunctionsToProcess() { populateFunctionNames(opts::FunctionNamesFile, opts::ForceFunctionNames); populateFunctionNames(opts::SkipFunctionNamesFile, opts::SkipFunctionNames); populateFunctionNames(opts::FunctionNamesFileNR, opts::ForceFunctionNamesNR); + populateFunctionNames(opts::KeepAddressFunctionNamesFileNR, + opts::KeepAddressFunctionNamesNR); // Make a set of functions to process to speed up lookups. 
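// A standalone sketch of the range test used above to ignore relocations that
// land inside the static-keys jump table: resolve __start___jump_table and
// __stop___jump_table, then apply a half-open [start, stop) comparison. The
// map-based lookup is an illustrative stand-in for the binary's symbol table.
#include <cstdint>
#include <map>
#include <optional>
#include <string>

using SymbolMap = std::map<std::string, uint64_t>;

static std::optional<uint64_t> lookupSymbolValue(const SymbolMap &Symbols,
                                                 const std::string &Name) {
  const auto It = Symbols.find(Name);
  if (It == Symbols.end())
    return std::nullopt;
  return It->second;
}

static bool isInJumpTable(const SymbolMap &Symbols, uint64_t Address) {
  const auto Start = lookupSymbolValue(Symbols, "__start___jump_table");
  const auto Stop = lookupSymbolValue(Symbols, "__stop___jump_table");
  return Start && Stop && *Start <= Address && Address < *Stop;
}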
std::unordered_set ForceFunctionsNR( @@ -2921,6 +3029,10 @@ void RewriteInstance::selectFunctionsToProcess() { exit(1); } + std::unordered_set KeepAddressFunctionsNR( + opts::KeepAddressFunctionNamesNR.begin(), + opts::KeepAddressFunctionNamesNR.end()); + uint64_t LiteThresholdExecCount = 0; if (opts::LiteThresholdPct) { if (opts::LiteThresholdPct > 100) @@ -2968,7 +3080,8 @@ void RewriteInstance::selectFunctionsToProcess() { for (std::string &Name : opts::SkipFunctionNames) if (Function.hasNameRegex(Name)) return true; - + if (BC->HasRelocations && Function.mustKeepAddress()) + return true; return false; }; @@ -3016,6 +3129,10 @@ void RewriteInstance::selectFunctionsToProcess() { for (auto &BFI : BC->getBinaryFunctions()) { BinaryFunction &Function = BFI.second; + for (const StringRef Name : Function.getNames()) + if (KeepAddressFunctionsNR.count(Name.str())) + Function.KeepAddress = true; + // Pseudo functions are explicitly marked by us not to be processed. if (Function.isPseudo()) { Function.IsIgnored = true; @@ -3130,13 +3247,13 @@ void RewriteInstance::preprocessProfileData() { void RewriteInstance::initializeMetadataManager() { if (BC->IsLinuxKernel) - MetadataManager.registerRewriter(createLinuxKernelRewriter(*BC)); + MetadataManager.registerRewriter(createLinuxKernelRewriter(*this)); - MetadataManager.registerRewriter(createBuildIDRewriter(*BC)); + MetadataManager.registerRewriter(createBuildIDRewriter(*this)); - MetadataManager.registerRewriter(createPseudoProbeRewriter(*BC)); + MetadataManager.registerRewriter(createPseudoProbeRewriter(*this)); - MetadataManager.registerRewriter(createSDTRewriter(*BC)); + MetadataManager.registerRewriter(createSDTRewriter(*this)); } void RewriteInstance::processSectionMetadata() { @@ -3771,6 +3888,7 @@ void RewriteInstance::mapCodeSections(BOLTLinker::SectionMapper MapSection) { << " to 0x" << Twine::utohexstr(Function.getAddress()) << '\n'); MapSection(*FuncSection, Function.getAddress()); + Function.getLayout().getMainFragment().setAddress(Function.getAddress()); Function.setImageAddress(FuncSection->getAllocAddress()); Function.setImageSize(FuncSection->getOutputSize()); if (Function.getImageSize() > Function.getMaxSize()) { @@ -3861,7 +3979,11 @@ void RewriteInstance::mapAllocatableSections( enum : uint8_t { ST_READONLY, ST_READWRITE }; for (uint8_t SType = ST_READONLY; SType <= ST_READWRITE; ++SType) { const uint64_t LastNextAvailableAddress = NextAvailableAddress; + if (SType == ST_READWRITE) { + if (!BC->BOLTReservedRW.empty()) + NextAvailableAddress = BC->BOLTReservedRW.start(); + // Align R+W segment to regular page size NextAvailableAddress = alignTo(NextAvailableAddress, BC->RegularPageSize); NewWritableSegmentAddress = NextAvailableAddress; @@ -3925,9 +4047,23 @@ void RewriteInstance::mapAllocatableSections( } } else if (SType == ST_READWRITE) { NewWritableSegmentSize = NextAvailableAddress - NewWritableSegmentAddress; - // Restore NextAvailableAddress if no new writable sections - if (!NewWritableSegmentSize) + + // Even empty sections should be kept for their page align effects + + if (!BC->BOLTReservedRW.empty()) { + const uint64_t AllocatedSize = + NextAvailableAddress - BC->BOLTReservedRW.start(); + if (BC->BOLTReservedRW.size() < AllocatedSize) { + BC->errs() << "BOLT-ERROR: reserved RW space (" + << BC->BOLTReservedRW.size() << " byte" + << (BC->BOLTReservedRW.size() == 1 ? 
"" : "s") + << ") is smaller than required for new RW allocations (" + << AllocatedSize << " bytes)\n"; + exit(1); + } + NextAvailableAddress = LastNextAvailableAddress; + } } } } @@ -4349,9 +4485,11 @@ RewriteInstance::getOutputSections(ELFObjectFile *File, addSection(NewSection, Section); } - // Sort all allocatable sections by their offset. + // Sort all allocatable sections by their offset and size, to avoid that a + // zero size section cause a preceding non-zero size section truncated. llvm::stable_sort(OutputSections, [](const auto &A, const auto &B) { - return A.second.sh_offset < B.second.sh_offset; + return std::make_tuple(A.second.sh_offset, A.second.sh_size) < + std::make_tuple(B.second.sh_offset, B.second.sh_size); }); // Fix section sizes to prevent overlapping. @@ -4509,6 +4647,10 @@ void RewriteInstance::patchELFSectionHeaderTable(ELFObjectFile *File) { NewEhdr.e_entry = RtLibrary->getRuntimeStartAddress(); else NewEhdr.e_entry = getNewFunctionAddress(NewEhdr.e_entry); + + if (BC->IsLinuxKernel) + NewEhdr.e_entry = Obj.getHeader().e_entry; + assert((NewEhdr.e_entry || !Obj.getHeader().e_entry) && "cannot find new address for entry point"); } @@ -4760,10 +4902,24 @@ void RewriteInstance::updateELFSymbolTable( goto registerSymbol; } + if (SymbolName->starts_with("__bolt_reserved_")) { + NewSymbol.st_shndx = getNewSectionIndex(Symbol.st_shndx); + goto registerSymbol; + } + if (Function) { // If the symbol matched a function that was not emitted, update the // corresponding section index but otherwise leave it unchanged. if (Function->isEmitted()) { + if (BC->HasRelocations && !Function->IsPatched && BC->IsLinuxKernel) { + ELFSymTy OrgSymbol = Symbol; + SmallVector Buf; + OrgSymbol.st_name = + AddToStrTab(Twine(*SymbolName).concat(".org.0").toStringRef(Buf)); + OrgSymbol.st_shndx = getNewSectionIndex(Symbol.st_shndx); + if (!IsDynSym) + Symbols.emplace_back(OrgSymbol); + } NewSymbol.st_value = Function->getOutputAddress(); NewSymbol.st_size = Function->getOutputSize(); NewSymbol.st_shndx = Function->getCodeSection()->getIndex(); @@ -4946,6 +5102,14 @@ void RewriteInstance::updateELFSymbolTable( AddEmittedSymbol("__hot_data_end"); } + if (BC->IsLinuxKernel && opts::Instrument) { + AddEmittedSymbol("__bolt_instr_locations"); + AddEmittedSymbol("__bolt_num_counters"); + AddEmittedSymbol("__bolt_instr_num_ind_calls"); + AddEmittedSymbol("__bolt_instr_num_ind_targets"); + AddEmittedSymbol("__bolt_instr_num_funcs"); + } + // Put local symbols at the beginning. llvm::stable_sort(Symbols, [](const ELFSymTy &A, const ELFSymTy &B) { if (A.getBinding() == ELF::STB_LOCAL && B.getBinding() != ELF::STB_LOCAL) @@ -5602,8 +5766,18 @@ void RewriteInstance::rewriteFile() { OS.pwrite(reinterpret_cast(Function->getImageAddress()), Function->getImageSize(), Function->getFileOffset()); + bool ShouldWriteNops = true; + + // For AArch64, Linux kernel alternative instruction replacement sequences + // are not in a seperate section as for X86, but reside in gaps between + // functions. + // Avoid overwriting them by skipping writing nops here. + if (BC->IsLinuxKernel && BC->isAArch64() && !BC->HasRelocations) + ShouldWriteNops = false; + // Write nops at the end of the function. 
- if (Function->getMaxSize() != std::numeric_limits::max()) { + if (ShouldWriteNops && + Function->getMaxSize() != std::numeric_limits::max()) { uint64_t Pos = OS.tell(); OS.seek(Function->getFileOffset() + Function->getImageSize()); BC->MAB->writeNopData( diff --git a/bolt/lib/Rewrite/SDTRewriter.cpp b/bolt/lib/Rewrite/SDTRewriter.cpp index a3928c554ad66c2c65b6cd3b39c4f070ef37e862..2558403fac763534cb469365219ee748bc319e70 100644 --- a/bolt/lib/Rewrite/SDTRewriter.cpp +++ b/bolt/lib/Rewrite/SDTRewriter.cpp @@ -55,7 +55,8 @@ class SDTRewriter final : public MetadataRewriter { void printSDTMarkers() const; public: - SDTRewriter(StringRef Name, BinaryContext &BC) : MetadataRewriter(Name, BC) {} + SDTRewriter(StringRef Name, RewriteInstance &RI) + : MetadataRewriter(Name, RI) {} Error preCFGInitializer() override; @@ -173,6 +174,6 @@ void SDTRewriter::printSDTMarkers() const { } // namespace std::unique_ptr -llvm::bolt::createSDTRewriter(BinaryContext &BC) { - return std::make_unique("sdt-rewriter", BC); +llvm::bolt::createSDTRewriter(RewriteInstance &RI) { + return std::make_unique("sdt-rewriter", RI); } diff --git a/bolt/lib/RuntimeLibs/InstrumentationRuntimeLibrary.cpp b/bolt/lib/RuntimeLibs/InstrumentationRuntimeLibrary.cpp index cd1b975be7b90e636ca4d5d6877cb309994e1207..b5963a2bcbe1dfeafd55f05eca4c712d3c2b7f15 100644 --- a/bolt/lib/RuntimeLibs/InstrumentationRuntimeLibrary.cpp +++ b/bolt/lib/RuntimeLibs/InstrumentationRuntimeLibrary.cpp @@ -27,7 +27,7 @@ namespace opts { cl::opt RuntimeInstrumentationLib( "runtime-instrumentation-lib", cl::desc("specify file name of the runtime instrumentation library"), - cl::init("libbolt_rt_instr.a"), cl::cat(BoltOptCategory)); + cl::init(""), cl::cat(BoltOptCategory)); extern cl::opt InstrumentationFileAppendPID; extern cl::opt ConservativeInstrumentation; @@ -42,6 +42,11 @@ extern cl::opt JumpTables; void InstrumentationRuntimeLibrary::adjustCommandLineOptions( const BinaryContext &BC) const { + + if (opts::RuntimeInstrumentationLib.empty()) + opts::RuntimeInstrumentationLib = + BC.IsLinuxKernel ? 
"libbolt_rt_instr_linux.a" : "libbolt_rt_instr.a"; + if (!BC.HasRelocations) { errs() << "BOLT-ERROR: instrumentation runtime libraries require " "relocations\n"; @@ -51,6 +56,10 @@ void InstrumentationRuntimeLibrary::adjustCommandLineOptions( opts::JumpTables = JTS_MOVE; outs() << "BOLT-INFO: forcing -jump-tables=move for instrumentation\n"; } + + if (BC.IsLinuxKernel) + return; + if (!BC.StartFunctionAddress) { errs() << "BOLT-ERROR: instrumentation runtime libraries require a known " "entry point of " @@ -191,6 +200,9 @@ void InstrumentationRuntimeLibrary::emitBinary(BinaryContext &BC, TablesSection->setAlignment(llvm::Align(BC.RegularPageSize)); Streamer.switchSection(TablesSection); emitString("__bolt_instr_tables", buildTables(BC)); + } else { + emitString("__bolt_instr_tables", + "To avoid \"out of range of Page21 fixup\""); } } @@ -203,6 +215,11 @@ void InstrumentationRuntimeLibrary::link( if (BC.isMachO()) return; + if (BC.IsLinuxKernel) { + emitTablesAsELFNote(BC); + return; + } + RuntimeFiniAddress = Linker.lookupSymbol("__bolt_instr_fini").value_or(0); if (!RuntimeFiniAddress) { errs() << "BOLT-ERROR: instrumentation library does not define " diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp index f58f7857e28aeb075dd5eaa13c35e1045fa01fc0..7ad19ef1e74bb9591761b87bbbcce34f53ced579 100644 --- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp +++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp @@ -1363,6 +1363,16 @@ public: *Ctx, 0))); } + void createCondBranch(MCInst &Inst, const MCSymbol *TBB, unsigned CC, + MCContext *Ctx) const override { + Inst.setOpcode(AArch64::Bcc); + Inst.clear(); + Inst.addOperand(MCOperand::createImm(CC)); + Inst.addOperand(MCOperand::createExpr(getTargetExprFor( + Inst, MCSymbolRefExpr::create(TBB, MCSymbolRefExpr::VK_None, *Ctx), + *Ctx, 0))); + } + bool shouldRecordCodeRelocation(uint64_t RelType) const override { switch (RelType) { case ELF::R_AARCH64_ABS64: @@ -1413,6 +1423,10 @@ public: return StringRef("\0\0\0\0", 4); } + StringRef getUndefFillValue() const override { + return StringRef("\xff\xff\x00\x00", 4); // UDF + } + void createReturn(MCInst &Inst) const override { Inst.setOpcode(AArch64::RET); Inst.clear(); @@ -1681,6 +1695,9 @@ public: const MCAsmBackend &MAB) const override { const MCFixupKindInfo &FKI = MAB.getFixupKindInfo(Fixup.getKind()); + if (Fixup.getKind() == MCFixupKind(AArch64::fixup_aarch64_pcrel_branch19)) + return std::nullopt; + assert(FKI.TargetOffset == 0 && "0-bit relocation offset expected"); const uint64_t RelOffset = Fixup.getOffset(); diff --git a/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp b/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp index f8c83b09395f5ee1a23aebf3a57f50f2c44dcf69..5330506659291407ea052e75cf87b0b5c6d45eaf 100644 --- a/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp +++ b/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp @@ -238,6 +238,10 @@ public: return StringRef("\0\0\0\0", 4); } + StringRef getUndefFillValue() const override { + return StringRef("\x73\x10\x00\xc0", 4); // UNIMP + } + void createCall(unsigned Opcode, MCInst &Inst, const MCSymbol *Target, MCContext *Ctx) { Inst.setOpcode(Opcode); diff --git a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp index 63086c06d74fd980b23ae74cfa4bded50eb1644a..9b51ab0763e3500a3e52c50d013554efaa63f1ad 100644 --- a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp +++ b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp @@ -413,6 +413,10 @@ public: StringRef getTrapFillValue() 
const override { return StringRef("\314", 1); } + StringRef getUndefFillValue() const override { + return StringRef("\x0f\x0b", 2); // UD2 + } + struct IndJmpMatcherFrag1 : MCInstMatcher { std::unique_ptr Base; std::unique_ptr Scale; diff --git a/bolt/runtime/CMakeLists.txt b/bolt/runtime/CMakeLists.txt index 6a65f80fb9079f18258b1c2ce402c0f4bc78581b..e0871ada86abd5e4017e29c93f1b971510024826 100644 --- a/bolt/runtime/CMakeLists.txt +++ b/bolt/runtime/CMakeLists.txt @@ -17,6 +17,13 @@ add_library(bolt_rt_instr STATIC ${CMAKE_CURRENT_BINARY_DIR}/config.h ) set_target_properties(bolt_rt_instr PROPERTIES ARCHIVE_OUTPUT_DIRECTORY "${LLVM_LIBRARY_DIR}") + +add_library(bolt_rt_instr_linux STATIC + instr_linux.cpp + ${CMAKE_CURRENT_BINARY_DIR}/config.h + ) +set_target_properties(bolt_rt_instr_linux PROPERTIES ARCHIVE_OUTPUT_DIRECTORY "lib${LLVM_LIBDIR_SUFFIX}") + add_library(bolt_rt_hugify STATIC hugify.cpp ${CMAKE_CURRENT_BINARY_DIR}/config.h @@ -43,10 +50,12 @@ endif() # Don't let the compiler think it can create calls to standard libs target_compile_options(bolt_rt_instr PRIVATE ${BOLT_RT_FLAGS}) target_include_directories(bolt_rt_instr PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) +target_include_directories(bolt_rt_instr_linux PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) target_compile_options(bolt_rt_hugify PRIVATE ${BOLT_RT_FLAGS}) target_include_directories(bolt_rt_hugify PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) install(TARGETS bolt_rt_instr DESTINATION "lib${LLVM_LIBDIR_SUFFIX}") +install(TARGETS bolt_rt_instr_linux DESTINATION "lib${LLVM_LIBDIR_SUFFIX}") install(TARGETS bolt_rt_hugify DESTINATION "lib${LLVM_LIBDIR_SUFFIX}") if (CMAKE_CXX_COMPILER_ID MATCHES ".*Clang.*" AND CMAKE_SYSTEM_NAME STREQUAL "Darwin") diff --git a/bolt/runtime/instr_linux.cpp b/bolt/runtime/instr_linux.cpp new file mode 100644 index 0000000000000000000000000000000000000000..fa7bf003e3a12cc1d766969d76f3d936517ea1ce --- /dev/null +++ b/bolt/runtime/instr_linux.cpp @@ -0,0 +1,218 @@ +//===------------------ bolt/runtime/instr_linux.cpp ----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// BOLT runtime library for intrumenting Linux kernel. 
+// +//===----------------------------------------------------------------------===// + +#ifndef __linux__ +#error "For Linux only" +#endif + +#include +#include + +#if defined(__aarch64__) + +// Save all registers while keeping 16B stack alignment +#define SAVE_ALL \ + "stp x0, x1, [sp, #-16]!\n" \ + "stp x2, x3, [sp, #-16]!\n" \ + "stp x4, x5, [sp, #-16]!\n" \ + "stp x6, x7, [sp, #-16]!\n" \ + "stp x8, x9, [sp, #-16]!\n" \ + "stp x10, x11, [sp, #-16]!\n" \ + "stp x12, x13, [sp, #-16]!\n" \ + "stp x14, x15, [sp, #-16]!\n" \ + "stp x16, x17, [sp, #-16]!\n" \ + "stp x18, x19, [sp, #-16]!\n" \ + "stp x20, x21, [sp, #-16]!\n" \ + "stp x22, x23, [sp, #-16]!\n" \ + "stp x24, x25, [sp, #-16]!\n" \ + "stp x26, x27, [sp, #-16]!\n" \ + "stp x28, x29, [sp, #-16]!\n" \ + "str x30, [sp,#-16]!\n" +// Mirrors SAVE_ALL +#define RESTORE_ALL \ + "ldr x30, [sp], #16\n" \ + "ldp x28, x29, [sp], #16\n" \ + "ldp x26, x27, [sp], #16\n" \ + "ldp x24, x25, [sp], #16\n" \ + "ldp x22, x23, [sp], #16\n" \ + "ldp x20, x21, [sp], #16\n" \ + "ldp x18, x19, [sp], #16\n" \ + "ldp x16, x17, [sp], #16\n" \ + "ldp x14, x15, [sp], #16\n" \ + "ldp x12, x13, [sp], #16\n" \ + "ldp x10, x11, [sp], #16\n" \ + "ldp x8, x9, [sp], #16\n" \ + "ldp x6, x7, [sp], #16\n" \ + "ldp x4, x5, [sp], #16\n" \ + "ldp x2, x3, [sp], #16\n" \ + "ldp x0, x1, [sp], #16\n" + +namespace { + +// Get the difference between runtime addrress of .text section and +// static address in section header table. Can be extracted from arbitrary +// pc value recorded at runtime to get the corresponding static address, which +// in turn can be used to search for indirect call description. Needed because +// indirect call descriptions are read-only non-relocatable data. +uint64_t getTextBaseAddress() { + uint64_t DynAddr; + uint64_t StaticAddr; + __asm__ volatile("b .instr%=\n\t" + ".StaticAddr%=:\n\t" + ".dword __hot_end\n\t" + ".instr%=:\n\t" + "ldr %0, .StaticAddr%=\n\t" + "adrp %1, __hot_end\n\t" + "add %1, %1, :lo12:__hot_end\n\t" + : "=r"(StaticAddr), "=r"(DynAddr)); + return DynAddr - StaticAddr; +} + +} // namespace + +#elif defined(__x86_64__) + +// Save all registers while keeping 16B stack alignment +#define SAVE_ALL \ + "push %%rax\n" \ + "push %%rbx\n" \ + "push %%rcx\n" \ + "push %%rdx\n" \ + "push %%rdi\n" \ + "push %%rsi\n" \ + "push %%rbp\n" \ + "push %%r8\n" \ + "push %%r9\n" \ + "push %%r10\n" \ + "push %%r11\n" \ + "push %%r12\n" \ + "push %%r13\n" \ + "push %%r14\n" \ + "push %%r15\n" \ + "sub $8, %%rsp\n" +// Mirrors SAVE_ALL +#define RESTORE_ALL \ + "add $8, %%rsp\n" \ + "pop %%r15\n" \ + "pop %%r14\n" \ + "pop %%r13\n" \ + "pop %%r12\n" \ + "pop %%r11\n" \ + "pop %%r10\n" \ + "pop %%r9\n" \ + "pop %%r8\n" \ + "pop %%rbp\n" \ + "pop %%rsi\n" \ + "pop %%rdi\n" \ + "pop %%rdx\n" \ + "pop %%rcx\n" \ + "pop %%rbx\n" \ + "pop %%rax\n" + +namespace { + +// Get the difference between runtime addrress of .text section and +// static address in section header table. Can be extracted from arbitrary +// pc value recorded at runtime to get the corresponding static address, which +// in turn can be used to search for indirect call description. Needed because +// indirect call descriptions are read-only non-relocatable data. 
+uint64_t getTextBaseAddress() { + uint64_t DynAddr; + uint64_t StaticAddr; + __asm__ volatile("leaq __hot_end(%%rip), %0\n\t" + "movabsq $__hot_end, %1\n\t" + : "=r"(DynAddr), "=r"(StaticAddr)); + return DynAddr - StaticAddr; +} + +} // namespace + +#else +#error "Unsupported architecture" +#endif + +#pragma GCC visibility push(hidden) + +extern "C" { +extern void (*__bolt_ind_call_counter_func_pointer)(); +extern void (*__bolt_ind_tailcall_counter_func_pointer)(); +} + +namespace { + +// Base address which we substract from recorded PC values when searching for +// indirect call description entries. Needed because indCall descriptions are +// mapped read-only and contain static addresses. Initialized in +// __bolt_instr_setup. +uint64_t TextBaseAddress = 0; + +} // anonymous namespace + +extern "C" void __bolt_instr_indirect_call(); +extern "C" void __bolt_instr_indirect_tailcall(); + +extern "C" __attribute((force_align_arg_pointer)) void +instrumentIndirectCall(uint64_t Target, uint64_t IndCallID) {} + +/// We receive as in-stack arguments the identifier of the indirect call site +/// as well as the target address for the call +extern "C" __attribute((naked)) void __bolt_instr_indirect_call() { +#if defined(__aarch64__) + // clang-format off + __asm__ __volatile__(SAVE_ALL + "ldp x0, x1, [sp, #288]\n" + "bl instrumentIndirectCall\n" + RESTORE_ALL + "ret\n" + :::); + // clang-format on +#else + // clang-format off + __asm__ __volatile__(SAVE_ALL + "mov 0xa0(%%rsp), %%rdi\n" + "mov 0x98(%%rsp), %%rsi\n" + "call instrumentIndirectCall\n" + RESTORE_ALL + "ret\n" + :::); + // clang-format on +#endif +} + +extern "C" __attribute((naked)) void __bolt_instr_indirect_tailcall() { +#if defined(__aarch64__) + // clang-format off + __asm__ __volatile__(SAVE_ALL + "ldp x0, x1, [sp, #288]\n" + "bl instrumentIndirectCall\n" + RESTORE_ALL + "ret\n" + :::); + // clang-format on +#else + // clang-format off + __asm__ __volatile__(SAVE_ALL + "mov 0x98(%%rsp), %%rdi\n" + "mov 0x90(%%rsp), %%rsi\n" + "call instrumentIndirectCall\n" + RESTORE_ALL + "ret\n" + :::); + // clang-format on +#endif +} + +extern "C" void __attribute((force_align_arg_pointer)) __bolt_instr_setup() { + __bolt_ind_call_counter_func_pointer = __bolt_instr_indirect_call; + __bolt_ind_tailcall_counter_func_pointer = __bolt_instr_indirect_tailcall; + TextBaseAddress = getTextBaseAddress(); +} diff --git a/bolt/test/X86/dummy-eh-frame-bug.s b/bolt/test/X86/dummy-eh-frame-bug.s index 2d05cf3d88d785599dc226fd281c474fa31c9b62..53ede58541b769d0b43f5fc18d1ec8e53c0fe13f 100644 --- a/bolt/test/X86/dummy-eh-frame-bug.s +++ b/bolt/test/X86/dummy-eh-frame-bug.s @@ -9,7 +9,7 @@ ## after .text when no update is needed to .eh_frame. 
diff --git a/bolt/test/X86/dummy-eh-frame-bug.s b/bolt/test/X86/dummy-eh-frame-bug.s
index 2d05cf3d88d785599dc226fd281c474fa31c9b62..53ede58541b769d0b43f5fc18d1ec8e53c0fe13f 100644
--- a/bolt/test/X86/dummy-eh-frame-bug.s
+++ b/bolt/test/X86/dummy-eh-frame-bug.s
@@ -9,7 +9,7 @@
 ## after .text when no update is needed to .eh_frame.
 
 # CHECK: {{ .text}} PROGBITS [[#%x,ADDR:]] [[#%x,OFFSET:]] [[#%x,SIZE:]]
-# CHECK-NEXT: 0000000000000000 [[#%x, OFFSET + SIZE]]
+# CHECK-NEXT-TODO: 0000000000000000 [[#%x, OFFSET + SIZE]]
 
 .text
 .globl nocfi_function
diff --git a/bolt/test/X86/linux-alt-instruction.s b/bolt/test/X86/linux-alt-instruction.s
index fe3abbfc2b4c936223c18bceb014fd41e0c504b8..3e299685cf5bbe3a8c22e301fdddc04e61a30153 100644
--- a/bolt/test/X86/linux-alt-instruction.s
+++ b/bolt/test/X86/linux-alt-instruction.s
@@ -6,31 +6,9 @@
 # RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o
 # RUN: %clang %cflags -nostdlib %t.o -o %t.exe \
 # RUN:   -Wl,--image-base=0xffffffff80000000,--no-dynamic-linker,--no-eh-frame-hdr,--no-pie
-# RUN: llvm-bolt %t.exe --print-cfg --alt-inst-feature-size=2 -o %t.out \
+# RUN: llvm-bolt %t.exe --print-cfg -o %t.out \
 # RUN:   | FileCheck %s
-
-## Older kernels used to have padlen field in alt_instr. Check compatibility.
-
-# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown --defsym PADLEN=1 \
-# RUN:   %s -o %t.padlen.o
-# RUN: %clang %cflags -nostdlib %t.padlen.o -o %t.padlen.exe \
-# RUN:   -Wl,--image-base=0xffffffff80000000,--no-dynamic-linker,--no-eh-frame-hdr,--no-pie
-# RUN: llvm-bolt %t.padlen.exe --print-cfg --alt-inst-has-padlen -o %t.padlen.out \
-# RUN:   | FileCheck %s
-
-## Check with a larger size of "feature" field in alt_instr.
-
-# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown \
-# RUN:   --defsym FEATURE_SIZE_4=1 %s -o %t.fs4.o
-# RUN: %clang %cflags -nostdlib %t.fs4.o -o %t.fs4.exe \
-# RUN:   -Wl,--image-base=0xffffffff80000000,--no-dynamic-linker,--no-eh-frame-hdr,--no-pie
-# RUN: llvm-bolt %t.fs4.exe --print-cfg --alt-inst-feature-size=4 -o %t.fs4.out \
-# RUN:   | FileCheck %s
-
-## Check that out-of-bounds read is handled properly.
-
-# RUN: not llvm-bolt %t.fs4.exe --alt-inst-feature-size=2 -o %t.fs4.out
-
 ## Check that BOLT automatically detects structure fields in .altinstructions.
 
 # RUN: llvm-bolt %t.exe --print-cfg -o %t.out | FileCheck %s
@@ -78,11 +56,7 @@ _start:
   .long .L0 - .   # org instruction
   .long .A0 - .   # alt instruction
-.ifdef FEATURE_SIZE_4
-  .long 0x72      # feature flags
-.else
   .word 0x72      # feature flags
-.endif
   .byte .L1 - .L0 # org size
   .byte .A1 - .A0 # alt size
 .ifdef PADLEN
@@ -91,11 +65,7 @@ _start:
   .long .L0 - .   # org instruction
   .long .A1 - .   # alt instruction
-.ifdef FEATURE_SIZE_4
-  .long 0x3b      # feature flags
-.else
   .word 0x3b      # feature flags
-.endif
   .byte .L1 - .L0 # org size
   .byte .A2 - .A1 # alt size
 .ifdef PADLEN
@@ -104,11 +74,7 @@ _start:
   .long .L0 - .   # org instruction
   .long .A2 - .   # alt instruction
-.ifdef FEATURE_SIZE_4
-  .long 0x110     # feature flags
-.else
   .word 0x110     # feature flags
-.endif
   .byte .L1 - .L0 # org size
   .byte .Ae - .A2 # alt size
 .ifdef PADLEN
@@ -142,6 +108,15 @@ _start:
 .section .orc_unwind_ip
   .long .L0 + 2 - .
 
+## Linux kernel version
+  .rodata
+  .align 16
+  .globl linux_banner
+  .type linux_banner, @object
+linux_banner:
+  .string "Linux version 6.1\n"
+  .size linux_banner, . - linux_banner
+
 ## Fake Linux Kernel sections.
 .section __ksymtab,"a",@progbits
 .section __ksymtab_gpl,"a",@progbits
diff --git a/bolt/test/X86/linux-bug-table.s b/bolt/test/X86/linux-bug-table.s
index 63f70a0b35d9fe59c85a68998a019a2175a74c46..4185a0aa1d1cabd62146273c8a258991eeeee5b1 100644
--- a/bolt/test/X86/linux-bug-table.s
+++ b/bolt/test/X86/linux-bug-table.s
@@ -56,6 +56,15 @@ _start:
   .long .L1 - .   # instruction
   .org 2b + 12
 
+## Linux kernel version
+  .rodata
+  .align 16
+  .globl linux_banner
+  .type linux_banner, @object
+linux_banner:
+  .string "Linux version 6.6.61\n"
+  .size linux_banner, . - linux_banner
+
 ## Fake Linux Kernel sections.
 .section __ksymtab,"a",@progbits
 .section __ksymtab_gpl,"a",@progbits
diff --git a/bolt/test/X86/linux-exceptions.s b/bolt/test/X86/linux-exceptions.s
index 20b8c965f853a97cf782307e9f4dc3881489e793..522853465f491cee95842a396147cc9515573c7c 100644
--- a/bolt/test/X86/linux-exceptions.s
+++ b/bolt/test/X86/linux-exceptions.s
@@ -22,21 +22,21 @@
 .globl _start
 .type _start, %function
 _start:
-# CHECK: Binary Function "_start"
+# CHECK-TODO: Binary Function "_start"
   nop
 .L0:
   mov (%rdi), %rax
-# CHECK: mov
-# CHECK-SAME: ExceptionEntry: 1 # Fixup: [[FIXUP:[a-zA-Z0-9_]+]]
+# CHECK-TODO: mov
+# CHECK-SAME-TODO: ExceptionEntry: 1 # Fixup: [[FIXUP:[a-zA-Z0-9_]+]]
   nop
 .L1:
   mov (%rsi), %rax
-# CHECK: mov
-# CHECK-SAME: ExceptionEntry: 2 # Fixup: [[FIXUP]]
+# CHECK-TODO: mov
+# CHECK-SAME-TODO: ExceptionEntry: 2 # Fixup: [[FIXUP]]
   nop
   ret
 .LF0:
-# CHECK: Secondary Entry Point: [[FIXUP]]
+# CHECK-TODO: Secondary Entry Point: [[FIXUP]]
   jmp foo
 
   .size _start, .-_start
@@ -59,6 +59,15 @@ foo:
   .long .LF0 - . # fixup
   .long 0        # data
 
+## Linux kernel version
+  .rodata
+  .align 16
+  .globl linux_banner
+  .type linux_banner, @object
+linux_banner:
+  .string "Linux version 5.10.133\n"
+  .size linux_banner, . - linux_banner
+
 ## Fake Linux Kernel sections.
 .section __ksymtab,"a",@progbits
 .section __ksymtab_gpl,"a",@progbits
diff --git a/bolt/test/X86/linux-orc.s b/bolt/test/X86/linux-orc.s
index 5f2096278e92d6a10c04bb387b74d52833d2a540..16a5d156cad8362ed02031c3c5b71e772255e7c3 100644
--- a/bolt/test/X86/linux-orc.s
+++ b/bolt/test/X86/linux-orc.s
@@ -157,6 +157,15 @@ bar:
 .section .orc_unwind_ip
   .long .L4 - .
 
+## Linux kernel version
+  .rodata
+  .align 16
+  .globl linux_banner
+  .type linux_banner, @object
+linux_banner:
+  .string "Linux version 6.6.61\n"
+  .size linux_banner, . - linux_banner
+
 ## Fake Linux Kernel sections.
 .section __ksymtab,"a",@progbits
 .section __ksymtab_gpl,"a",@progbits
diff --git a/bolt/test/X86/linux-parainstructions.s b/bolt/test/X86/linux-parainstructions.s
index 07fca6bbedafaba54b44106519634d93712dad7e..facfcb168b1662b69ddaa08513bdbde851026892 100644
--- a/bolt/test/X86/linux-parainstructions.s
+++ b/bolt/test/X86/linux-parainstructions.s
@@ -49,6 +49,15 @@ _start:
   .byte 1 # type
   .byte 7 # length
 
+## Linux kernel version
+  .rodata
+  .align 16
+  .globl linux_banner
+  .type linux_banner, @object
+linux_banner:
+  .string "Linux version 6.6.61\n"
+  .size linux_banner, . - linux_banner
+
 ## Fake Linux Kernel sections.
 .section __ksymtab,"a",@progbits
 .section __ksymtab_gpl,"a",@progbits
diff --git a/bolt/test/X86/linux-pci-fixup.s b/bolt/test/X86/linux-pci-fixup.s
index a574ba84c4df11efac6a28e0decf68041dce1d4f..876406f35dd573cb362f0a7e4a5e377f72fbf42e 100644
--- a/bolt/test/X86/linux-pci-fixup.s
+++ b/bolt/test/X86/linux-pci-fixup.s
@@ -36,6 +36,15 @@ _start:
   .long 0x0      # class shift
   .long .L0 - .  # fixup
 
+## Linux kernel version
+  .rodata
+  .align 16
+  .globl linux_banner
+  .type linux_banner, @object
+linux_banner:
+  .string "Linux version 6.6.61\n"
+  .size linux_banner, . - linux_banner
+
 ## Fake Linux Kernel sections.
 .section __ksymtab,"a",@progbits
 .section __ksymtab_gpl,"a",@progbits
diff --git a/bolt/test/X86/linux-smp-locks.s b/bolt/test/X86/linux-smp-locks.s
index 5f4410d14fc6b08c38dbd13c1a6b234c88ca1d2b..a3bc302b8d0165458e1e15e6ab4b65b108691b3a 100644
--- a/bolt/test/X86/linux-smp-locks.s
+++ b/bolt/test/X86/linux-smp-locks.s
@@ -35,6 +35,15 @@ _start:
   .long .L0 - .
   .long .L1 - .
 
+## Linux kernel version
+  .rodata
+  .align 16
+  .globl linux_banner
+  .type linux_banner, @object
+linux_banner:
+  .string "Linux version 6.6.61\n"
+  .size linux_banner, . - linux_banner
+
 ## Fake Linux Kernel sections.
 .section __ksymtab,"a",@progbits
 .section __ksymtab_gpl,"a",@progbits
diff --git a/bolt/test/X86/linux-static-calls.s b/bolt/test/X86/linux-static-calls.s
index caf95e1c03227d20d1716c13cd0dd0dfe24c2380..397600de96dd9dfacc4644dbbd6c50858298745d 100644
--- a/bolt/test/X86/linux-static-calls.s
+++ b/bolt/test/X86/linux-static-calls.s
@@ -54,6 +54,15 @@ __start_static_call_sites:
   .type __stop_static_call_sites, %object
 __stop_static_call_sites:
 
+## Linux kernel version
+  .rodata
+  .align 16
+  .globl linux_banner
+  .type linux_banner, @object
+linux_banner:
+  .string "Linux version 6.6.61\n"
+  .size linux_banner, . - linux_banner
+
 ## Fake Linux Kernel sections.
 .section __ksymtab,"a",@progbits
 .section __ksymtab_gpl,"a",@progbits
diff --git a/bolt/test/X86/linux-static-keys.s b/bolt/test/X86/linux-static-keys.s
index fb419e0f76275590da3b2a72449db8239b104b56..3d54fece7703a7b26174cd8ecfa1088211282679 100644
--- a/bolt/test/X86/linux-static-keys.s
+++ b/bolt/test/X86/linux-static-keys.s
@@ -79,6 +79,21 @@ __start___jump_table:
   .type __stop___jump_table, %object
 __stop___jump_table:
 
+## Static keys (we just use the label, ignoring the format of the keys).
+  .data
+  .align 8
+fake_static_key:
+  .quad 0
+
+## Linux kernel version
+  .rodata
+  .align 16
+  .globl linux_banner
+  .type linux_banner, @object
+linux_banner:
+  .string "Linux version 6.6.61\n"
+  .size linux_banner, . - linux_banner
+
 ## Fake Linux Kernel sections.
 .section __ksymtab,"a",@progbits
 .section __ksymtab_gpl,"a",@progbits
diff --git a/bolt/test/X86/linux-version.S b/bolt/test/X86/linux-version.S
new file mode 100644
index 0000000000000000000000000000000000000000..e680d0d64a21f96c475d4a2ed42e98c05520363c
--- /dev/null
+++ b/bolt/test/X86/linux-version.S
@@ -0,0 +1,53 @@
+# REQUIRES: system-linux
+
+## Check that BOLT correctly detects the Linux kernel version
+
+# RUN: %clang -DA -target x86_64-unknown-unknown \
+# RUN:   %cflags -nostdlib %s -o %t.exe \
+# RUN:   -Wl,--image-base=0xffffffff80000000,--no-dynamic-linker,--no-eh-frame-hdr
+# RUN: llvm-bolt %t.exe -o %t.out 2>&1 | FileCheck --check-prefix=CHECK-A %s
+
+# RUN: %clang -DB -target x86_64-unknown-unknown \
+# RUN:   %cflags -nostdlib %s -o %t.exe \
+# RUN:   -Wl,--image-base=0xffffffff80000000,--no-dynamic-linker,--no-eh-frame-hdr
+# RUN: llvm-bolt %t.exe -o %t.out 2>&1 | FileCheck --check-prefix=CHECK-B %s
+
+# RUN: %clang -DC -target x86_64-unknown-unknown \
+# RUN:   %cflags -nostdlib %s -o %t.exe \
+# RUN:   -Wl,--image-base=0xffffffff80000000,--no-dynamic-linker,--no-eh-frame-hdr
+# RUN: llvm-bolt %t.exe -o %t.out 2>&1 | FileCheck --check-prefix=CHECK-C %s
+
+  .text
+  .globl foo
+  .type foo, %function
+foo:
+  ret
+  .size foo, .-foo
+
+## Linux kernel version
+  .rodata
+  .align 16
+  .globl linux_banner
+  .type linux_banner, @object
+linux_banner:
+
+#ifdef A
+  .string "Linux version 6.6.61\n"
+#endif
+# CHECK-A: BOLT-INFO: Linux kernel version is 6.6.61
+
+#ifdef B
+  .string "Linux version 6.6.50-rc4\n"
+#endif
+# CHECK-B: BOLT-INFO: Linux kernel version is 6.6.50
+
+#ifdef C
+  .string "Linux version 6.6\n"
+#endif
+# CHECK-C: BOLT-INFO: Linux kernel version is 6.6
+
+  .size linux_banner, . - linux_banner
+
+## Fake Linux Kernel sections.
+  .section __ksymtab,"a",@progbits
+  .section __ksymtab_gpl,"a",@progbits
diff --git a/bolt/test/X86/section-end-sym.s b/bolt/test/X86/section-end-sym.s
index 545cf37263da56f993304b1bc5e3a1d3542fc4c0..e8311cf652ef9d371eb250852157a138e96b9db3 100644
--- a/bolt/test/X86/section-end-sym.s
+++ b/bolt/test/X86/section-end-sym.s
@@ -9,7 +9,7 @@
 # RUN:   | FileCheck %s
 
 # CHECK: considering symbol etext for function
-# CHECK-NEXT: rejecting as symbol points to end of its section
+# CHECK-NEXT: rejecting as symbol is outside its section
 # CHECK-NOT: Binary Function "etext{{.*}}" after building cfg
 
diff --git a/bolt/tools/CMakeLists.txt b/bolt/tools/CMakeLists.txt
index 22ea3b9bd805f3b71c0fa82b04d0e184abd6f3c5..2200a90a18ef5534ed34f07be52c8e16da0858c8 100644
--- a/bolt/tools/CMakeLists.txt
+++ b/bolt/tools/CMakeLists.txt
@@ -6,4 +6,5 @@ add_subdirectory(driver)
 add_subdirectory(llvm-bolt-fuzzer)
 add_subdirectory(bat-dump)
 add_subdirectory(merge-fdata)
+add_subdirectory(bolt-linux-instr)
 add_subdirectory(heatmap)
diff --git a/bolt/tools/bolt-linux-instr/CMakeLists.txt b/bolt/tools/bolt-linux-instr/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..927124518bf53511d7af6a960afaa0454a414f9d
--- /dev/null
+++ b/bolt/tools/bolt-linux-instr/CMakeLists.txt
@@ -0,0 +1,12 @@
+set(LLVM_LINK_COMPONENTS
+  ${LLVM_TARGETS_TO_BUILD}
+  Object
+  Support
+  )
+
+add_bolt_tool(bolt-linux-instr
+  bolt-linux-instr.cpp
+  DISABLE_LLVM_LINK_LLVM_DYLIB
+  )
+
+add_dependencies(bolt bolt-linux-instr)
diff --git a/bolt/tools/bolt-linux-instr/bolt-linux-instr.cpp b/bolt/tools/bolt-linux-instr/bolt-linux-instr.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..d85174bbe88fbf89df3c9315b7f9d43f8aedaed7
--- /dev/null
+++ b/bolt/tools/bolt-linux-instr/bolt-linux-instr.cpp
@@ -0,0 +1,761 @@
+
+//===------- bolt/tools/bolt-linux-instr/bolt-linux-instr.cpp -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/Binary.h"
+#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+
+#include <stack>
+
+using namespace llvm;
+using namespace object;
+
+namespace {
+
+cl::OptionCategory
+    LinuxInstrDataCat("Linux kernel instrumentation data options");
+
+cl::SubCommand DumpSubCommand("dump", "Dump Linux kernel instrumentation data");
+
+cl::SubCommand DiffSubCommand("diff", "Diff two dumps");
+
+cl::opt<std::string> VmlinuxFilename("v", cl::desc("The vmlinux filename"),
+                                     cl::value_desc("filename"), cl::Required,
+                                     cl::sub(DumpSubCommand),
+                                     cl::sub(DiffSubCommand),
+                                     cl::cat(LinuxInstrDataCat));
+
+cl::opt<std::string> OutputFilename("o",
+                                    cl::desc("The output .fdata/.dat filename"),
+                                    cl::value_desc("filename"), cl::Required,
+                                    cl::sub(DumpSubCommand),
+                                    cl::sub(DiffSubCommand),
+                                    cl::cat(LinuxInstrDataCat));
+
+cl::opt<std::string> Dat1Filename(cl::Positional,
+                                  cl::desc("<1st .dat filename>"), cl::Required,
+                                  cl::sub(DiffSubCommand),
+                                  cl::cat(LinuxInstrDataCat));
+
+cl::opt<std::string> Dat2Filename(cl::Positional,
+                                  cl::desc("<2nd .dat filename>"), cl::Optional,
+                                  cl::sub(DiffSubCommand),
+                                  cl::cat(LinuxInstrDataCat));
+
+class ELFCore {
+public:
+  ELFCore(const std::string Filename) : Filename(Filename) {}
+
+  Error init() {
+    ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr =
+        MemoryBuffer::getFileSlice(Filename, 1024 * 1024, 0, true);
+    if (std::error_code EC = MBOrErr.getError())
+      return createStringError(EC.message());
+    HeaderMB = std::move(*MBOrErr);
+
+    Expected<ELFFile<ELF64LE>> EFOrErr =
+        ELFFile<ELF64LE>::create(HeaderMB->getBuffer());
+    if (Error E = EFOrErr.takeError())
+      return E;
+    EF = std::make_unique<ELFFile<ELF64LE>>(std::move(*EFOrErr));
+    return Error::success();
+  }
+
+  template <typename T> Expected<T> read(uint64_t Addr) const {
+    Expected<std::unique_ptr<MemoryBuffer>> MBOrErr = read(Addr, sizeof(T));
+    if (Error E = MBOrErr.takeError())
+      return E;
+    return *reinterpret_cast<const T *>((*MBOrErr)->getBuffer().data());
+  }
+
+  Expected<std::unique_ptr<MemoryBuffer>> read(uint64_t Addr,
+                                               uint64_t Size) const {
+    auto ProgramHeaders = EF->program_headers();
+    if (Error E = ProgramHeaders.takeError())
+      return E;
+
+    for (auto PH : *ProgramHeaders) {
+      if (PH.p_memsz != PH.p_filesz)
+        continue;
+
+      if (PH.p_vaddr <= Addr && Addr + Size <= PH.p_vaddr + PH.p_memsz) {
+        const uint64_t Offset = PH.p_offset + (Addr - PH.p_vaddr);
+
+        ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr =
+            MemoryBuffer::getFileSlice(Filename, Size, Offset, true);
+        if (std::error_code EC = MBOrErr.getError())
+          return createStringError(EC.message());
+        return std::move(*MBOrErr);
+      }
+    }
+    return createStringError("invalid range");
+  }
+
+  StringRef getFilename() const { return Filename; }
+
+private:
+  std::unique_ptr<MemoryBuffer> HeaderMB;
+  std::unique_ptr<ELFFile<ELF64LE>> EF;
+
+  const std::string Filename;
+};
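// Editor's sketch (not part of the patch): typical use of ELFCore above is to
// read a word of kernel memory through /proc/kcore at a vmlinux virtual
// address. The address argument is hypothetical; errors are swallowed here
// only to keep the example short.
static uint64_t readKernelWordOrZero(uint64_t VmlinuxAddr) {
  ELFCore Core("/proc/kcore");
  if (Error E = Core.init()) {
    consumeError(std::move(E));
    return 0;
  }
  Expected<uint64_t> ValOrErr = Core.read<uint64_t>(VmlinuxAddr);
  if (!ValOrErr) {
    consumeError(ValOrErr.takeError());
    return 0;
  }
  return *ValOrErr;
}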
createStringError("not an ELF64LE object file"); + + for (const ELFSymbolRef &Sym : EF->symbols()) { + Expected NameOrErr = Sym.getName(); + if (!NameOrErr) + continue; + StringRef Name = NameOrErr.get(); + + Expected ValueOrErr = Sym.getValue(); + if (!ValueOrErr) + continue; + uint64_t Value = ValueOrErr.get(); + + SymbolValues[Name] = Value; + } + + return Error::success(); + } + + Expected getSymbolValue(StringRef Name) const { + if (!SymbolValues.contains(Name)) + return createStringError("unknown symbol"); + return SymbolValues.at(Name); + } + + Expected getSection(StringRef Name) const { + for (auto Section : EF->sections()) { + Expected NameOrErr = Section.getName(); + if (NameOrErr && *NameOrErr == Name) + return Section; + } + return createStringError("unknown section"); + } + + Expected getSectionContents(StringRef Name) const { + Expected SectionOrErr = getSection(Name); + if (Error E = SectionOrErr.takeError()) + return E; + return SectionOrErr->getContents(); + } + + StringRef getFilename() const { return Filename; } + +private: + StringMap SymbolValues; + + ELFObjectFile *EF; + std::unique_ptr> OwnBin; + + std::string Filename; +}; + +raw_fd_ostream &operator<<(raw_fd_ostream &OS, std::error_code EC) { + OS << EC.message(); + return OS; +} + +template void report_error(const T &Msg) { + errs() << Msg << "\n"; + exit(EXIT_FAILURE); +} + +template +void report_error(const T &Msg, const Args &...Others) { + errs() << Msg << " : "; + report_error(Others...); +} + +std::unique_ptr readELFCore(const ELFCore &EC, uint64_t Addr, + uint64_t Size) { + Expected> MBOrErr = EC.read(Addr, Size); + if (Error E = MBOrErr.takeError()) + report_error(formatv("{0}:{1:x}:{2:x}", EC.getFilename(), Addr, Size), + std::move(E)); + return std::move(*MBOrErr); +} + +template T readELFCore(const ELFCore &EC, uint64_t Addr) { + std::unique_ptr MB = readELFCore(EC, Addr, sizeof(T)); + return *reinterpret_cast(MB->getBuffer().data()); +} + +uint64_t getSymbolValue(const ELFObj &EO, StringRef Name) { + Expected ValueOrErr = EO.getSymbolValue(Name); + if (Error E = ValueOrErr.takeError()) + report_error(Name, std::move(E)); + return *ValueOrErr; +} + +int dumpMode() { + ELFObj Vmlinux(VmlinuxFilename); + if (Error E = Vmlinux.init()) + report_error(VmlinuxFilename, std::move(E)); + + ELFCore PK("/proc/kcore"); + if (Error E = PK.init()) + report_error(PK.getFilename(), std::move(E)); + + // sanity check + { + StringRef ToCheck = "Linux version "; + uint64_t LinuxBannerAddr = getSymbolValue(Vmlinux, "linux_banner"); + std::unique_ptr MB = + readELFCore(PK, LinuxBannerAddr, ToCheck.size()); + if (MB->getBuffer() != ToCheck) + report_error(formatv("'{0}' is not found at {1}:{2:x}", ToCheck, + PK.getFilename(), LinuxBannerAddr)); + } + + uint64_t BoltInstrLocationsAddr = + getSymbolValue(Vmlinux, "__bolt_instr_locations"); + uint64_t BoltNumCounters = + readELFCore(PK, getSymbolValue(Vmlinux, "__bolt_num_counters")); + + outs() << formatv( + "INFO: __bolt_instr_locations={0:x}, __bolt_num_counters={1:x}\n", + BoltInstrLocationsAddr, BoltNumCounters); + + std::unique_ptr MB = + readELFCore(PK, BoltInstrLocationsAddr, BoltNumCounters * 8); + + std::error_code EC; + raw_fd_ostream OutoutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None); + if (EC) + report_error(OutputFilename, EC); + + OutoutFile.write(MB->getBufferStart(), MB->getBufferSize()); + return EXIT_SUCCESS; +} + +std::unique_ptr readFile(StringRef Filename) { + ErrorOr> MBOrErr = + MemoryBuffer::getFile(Filename, + /* IsText */ false, + /* 
RequiresNullTerminator */ false, + /* IsVolatile */ false, Align(8)); + if (std::error_code EC = MBOrErr.getError()) + report_error(Filename, EC); + return std::move(*MBOrErr); +} + +template +std::unique_ptr> readFileAsVector(StringRef Filename) { + std::unique_ptr MB = readFile(Filename); + uint64_t Size = MB->getBufferSize(); + if (!Size || Size % sizeof(T)) + report_error(formatv("{0} : unexpected size", Filename)); + + return std::move(std::make_unique>( + reinterpret_cast(MB->getBufferStart()), + reinterpret_cast(MB->getBufferEnd()))); +} + +StringRef readSectionContents(const ELFObj &EO, StringRef Name) { + Expected Contents = EO.getSectionContents(Name); + if (Error E = Contents.takeError()) + report_error(EO.getFilename(), Name, std::move(E)); + return *Contents; +} + +struct Location { + uint32_t FunctionName; + uint32_t Offset; +}; + +struct CallDescription { + Location From; + uint32_t FromNode; + Location To; + uint32_t Counter; + uint64_t TargetAddress; +}; + +using IndCallDescription = Location; + +struct IndCallTargetDescription { + Location Loc; + uint64_t Address; +}; + +struct EdgeDescription { + Location From; + uint32_t FromNode; + Location To; + uint32_t ToNode; + uint32_t Counter; +}; + +struct InstrumentedNode { + uint32_t Node; + uint32_t Counter; +}; + +struct EntryNode { + uint64_t Node; + uint64_t Address; +}; + +struct FunctionDescription { + uint32_t NumLeafNodes; + const InstrumentedNode *LeafNodes; + uint32_t NumEdges; + const EdgeDescription *Edges; + uint32_t NumCalls; + const CallDescription *Calls; + uint32_t NumEntryNodes; + const EntryNode *EntryNodes; + + /// Constructor will parse the serialized function metadata written by BOLT + FunctionDescription(const uint8_t *FuncDescData); + + uint64_t getSize() const { + return 16 + NumLeafNodes * sizeof(InstrumentedNode) + + NumEdges * sizeof(EdgeDescription) + + NumCalls * sizeof(CallDescription) + + NumEntryNodes * sizeof(EntryNode); + } +}; + +FunctionDescription::FunctionDescription(const uint8_t *FuncDescData) { + const uint8_t *Ptr = FuncDescData; + NumLeafNodes = *reinterpret_cast(Ptr); + LeafNodes = reinterpret_cast(Ptr + 4); + Ptr += 4 + NumLeafNodes * sizeof(InstrumentedNode); + + NumEdges = *reinterpret_cast(Ptr); + Edges = reinterpret_cast(Ptr + 4); + Ptr += 4 + NumEdges * sizeof(EdgeDescription); + + NumCalls = *reinterpret_cast(Ptr); + Calls = reinterpret_cast(Ptr + 4); + Ptr += 4 + NumCalls * sizeof(CallDescription); + + NumEntryNodes = *reinterpret_cast(Ptr); + EntryNodes = reinterpret_cast(Ptr + 4); +} + +struct CallFlowEntry { + uint64_t Val{0}; + uint64_t Calls{0}; +}; + +struct ProfileWriterContext { + std::unique_ptr> Dat; + + const uint8_t *FuncDescData{nullptr}; + const char *Strings{nullptr}; +}; + +struct Edge { + uint32_t Node; // Index in nodes array regarding the destination of this edge + uint32_t ID; // Edge index in an array comprising all edges of the graph +}; + +struct Node { + uint32_t NumInEdges{0}; // Input edge count used to size InEdge + uint32_t NumOutEdges{0}; // Output edge count used to size OutEdges + std::vector InEdges; // Created and managed by \p Graph + std::vector OutEdges; // ditto +}; + +struct Graph { + uint32_t NumNodes; + std::vector CFGNodes; + std::vector SpanningTreeNodes; + std::vector EdgeFreqs; + std::vector CallFreqs; + const FunctionDescription &FD; + + Graph(const FunctionDescription &FD, const uint64_t *Counters, + ProfileWriterContext &Ctx); + +private: + void computeEdgeFrequencies(const uint64_t *Counters, + ProfileWriterContext &Ctx); +}; + 
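// Editor's sketch (not part of the patch): computeEdgeFrequencies below rests
// on flow conservation. Once a node's frequency and all of its instrumented
// incoming edges are known, the single uninstrumented (spanning-tree) parent
// edge is simply the difference, clamped at zero. A stand-alone version of
// that step, assuming the same conventions as the code that follows:
static uint64_t inferParentEdgeFreq(uint64_t NodeFreq,
                                    const std::vector<uint64_t> &KnownInFreqs) {
  int64_t Freq = static_cast<int64_t>(NodeFreq);
  for (uint64_t F : KnownInFreqs)
    Freq -= static_cast<int64_t>(F);
  return Freq < 0 ? 0 : static_cast<uint64_t>(Freq);
}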
+
+Graph::Graph(const FunctionDescription &FD, const uint64_t *Counters,
+             ProfileWriterContext &Ctx)
+    : FD(FD) {
+
+  // First pass to determine number of nodes
+  int32_t MaxNodes = -1;
+  for (uint32_t I = 0; I < FD.NumEdges; ++I)
+    MaxNodes = std::max({static_cast<int32_t>(FD.Edges[I].FromNode),
+                         static_cast<int32_t>(FD.Edges[I].ToNode), MaxNodes});
+
+  for (uint32_t I = 0; I < FD.NumLeafNodes; ++I)
+    MaxNodes = std::max({static_cast<int32_t>(FD.LeafNodes[I].Node), MaxNodes});
+
+  for (uint32_t I = 0; I < FD.NumCalls; ++I)
+    MaxNodes = std::max({static_cast<int32_t>(FD.Calls[I].FromNode), MaxNodes});
+
+  // No nodes? Nothing to do
+  if (MaxNodes < 0) {
+    NumNodes = 0;
+    return;
+  }
+  ++MaxNodes;
+  NumNodes = static_cast<uint32_t>(MaxNodes);
+
+  // Initial allocations
+  CFGNodes = std::vector<Node>(MaxNodes);
+  SpanningTreeNodes = std::vector<Node>(MaxNodes);
+
+  // Figure out how much to allocate to each vector (in/out edge sets)
+  for (uint32_t I = 0; I < FD.NumEdges; ++I) {
+    const uint32_t Src = FD.Edges[I].FromNode;
+    const uint32_t Dst = FD.Edges[I].ToNode;
+
+    CFGNodes[Src].NumOutEdges++;
+    CFGNodes[Dst].NumInEdges++;
+
+    if (FD.Edges[I].Counter == 0xffffffff) {
+      SpanningTreeNodes[Src].NumOutEdges++;
+      SpanningTreeNodes[Dst].NumInEdges++;
+    }
+  }
+
+  // Allocate in/out edge sets
+  for (int I = 0; I < MaxNodes; ++I) {
+    CFGNodes[I].InEdges = std::vector<Edge>(CFGNodes[I].NumInEdges);
+    CFGNodes[I].OutEdges = std::vector<Edge>(CFGNodes[I].NumOutEdges);
+    SpanningTreeNodes[I].InEdges =
+        std::vector<Edge>(SpanningTreeNodes[I].NumInEdges);
+    SpanningTreeNodes[I].OutEdges =
+        std::vector<Edge>(SpanningTreeNodes[I].NumOutEdges);
+    CFGNodes[I].NumInEdges = 0;
+    CFGNodes[I].NumOutEdges = 0;
+    SpanningTreeNodes[I].NumInEdges = 0;
+    SpanningTreeNodes[I].NumOutEdges = 0;
+  }
+
+  // Fill in/out edge sets
+  for (uint32_t I = 0; I < FD.NumEdges; ++I) {
+    const uint32_t Src = FD.Edges[I].FromNode;
+    const uint32_t Dst = FD.Edges[I].ToNode;
+    Edge *E = &CFGNodes[Src].OutEdges[CFGNodes[Src].NumOutEdges++];
+    E->Node = Dst;
+    E->ID = I;
+
+    E = &CFGNodes[Dst].InEdges[CFGNodes[Dst].NumInEdges++];
+    E->Node = Src;
+    E->ID = I;
+
+    if (FD.Edges[I].Counter == 0xffffffff) {
+      E = &SpanningTreeNodes[Src]
+               .OutEdges[SpanningTreeNodes[Src].NumOutEdges++];
+      E->Node = Dst;
+      E->ID = I;
+
+      E = &SpanningTreeNodes[Dst].InEdges[SpanningTreeNodes[Dst].NumInEdges++];
+      E->Node = Src;
+      E->ID = I;
+    }
+  }
+
+  computeEdgeFrequencies(Counters, Ctx);
+}
+
+/// Auxiliary map structure for fast lookups of which calls map to each node of
+/// the function CFG
+struct NodeToCallsMap {
+  NodeToCallsMap(const FunctionDescription &FD, uint32_t NumNodes)
+      : Entries(NumNodes) {
+    for (uint32_t I = 0; I < FD.NumCalls; ++I)
+      ++Entries[FD.Calls[I].FromNode].NumCalls;
+
+    for (uint32_t I = 0; I < Entries.size(); ++I) {
+      Entries[I].Calls = std::vector<uint32_t>(Entries[I].NumCalls);
+      Entries[I].NumCalls = 0;
+    }
+
+    for (uint32_t I = 0; I < FD.NumCalls; ++I) {
+      MapEntry &Entry = Entries[FD.Calls[I].FromNode];
+      Entry.Calls[Entry.NumCalls++] = I;
+    }
+  }
+
+  /// Set the frequency of all calls in node \p NodeID to Freq. However, if
+  /// the calls have their own counters and do not depend on the basic block
+  /// counter, this means they have landing pads and throw exceptions. In this
+  /// case, set their frequency with their counters and return the maximum
+  /// value observed in such counters. This will be used as the new frequency
+  /// at basic block entry. This is used to fix the CFG edge frequencies in the
+  /// presence of exceptions.
+  uint64_t visitAllCallsIn(uint32_t NodeID, uint64_t Freq,
+                           std::vector<uint64_t> &CallFreqs,
+                           const FunctionDescription &FD,
+                           const uint64_t *Counters,
+                           ProfileWriterContext &Ctx) const {
+    const MapEntry &Entry = Entries[NodeID];
+    uint64_t MaxValue = 0;
+    for (int I = 0, E = Entry.NumCalls; I != E; ++I) {
+      const uint32_t CallID = Entry.Calls[I];
+      const CallDescription &CallDesc = FD.Calls[CallID];
+      if (CallDesc.Counter == 0xffffffff) {
+        CallFreqs[CallID] = Freq;
+      } else {
+        const uint64_t CounterVal = Counters[CallDesc.Counter];
+        CallFreqs[CallID] = CounterVal;
+        if (CounterVal > MaxValue)
+          MaxValue = CounterVal;
+      }
+    }
+    return MaxValue;
+  }
+
+  struct MapEntry {
+    uint32_t NumCalls{0};
+    std::vector<uint32_t> Calls;
+  };
+  std::vector<MapEntry> Entries;
+};
+
+void Graph::computeEdgeFrequencies(const uint64_t *Counters,
+                                   ProfileWriterContext &Ctx) {
+  if (NumNodes == 0)
+    return;
+
+  EdgeFreqs = std::vector<uint64_t>(FD.NumEdges);
+  CallFreqs = std::vector<uint64_t>(FD.NumCalls);
+
+  // Setup a lookup for calls present in each node (BB)
+  NodeToCallsMap CallMap(FD, NumNodes);
+
+  // Perform a bottom-up, BFS traversal of the spanning tree in G. Edges in the
+  // spanning tree don't have explicit counters. We must infer their value
+  // using a linear combination of other counters (sum of counters of the
+  // outgoing edges minus sum of counters of the incoming edges).
+  std::stack<uint32_t> Stack;
+  enum Status : uint8_t { S_NEW = 0, S_VISITING, S_VISITED };
+  std::vector<Status> Visited(NumNodes);
+  std::vector<uint64_t> LeafFrequency(NumNodes);
+  std::vector<uint64_t> EntryAddress(NumNodes);
+
+  // Setup a fast lookup for frequency of leaf nodes, which have special
+  // basic block frequency instrumentation (they are not edge profiled).
+  for (uint32_t I = 0; I < FD.NumLeafNodes; ++I)
+    LeafFrequency[FD.LeafNodes[I].Node] = Counters[FD.LeafNodes[I].Counter];
+
+  for (uint32_t I = 0; I < FD.NumEntryNodes; ++I)
+    EntryAddress[FD.EntryNodes[I].Node] = FD.EntryNodes[I].Address;
+
+  // Add all root nodes to the stack
+  for (uint32_t I = 0; I < NumNodes; ++I)
+    if (SpanningTreeNodes[I].NumInEdges == 0)
+      Stack.push(I);
+
+  if (Stack.empty())
+    return;
+
+  // Add all known edge counts, will infer the rest
+  for (uint32_t I = 0; I < FD.NumEdges; ++I) {
+    const uint32_t C = FD.Edges[I].Counter;
+    if (C == 0xffffffff) // inferred counter - we will compute its value
+      continue;
+    EdgeFreqs[I] = Counters[C];
+  }
+
+  while (!Stack.empty()) {
+    const uint32_t Cur = Stack.top();
+    Stack.pop();
+
+    // This shouldn't happen in a tree
+    assert(Visited[Cur] != S_VISITED &&
+           "should not have visited nodes in stack");
+
+    if (Visited[Cur] == S_NEW) {
+      Visited[Cur] = S_VISITING;
+      Stack.push(Cur);
+      for (int I = 0, E = SpanningTreeNodes[Cur].NumOutEdges; I < E; ++I) {
+        const uint32_t Succ = SpanningTreeNodes[Cur].OutEdges[I].Node;
+        Stack.push(Succ);
+      }
+      continue;
+    }
+
+    Visited[Cur] = S_VISITED;
+
+    // Establish our node frequency based on outgoing edges, which should all
+    // be resolved by now.
+    uint64_t CurNodeFreq = LeafFrequency[Cur];
+    // Not a leaf?
+    if (!CurNodeFreq) {
+      for (int I = 0, E = CFGNodes[Cur].NumOutEdges; I != E; ++I) {
+        const uint32_t SuccEdge = CFGNodes[Cur].OutEdges[I].ID;
+        CurNodeFreq += EdgeFreqs[SuccEdge];
+      }
+    }
+
+    const uint64_t CallFreq =
+        CallMap.visitAllCallsIn(Cur, CurNodeFreq, CallFreqs, FD, Counters, Ctx);
+    if (CallFreq > CurNodeFreq)
+      CurNodeFreq = CallFreq;
+
+    // No parent? Reached a tree root, limit to call frequency updating.
+    if (SpanningTreeNodes[Cur].NumInEdges == 0)
+      continue;
+
+    assert(SpanningTreeNodes[Cur].NumInEdges == 1 && "must have 1 parent");
+    const uint32_t ParentEdge = SpanningTreeNodes[Cur].InEdges[0].ID;
+
+    // Calculate parent edge freq.
+    int64_t ParentEdgeFreq = CurNodeFreq;
+    for (int I = 0, E = CFGNodes[Cur].NumInEdges; I != E; ++I) {
+      const uint32_t PredEdge = CFGNodes[Cur].InEdges[I].ID;
+      ParentEdgeFreq -= EdgeFreqs[PredEdge];
+    }
+
+    // Sometimes the conservative CFG that BOLT builds will lead to incorrect
+    // flow computation. For example, in a BB that transitively calls the exit
+    // syscall, BOLT will add a fall-through successor even though it should
+    // not have any successors. So this block execution will likely be wrong.
+    // We tolerate this imperfection since this case should be quite
+    // infrequent.
+    if (ParentEdgeFreq < 0)
+      ParentEdgeFreq = 0;
+
+    EdgeFreqs[ParentEdge] = ParentEdgeFreq;
+  }
+}
+
+void readDescriptions(const ELFObj &Vmlinux, ProfileWriterContext &Ctx) {
+  StringRef BoltNote = readSectionContents(Vmlinux, ".bolt.instr.tables");
+
+  const uint8_t *Ptr = BoltNote.bytes_begin() + 20;
+  uint32_t IndCallDescSize = *reinterpret_cast<const uint32_t *>(Ptr);
+  Ptr += 4 + IndCallDescSize;
+  uint32_t IndCallTargetDescSize = *reinterpret_cast<const uint32_t *>(Ptr);
+  Ptr += 4 + IndCallTargetDescSize;
+  uint32_t FuncDescSize = *reinterpret_cast<const uint32_t *>(Ptr);
+  Ctx.FuncDescData = Ptr + 4;
+  Ctx.Strings = reinterpret_cast<const char *>(Ptr + 4 + FuncDescSize);
+}
+
+/// Output Location to the fdata file
+void serializeLoc(raw_fd_ostream &OS, const ProfileWriterContext &Ctx,
+                  const Location Loc) {
+  // fdata location format: Type Name Offset
+  // Type 1 - regular symbol
+  OS << "1 " << Ctx.Strings + Loc.FunctionName << " "
+     << Twine::utohexstr(Loc.Offset) << " ";
+}
+
+const uint8_t *writeFunctionProfile(raw_fd_ostream &OS,
+                                    ProfileWriterContext &Ctx,
+                                    const uint8_t *FuncDescData) {
+  const FunctionDescription FD(FuncDescData);
+  const uint8_t *Next = FuncDescData + FD.getSize();
+
+  Graph G(FD, Ctx.Dat->data(), Ctx);
+  if (G.EdgeFreqs.empty() && G.CallFreqs.empty())
+    return Next;
+
+  for (uint32_t I = 0; I < FD.NumEdges; ++I) {
+    const uint64_t Freq = G.EdgeFreqs[I];
+    if (Freq == 0)
+      continue;
+    const EdgeDescription *Desc = &FD.Edges[I];
+    serializeLoc(OS, Ctx, Desc->From);
+    serializeLoc(OS, Ctx, Desc->To);
+    OS << "0 " << Freq << "\n";
+  }
+
+  for (uint32_t I = 0; I < FD.NumCalls; ++I) {
+    const uint64_t Freq = G.CallFreqs[I];
+    if (Freq == 0)
+      continue;
+    const CallDescription *Desc = &FD.Calls[I];
+    serializeLoc(OS, Ctx, Desc->From);
+    serializeLoc(OS, Ctx, Desc->To);
+    OS << "0 " << Freq << "\n";
+  }
+
+  return Next;
+}
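// Editor's note (illustrative, not part of the patch): with the serialization
// above, each emitted line follows the BOLT fdata branch record layout
//   1 <from_name> <from_offset> 1 <to_name> <to_offset> <mispreds> <count>
// where mispredictions are always written as 0 here, e.g. (hypothetical names):
//   1 start_kernel 2a 1 do_initcalls 0 0 4096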
+
+int diffMode() {
+  ProfileWriterContext Ctx;
+
+  std::unique_ptr<std::vector<uint64_t>> Dat1 =
+      readFileAsVector<uint64_t>(Dat1Filename);
+
+  if (!Dat2Filename.empty()) {
+    std::unique_ptr<std::vector<uint64_t>> Dat2 =
+        readFileAsVector<uint64_t>(Dat2Filename);
+    if (Dat1->size() != Dat2->size())
+      report_error(".dat files are not of the same size");
+
+    for (uint64_t i = 0; i < Dat1->size(); ++i)
+      (*Dat2)[i] -= (*Dat1)[i];
+    Dat1 = std::move(Dat2);
+  }
+
+  Ctx.Dat = std::move(Dat1);
+
+  std::error_code EC;
+  raw_fd_ostream OutputFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None);
+  if (EC)
+    report_error(OutputFilename, EC);
+
+  if (StringRef(OutputFilename).ends_with(".dat")) {
+    OutputFile.write(reinterpret_cast<const char *>(Ctx.Dat->data()),
+                     Ctx.Dat->size() * sizeof(uint64_t));
+    return EXIT_SUCCESS;
+  }
+
+  ELFObj Vmlinux(VmlinuxFilename);
+  if (Error E = Vmlinux.init())
+    report_error(VmlinuxFilename, std::move(E));
+
+  readDescriptions(Vmlinux, Ctx);
+
+  const uint8_t *FuncDescData = Ctx.FuncDescData;
+  while (reinterpret_cast<uintptr_t>(FuncDescData) <
+         reinterpret_cast<uintptr_t>(Ctx.Strings))
+    FuncDescData = writeFunctionProfile(OutputFile, Ctx, FuncDescData);
+  assert(reinterpret_cast<uintptr_t>(FuncDescData) ==
+         reinterpret_cast<uintptr_t>(Ctx.Strings));
+  return EXIT_SUCCESS;
+}
+
+} // namespace
+
+int main(int argc, char **argv) {
+  cl::HideUnrelatedOptions({&LinuxInstrDataCat});
+  cl::ParseCommandLineOptions(argc, argv);
+
+  if (DumpSubCommand)
+    return dumpMode();
+
+  if (DiffSubCommand)
+    return diffMode();
+
+  cl::PrintHelpMessage();
+  return EXIT_FAILURE;
+}
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 22d0708f54786043ba65f8ee4a42ac236fa2536f..d7cf1dc6e9316079f99769efbb4ab8eacd52e3cc 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -84,6 +84,7 @@
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FileSystem.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetMachine.h"
@@ -274,6 +275,14 @@ static cl::opt<bool>
     DisableDeletePHIs("disable-cgp-delete-phis", cl::Hidden, cl::init(false),
                       cl::desc("Disable elimination of dead PHI nodes."));
 
+cl::opt<std::string>
+    BoltFunctionListFile("bolt-function-list-file", cl::Hidden,
+                         cl::desc("Specify BOLT function list file"));
+
+cl::opt<std::string> BoltKeepAddressFunctionListFile(
+    "bolt-keep-address-function-list-file", cl::Hidden,
+    cl::desc("Specify BOLT KeepAddress function list file"));
+
 namespace {
 
 enum ExtType {
@@ -504,7 +513,43 @@ public:
 
 char CodeGenPrepareLegacyPass::ID = 0;
 
+template <typename T> void GatherForBoltKA(raw_fd_ostream &OS, T &I) {
+  switch (I.getOpcode()) {
+  case Instruction::ICmp:
+  case Instruction::PtrToInt:
+    for (Use &U : I.operands())
+      if (auto *FF = dyn_cast<Function>(U.get()))
+        OS << FF->getName() << "\n";
+    break;
+  default:;
+  }
+  for (Use &U : I.operands())
+    if (auto *CE = dyn_cast<ConstantExpr>(U.get()))
+      GatherForBoltKA(OS, *CE);
+}
+
 bool CodeGenPrepareLegacyPass::runOnFunction(Function &F) {
+  if (!BoltFunctionListFile.empty()) {
+    std::error_code EC;
+    raw_fd_ostream OS(BoltFunctionListFile, EC, sys::fs::OpenFlags::OF_Append);
+    if (EC)
+      report_fatal_error(Twine(BoltFunctionListFile) + ": " + EC.message());
+    OS << F.getName() << "\n";
+  }
+
+  if (!BoltKeepAddressFunctionListFile.empty()) {
+    std::error_code EC;
+    raw_fd_ostream OS(BoltKeepAddressFunctionListFile, EC,
+                      sys::fs::OpenFlags::OF_Append);
+    if (EC)
+      report_fatal_error(Twine(BoltKeepAddressFunctionListFile) + ": " +
+                         EC.message());
+
+    for (BasicBlock &BB : F)
+      for (Instruction &I : BB)
+        GatherForBoltKA(OS, I);
+  }
+
   if (skipFunction(F))
     return false;
   auto TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
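GatherForBoltKA records functions whose addresses feed an icmp or ptrtoint, either directly or through a constant expression. A hedged illustration of the kind of source that ends up in the keep-address list (hypothetical names, not taken from the patch):

static void default_handler() {}

// The comparison lowers to an icmp whose operand is the Function constant.
bool isDefaultHandler(void (*Handler)()) {
  return Handler == &default_handler;
}

// The cast lowers to a ptrtoint constant expression on the Function constant,
// which the recursive ConstantExpr walk above also reports.
unsigned long defaultHandlerKey() {
  return reinterpret_cast<unsigned long>(&default_handler);
}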