diff --git a/0001-LoongArch-Add-relax-feature-and-keep-relocations-721.patch b/0001-LoongArch-Add-relax-feature-and-keep-relocations-721.patch new file mode 100644 index 0000000000000000000000000000000000000000..24d8b22c7f2f8ddcb92459676ede6a505d2c3fd5 --- /dev/null +++ b/0001-LoongArch-Add-relax-feature-and-keep-relocations-721.patch @@ -0,0 +1,178 @@ +From 6f135b13769c64a6942b4b232a350b6a6207f2b2 Mon Sep 17 00:00:00 2001 +From: Jinyang He +Date: Thu, 16 Nov 2023 11:01:26 +0800 +Subject: [PATCH 01/66] [LoongArch] Add relax feature and keep relocations + (#72191) + +Add relax feature. To support linker relocation, we should make +relocation with a symbol rather than section plus offset, and keep all +relocations with non-abs symbol. + +(cherry picked from commit f5bfc833fcbf17a5876911783d1adaca7028d20c) +Change-Id: Ief38b480016175f2cc9939b74a84d9444559ffd6 +--- + llvm/lib/Target/LoongArch/LoongArch.td | 4 +++ + .../lib/Target/LoongArch/LoongArchSubtarget.h | 2 ++ + .../MCTargetDesc/LoongArchAsmBackend.cpp | 5 +-- + .../MCTargetDesc/LoongArchELFObjectWriter.cpp | 18 ++++++++--- + .../MCTargetDesc/LoongArchMCTargetDesc.h | 2 +- + .../MC/LoongArch/Relocations/relax-attr.s | 32 +++++++++++++++++++ + 6 files changed, 55 insertions(+), 8 deletions(-) + create mode 100644 llvm/test/MC/LoongArch/Relocations/relax-attr.s + +diff --git a/llvm/lib/Target/LoongArch/LoongArch.td b/llvm/lib/Target/LoongArch/LoongArch.td +index 0675caa3b601..75b65fe69f26 100644 +--- a/llvm/lib/Target/LoongArch/LoongArch.td ++++ b/llvm/lib/Target/LoongArch/LoongArch.td +@@ -102,6 +102,10 @@ def FeatureUAL + : SubtargetFeature<"ual", "HasUAL", "true", + "Allow memory accesses to be unaligned">; + ++def FeatureRelax ++ : SubtargetFeature<"relax", "HasLinkerRelax", "true", ++ "Enable Linker relaxation">; ++ + //===----------------------------------------------------------------------===// + // Registers, instruction descriptions ... 
+ //===----------------------------------------------------------------------===// +diff --git a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h +index 0fbe23f2f62d..5c173675cca4 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h ++++ b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h +@@ -43,6 +43,7 @@ class LoongArchSubtarget : public LoongArchGenSubtargetInfo { + bool HasLaGlobalWithAbs = false; + bool HasLaLocalWithAbs = false; + bool HasUAL = false; ++ bool HasLinkerRelax = false; + unsigned GRLen = 32; + MVT GRLenVT = MVT::i32; + LoongArchABI::ABI TargetABI = LoongArchABI::ABI_Unknown; +@@ -100,6 +101,7 @@ public: + bool hasLaGlobalWithAbs() const { return HasLaGlobalWithAbs; } + bool hasLaLocalWithAbs() const { return HasLaLocalWithAbs; } + bool hasUAL() const { return HasUAL; } ++ bool hasLinkerRelax() const { return HasLinkerRelax; } + MVT getGRLenVT() const { return GRLenVT; } + unsigned getGRLen() const { return GRLen; } + LoongArchABI::ABI getTargetABI() const { return TargetABI; } +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp +index ecb68ff401e9..aae3e544d326 100644 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp +@@ -168,7 +168,7 @@ bool LoongArchAsmBackend::shouldForceRelocation(const MCAssembler &Asm, + return true; + switch (Fixup.getTargetKind()) { + default: +- return false; ++ return STI.hasFeature(LoongArch::FeatureRelax); + case FK_Data_1: + case FK_Data_2: + case FK_Data_4: +@@ -193,7 +193,8 @@ bool LoongArchAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count, + + std::unique_ptr + LoongArchAsmBackend::createObjectTargetWriter() const { +- return createLoongArchELFObjectWriter(OSABI, Is64Bit); ++ return createLoongArchELFObjectWriter( ++ OSABI, Is64Bit, STI.hasFeature(LoongArch::FeatureRelax)); + } + + MCAsmBackend *llvm::createLoongArchAsmBackend(const Target &T, +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp +index a6b9c0652639..e60b9c2cfd97 100644 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp +@@ -20,19 +20,27 @@ using namespace llvm; + namespace { + class LoongArchELFObjectWriter : public MCELFObjectTargetWriter { + public: +- LoongArchELFObjectWriter(uint8_t OSABI, bool Is64Bit); ++ LoongArchELFObjectWriter(uint8_t OSABI, bool Is64Bit, bool EnableRelax); + + ~LoongArchELFObjectWriter() override; + ++ bool needsRelocateWithSymbol(const MCSymbol &Sym, ++ unsigned Type) const override { ++ return EnableRelax; ++ } ++ + protected: + unsigned getRelocType(MCContext &Ctx, const MCValue &Target, + const MCFixup &Fixup, bool IsPCRel) const override; ++ bool EnableRelax; + }; + } // end namespace + +-LoongArchELFObjectWriter::LoongArchELFObjectWriter(uint8_t OSABI, bool Is64Bit) ++LoongArchELFObjectWriter::LoongArchELFObjectWriter(uint8_t OSABI, bool Is64Bit, ++ bool EnableRelax) + : MCELFObjectTargetWriter(Is64Bit, OSABI, ELF::EM_LOONGARCH, +- /*HasRelocationAddend*/ true) {} ++ /*HasRelocationAddend=*/true), ++ EnableRelax(EnableRelax) {} + + LoongArchELFObjectWriter::~LoongArchELFObjectWriter() {} + +@@ -87,6 +95,6 @@ unsigned LoongArchELFObjectWriter::getRelocType(MCContext &Ctx, + } + + std::unique_ptr 
+-llvm::createLoongArchELFObjectWriter(uint8_t OSABI, bool Is64Bit) { +- return std::make_unique(OSABI, Is64Bit); ++llvm::createLoongArchELFObjectWriter(uint8_t OSABI, bool Is64Bit, bool Relax) { ++ return std::make_unique(OSABI, Is64Bit, Relax); + } +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h +index ab35a0096c8a..bb05baa9b717 100644 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h +@@ -36,7 +36,7 @@ MCAsmBackend *createLoongArchAsmBackend(const Target &T, + const MCTargetOptions &Options); + + std::unique_ptr +-createLoongArchELFObjectWriter(uint8_t OSABI, bool Is64Bit); ++createLoongArchELFObjectWriter(uint8_t OSABI, bool Is64Bit, bool Relax); + + } // end namespace llvm + +diff --git a/llvm/test/MC/LoongArch/Relocations/relax-attr.s b/llvm/test/MC/LoongArch/Relocations/relax-attr.s +new file mode 100644 +index 000000000000..b1e648d850bb +--- /dev/null ++++ b/llvm/test/MC/LoongArch/Relocations/relax-attr.s +@@ -0,0 +1,32 @@ ++# RUN: llvm-mc --filetype=obj --triple=loongarch64 %s -o %t ++# RUN: llvm-readobj -r %t | FileCheck %s ++# RUN: llvm-mc --filetype=obj --triple=loongarch64 -mattr=+relax %s -o %t ++# RUN: llvm-readobj -r %t | FileCheck %s --check-prefix=CHECKR ++ ++# CHECK: Relocations [ ++# CHECK-NEXT: Section ({{.*}}) .rela.data { ++# CHECK-NEXT: 0x0 R_LARCH_64 .text 0x4 ++# CHECK-NEXT: } ++# CHECK-NEXT: ] ++ ++# CHECKR: Relocations [ ++# CHECKR-NEXT: Section ({{.*}}) .rela.text { ++# CHECKR-NEXT: 0x8 R_LARCH_B21 .L1 0x0 ++# CHECKR-NEXT: 0xC R_LARCH_B16 .L1 0x0 ++# CHECKR-NEXT: 0x10 R_LARCH_B26 .L1 0x0 ++# CHECKR-NEXT: } ++# CHECKR-NEXT: Section ({{.*}}) .rela.data { ++# CHECKR-NEXT: 0x0 R_LARCH_64 .L1 0x0 ++# CHECKR-NEXT: } ++# CHECKR-NEXT: ] ++ ++.text ++ nop ++.L1: ++ nop ++ beqz $a0, .L1 ++ blt $a0, $a1, .L1 ++ b .L1 ++ ++.data ++.dword .L1 +-- +2.20.1 + diff --git a/0001-backport-LoongArch-patches.patch b/0001-backport-LoongArch-patches.patch deleted file mode 100644 index 7328bc0602b18060a1faf2218cc7316caa187778..0000000000000000000000000000000000000000 --- a/0001-backport-LoongArch-patches.patch +++ /dev/null @@ -1,4939 +0,0 @@ -diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst -index 174818417..22067d184 100644 ---- a/llvm/docs/ReleaseNotes.rst -+++ b/llvm/docs/ReleaseNotes.rst -@@ -186,6 +186,8 @@ Changes to the LoongArch Backend - * Initial JITLink support is added. - (`D141036 `_) - -+* The `lp64s` ABI is supported now and has been tested on Rust bare-matal target. -+ - Changes to the MIPS Backend - --------------------------- - -diff --git a/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def b/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def -index 67dbd0201..02bce3c71 100644 ---- a/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def -+++ b/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def -@@ -103,3 +103,18 @@ ELF_RELOC(R_LARCH_TLS_GD_PC_HI20, 97) - ELF_RELOC(R_LARCH_TLS_GD_HI20, 98) - ELF_RELOC(R_LARCH_32_PCREL, 99) - ELF_RELOC(R_LARCH_RELAX, 100) -+ -+// Relocs added in ELF for the LoongArchâ„¢ Architecture v20230519, part of the -+// v2.10 LoongArch ABI specs. 
-+// -+// Spec addition: https://github.com/loongson/la-abi-specs/pull/1 -+// Binutils commit 57a930e3bfe4b2c7fd6463ed39311e1938513138 -+ELF_RELOC(R_LARCH_DELETE, 101) -+ELF_RELOC(R_LARCH_ALIGN, 102) -+ELF_RELOC(R_LARCH_PCREL20_S2, 103) -+ELF_RELOC(R_LARCH_CFA, 104) -+ELF_RELOC(R_LARCH_ADD6, 105) -+ELF_RELOC(R_LARCH_SUB6, 106) -+ELF_RELOC(R_LARCH_ADD_ULEB128, 107) -+ELF_RELOC(R_LARCH_SUB_ULEB128, 108) -+ELF_RELOC(R_LARCH_64_PCREL, 109) -diff --git a/llvm/include/llvm/TargetParser/LoongArchTargetParser.def b/llvm/include/llvm/TargetParser/LoongArchTargetParser.def -index 4ebdcc012..b20d12495 100644 ---- a/llvm/include/llvm/TargetParser/LoongArchTargetParser.def -+++ b/llvm/include/llvm/TargetParser/LoongArchTargetParser.def -@@ -2,8 +2,6 @@ - #define LOONGARCH_FEATURE(NAME, KIND) - #endif - --LOONGARCH_FEATURE("invalid", FK_INVALID) --LOONGARCH_FEATURE("none", FK_NONE) - LOONGARCH_FEATURE("+64bit", FK_64BIT) - LOONGARCH_FEATURE("+f", FK_FP32) - LOONGARCH_FEATURE("+d", FK_FP64) -@@ -11,6 +9,7 @@ LOONGARCH_FEATURE("+lsx", FK_LSX) - LOONGARCH_FEATURE("+lasx", FK_LASX) - LOONGARCH_FEATURE("+lbt", FK_LBT) - LOONGARCH_FEATURE("+lvz", FK_LVZ) -+LOONGARCH_FEATURE("+ual", FK_UAL) - - #undef LOONGARCH_FEATURE - -@@ -18,8 +17,7 @@ LOONGARCH_FEATURE("+lvz", FK_LVZ) - #define LOONGARCH_ARCH(NAME, KIND, FEATURES) - #endif - --LOONGARCH_ARCH("invalid", AK_INVALID, FK_INVALID) --LOONGARCH_ARCH("loongarch64", AK_LOONGARCH64, FK_64BIT | FK_FP32 | FK_FP64) --LOONGARCH_ARCH("la464", AK_LA464, FK_64BIT | FK_FP32 | FK_FP64 | FK_LSX | FK_LASX) -+LOONGARCH_ARCH("loongarch64", AK_LOONGARCH64, FK_64BIT | FK_FP32 | FK_FP64 | FK_UAL) -+LOONGARCH_ARCH("la464", AK_LA464, FK_64BIT | FK_FP32 | FK_FP64 | FK_LSX | FK_LASX | FK_UAL) - - #undef LOONGARCH_ARCH -diff --git a/llvm/include/llvm/TargetParser/LoongArchTargetParser.h b/llvm/include/llvm/TargetParser/LoongArchTargetParser.h -index 53f9073e4..028844187 100644 ---- a/llvm/include/llvm/TargetParser/LoongArchTargetParser.h -+++ b/llvm/include/llvm/TargetParser/LoongArchTargetParser.h -@@ -23,9 +23,6 @@ class StringRef; - namespace LoongArch { - - enum FeatureKind : uint32_t { -- FK_INVALID = 0, -- FK_NONE = 1, -- - // 64-bit ISA is available. - FK_64BIT = 1 << 1, - -@@ -46,6 +43,9 @@ enum FeatureKind : uint32_t { - - // Loongson Virtualization Extension is available. - FK_LVZ = 1 << 7, -+ -+ // Allow memory accesses to be unaligned. -+ FK_UAL = 1 << 8, - }; - - struct FeatureInfo { -@@ -64,11 +64,14 @@ struct ArchInfo { - uint32_t Features; - }; - --ArchKind parseArch(StringRef Arch); -+bool isValidArchName(StringRef Arch); - bool getArchFeatures(StringRef Arch, std::vector &Features); -+bool isValidCPUName(StringRef TuneCPU); -+void fillValidCPUList(SmallVectorImpl &Values); -+StringRef getDefaultArch(bool Is64Bit); - - } // namespace LoongArch - - } // namespace llvm - --#endif // LLVM_SUPPORT_LOONGARCHTARGETPARSER_H -+#endif // LLVM_TARGETPARSER_LOONGARCHTARGETPARSER_H -diff --git a/llvm/include/llvm/TargetParser/Triple.h b/llvm/include/llvm/TargetParser/Triple.h -index 59513fa2f..5ddb1d314 100644 ---- a/llvm/include/llvm/TargetParser/Triple.h -+++ b/llvm/include/llvm/TargetParser/Triple.h -@@ -846,10 +846,14 @@ public: - : PointerWidth == 64; - } - -+ /// Tests whether the target is 32-bit LoongArch. -+ bool isLoongArch32() const { return getArch() == Triple::loongarch32; } -+ -+ /// Tests whether the target is 64-bit LoongArch. -+ bool isLoongArch64() const { return getArch() == Triple::loongarch64; } -+ - /// Tests whether the target is LoongArch (32- and 64-bit). 
-- bool isLoongArch() const { -- return getArch() == Triple::loongarch32 || getArch() == Triple::loongarch64; -- } -+ bool isLoongArch() const { return isLoongArch32() || isLoongArch64(); } - - /// Tests whether the target is MIPS 32-bit (little and big endian). - bool isMIPS32() const { -diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp -index 9a3609bc1..dc5c443ea 100644 ---- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp -+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp -@@ -10247,8 +10247,7 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) { - case ISD::ADD: - SDValue N0 = N.getOperand(0); - SDValue N1 = N.getOperand(1); -- if (!isConstantIntBuildVectorOrConstantInt(N0) && -- isConstantIntBuildVectorOrConstantInt(N1)) { -+ if (!isa(N0) && isa(N1)) { - uint64_t Offset = N.getConstantOperandVal(1); - - // Rewrite an ADD constant node into a DIExpression. Since we are -diff --git a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp -index bc84988e3..e06dea9d5 100644 ---- a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp -+++ b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp -@@ -802,7 +802,8 @@ LLJIT::createObjectLinkingLayer(LLJITBuilderState &S, ExecutionSession &ES) { - - if (S.JTMB->getTargetTriple().isOSBinFormatELF() && - (S.JTMB->getTargetTriple().getArch() == Triple::ArchType::ppc64 || -- S.JTMB->getTargetTriple().getArch() == Triple::ArchType::ppc64le)) -+ S.JTMB->getTargetTriple().getArch() == Triple::ArchType::ppc64le || -+ S.JTMB->getTargetTriple().getArch() == Triple::ArchType::loongarch64)) - Layer->setAutoClaimResponsibilityForObjectSymbols(true); - - // FIXME: Explicit conversion to std::unique_ptr added to silence -diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp -index a9aaff424..b154ea287 100644 ---- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp -+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp -@@ -987,6 +987,18 @@ uint8_t *RuntimeDyldImpl::createStubFunction(uint8_t *Addr, - // and stubs for branches Thumb - ARM and ARM - Thumb. - writeBytesUnaligned(0xe51ff004, Addr, 4); // ldr pc, [pc, #-4] - return Addr + 4; -+ } else if (Arch == Triple::loongarch64) { -+ // lu12i.w $t0, %abs_hi20(addr) -+ // ori $t0, $t0, %abs_lo12(addr) -+ // lu32i.d $t0, %abs64_lo20(addr) -+ // lu52i.d $t0, $t0, %abs64_lo12(addr) -+ // jr $t0 -+ writeBytesUnaligned(0x1400000c, Addr, 4); -+ writeBytesUnaligned(0x0380018c, Addr + 4, 4); -+ writeBytesUnaligned(0x1600000c, Addr + 8, 4); -+ writeBytesUnaligned(0x0300018c, Addr + 12, 4); -+ writeBytesUnaligned(0x4c000180, Addr + 16, 4); -+ return Addr; - } else if (IsMipsO32ABI || IsMipsN32ABI) { - // 0: 3c190000 lui t9,%hi(addr). - // 4: 27390000 addiu t9,t9,%lo(addr). -diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp -index 2fe49fefa..f85452bee 100644 ---- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp -+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp -@@ -641,6 +641,102 @@ void RuntimeDyldELF::resolveARMRelocation(const SectionEntry &Section, - } - } - -+// Returns extract bits Val[Hi:Lo]. 
-+static inline uint32_t extractBits(uint32_t Val, unsigned Hi, unsigned Lo) { -+ return (Val & (((1UL << (Hi + 1)) - 1))) >> Lo; -+} -+ -+void RuntimeDyldELF::resolveLoongArch64Relocation(const SectionEntry &Section, -+ uint64_t Offset, -+ uint64_t Value, uint32_t Type, -+ int64_t Addend) { -+ uint32_t *TargetPtr = -+ reinterpret_cast(Section.getAddressWithOffset(Offset)); -+ uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); -+ -+ LLVM_DEBUG(dbgs() << "resolveLoongArch64Relocation, LocalAddress: 0x" -+ << format("%llx", Section.getAddressWithOffset(Offset)) -+ << " FinalAddress: 0x" << format("%llx", FinalAddress) -+ << " Value: 0x" << format("%llx", Value) << " Type: 0x" -+ << format("%x", Type) << " Addend: 0x" -+ << format("%llx", Addend) << "\n"); -+ -+ switch (Type) { -+ default: -+ report_fatal_error("Relocation type not implemented yet!"); -+ break; -+ case ELF::R_LARCH_32: -+ *(support::little32_t *)TargetPtr = static_cast(Value + Addend); -+ break; -+ case ELF::R_LARCH_64: -+ *(support::little64_t *)TargetPtr = Value + Addend; -+ break; -+ case ELF::R_LARCH_32_PCREL: -+ *(support::little32_t *)TargetPtr = -+ static_cast(Value - FinalAddress + Addend); -+ break; -+ case ELF::R_LARCH_B26: { -+ uint64_t BranchImm = Value - FinalAddress + Addend; -+ assert(isInt<28>(BranchImm)); -+ uint32_t RawInstr = *(support::little32_t *)TargetPtr; -+ uint32_t Imm = static_cast(BranchImm >> 2); -+ uint32_t Imm15_0 = extractBits(Imm, /*Hi=*/15, /*Lo=*/0) << 10; -+ uint32_t Imm25_16 = extractBits(Imm, /*Hi=*/25, /*Lo=*/16); -+ *(support::little32_t *)TargetPtr = RawInstr | Imm15_0 | Imm25_16; -+ break; -+ } -+ case ELF::R_LARCH_GOT_PC_HI20: -+ case ELF::R_LARCH_PCALA_HI20: { -+ uint64_t Target = Value + Addend; -+ uint64_t TargetPage = -+ (Target + (Target & 0x800)) & ~static_cast(0xfff); -+ uint64_t PCPage = FinalAddress & ~static_cast(0xfff); -+ int64_t PageDelta = TargetPage - PCPage; -+ uint32_t RawInstr = *(support::little32_t *)TargetPtr; -+ uint32_t Imm31_12 = extractBits(PageDelta, /*Hi=*/31, /*Lo=*/12) << 5; -+ *(support::little32_t *)TargetPtr = RawInstr | Imm31_12; -+ break; -+ } -+ case ELF::R_LARCH_GOT_PC_LO12: -+ case ELF::R_LARCH_PCALA_LO12: { -+ // TODO: code-model=medium -+ uint64_t TargetOffset = (Value + Addend) & 0xfff; -+ uint32_t RawInstr = *(support::little32_t *)TargetPtr; -+ uint32_t Imm11_0 = TargetOffset << 10; -+ *(support::little32_t *)TargetPtr = RawInstr | Imm11_0; -+ break; -+ } -+ case ELF::R_LARCH_ABS_HI20: { -+ uint64_t Target = Value + Addend; -+ uint32_t RawInstr = *(support::little32_t *)TargetPtr; -+ uint32_t Imm31_12 = extractBits(Target, /*Hi=*/31, /*Lo=*/12) << 5; -+ *(support::little32_t *)TargetPtr = RawInstr | Imm31_12; -+ break; -+ } -+ case ELF::R_LARCH_ABS_LO12: { -+ uint64_t Target = Value + Addend; -+ uint32_t RawInstr = *(support::little32_t *)TargetPtr; -+ uint32_t Imm11_0 = extractBits(Target, /*Hi=*/11, /*Lo=*/0) << 10; -+ *(support::little32_t *)TargetPtr = RawInstr | Imm11_0; -+ break; -+ } -+ case ELF::R_LARCH_ABS64_LO20: { -+ uint64_t Target = Value + Addend; -+ uint32_t RawInstr = *(support::little32_t *)TargetPtr; -+ uint32_t Imm51_32 = extractBits(Target >> 32, /*Hi=*/19, /*Lo=*/0) << 5; -+ *(support::little32_t *)TargetPtr = RawInstr | Imm51_32; -+ break; -+ } -+ case ELF::R_LARCH_ABS64_HI12: { -+ uint64_t Target = Value + Addend; -+ uint32_t RawInstr = *(support::little32_t *)TargetPtr; -+ uint32_t Imm63_52 = extractBits(Target >> 32, /*Hi=*/31, /*Lo=*/20) << 5; -+ *(support::little32_t *)TargetPtr = RawInstr | Imm63_52; 
-+ break; -+ } -+ } -+} -+ - void RuntimeDyldELF::setMipsABI(const ObjectFile &Obj) { - if (Arch == Triple::UnknownArch || - !StringRef(Triple::getArchTypePrefix(Arch)).equals("mips")) { -@@ -1057,6 +1153,9 @@ void RuntimeDyldELF::resolveRelocation(const SectionEntry &Section, - resolveARMRelocation(Section, Offset, (uint32_t)(Value & 0xffffffffL), Type, - (uint32_t)(Addend & 0xffffffffL)); - break; -+ case Triple::loongarch64: -+ resolveLoongArch64Relocation(Section, Offset, Value, Type, Addend); -+ break; - case Triple::ppc: // Fall through. - case Triple::ppcle: - resolvePPC32Relocation(Section, Offset, Value, Type, Addend); -@@ -1209,6 +1308,81 @@ void RuntimeDyldELF::resolveAArch64Branch(unsigned SectionID, - } - } - -+bool RuntimeDyldELF::resolveLoongArch64ShortBranch( -+ unsigned SectionID, relocation_iterator RelI, -+ const RelocationValueRef &Value) { -+ uint64_t Address; -+ if (Value.SymbolName) { -+ auto Loc = GlobalSymbolTable.find(Value.SymbolName); -+ // Don't create direct branch for external symbols. -+ if (Loc == GlobalSymbolTable.end()) -+ return false; -+ const auto &SymInfo = Loc->second; -+ Address = -+ uint64_t(Sections[SymInfo.getSectionID()].getLoadAddressWithOffset( -+ SymInfo.getOffset())); -+ } else { -+ Address = uint64_t(Sections[Value.SectionID].getLoadAddress()); -+ } -+ uint64_t Offset = RelI->getOffset(); -+ uint64_t SourceAddress = Sections[SectionID].getLoadAddressWithOffset(Offset); -+ if (!isInt<28>(Address + Value.Addend - SourceAddress)) -+ return false; -+ resolveRelocation(Sections[SectionID], Offset, Address, RelI->getType(), -+ Value.Addend); -+ return true; -+} -+ -+void RuntimeDyldELF::resolveLoongArch64Branch(unsigned SectionID, -+ const RelocationValueRef &Value, -+ relocation_iterator RelI, -+ StubMap &Stubs) { -+ LLVM_DEBUG(dbgs() << "\t\tThis is an LoongArch64 branch relocation.\n"); -+ SectionEntry &Section = Sections[SectionID]; -+ uint64_t Offset = RelI->getOffset(); -+ unsigned RelType = RelI->getType(); -+ // Look for an existing stub. -+ StubMap::const_iterator i = Stubs.find(Value); -+ if (i != Stubs.end()) { -+ resolveRelocation(Section, Offset, -+ (uint64_t)Section.getAddressWithOffset(i->second), -+ RelType, 0); -+ LLVM_DEBUG(dbgs() << " Stub function found\n"); -+ } else if (!resolveLoongArch64ShortBranch(SectionID, RelI, Value)) { -+ // Create a new stub function. 
-+ LLVM_DEBUG(dbgs() << " Create a new stub function\n"); -+ Stubs[Value] = Section.getStubOffset(); -+ uint8_t *StubTargetAddr = createStubFunction( -+ Section.getAddressWithOffset(Section.getStubOffset())); -+ RelocationEntry LU12I_W(SectionID, StubTargetAddr - Section.getAddress(), -+ ELF::R_LARCH_ABS_HI20, Value.Addend); -+ RelocationEntry ORI(SectionID, StubTargetAddr - Section.getAddress() + 4, -+ ELF::R_LARCH_ABS_LO12, Value.Addend); -+ RelocationEntry LU32I_D(SectionID, -+ StubTargetAddr - Section.getAddress() + 8, -+ ELF::R_LARCH_ABS64_LO20, Value.Addend); -+ RelocationEntry LU52I_D(SectionID, -+ StubTargetAddr - Section.getAddress() + 12, -+ ELF::R_LARCH_ABS64_HI12, Value.Addend); -+ if (Value.SymbolName) { -+ addRelocationForSymbol(LU12I_W, Value.SymbolName); -+ addRelocationForSymbol(ORI, Value.SymbolName); -+ addRelocationForSymbol(LU32I_D, Value.SymbolName); -+ addRelocationForSymbol(LU52I_D, Value.SymbolName); -+ } else { -+ addRelocationForSection(LU12I_W, Value.SectionID); -+ addRelocationForSection(ORI, Value.SectionID); -+ addRelocationForSection(LU32I_D, Value.SectionID); -+ addRelocationForSection(LU52I_D, Value.SectionID); -+ } -+ resolveRelocation(Section, Offset, -+ reinterpret_cast(Section.getAddressWithOffset( -+ Section.getStubOffset())), -+ RelType, 0); -+ Section.advanceStubOffset(getMaxStubSize()); -+ } -+} -+ - Expected - RuntimeDyldELF::processRelocationRef( - unsigned SectionID, relocation_iterator RelI, const ObjectFile &O, -@@ -1369,6 +1543,25 @@ RuntimeDyldELF::processRelocationRef( - } - processSimpleRelocation(SectionID, Offset, RelType, Value); - } -+ } else if (Arch == Triple::loongarch64) { -+ if (RelType == ELF::R_LARCH_B26 && MemMgr.allowStubAllocation()) { -+ resolveLoongArch64Branch(SectionID, Value, RelI, Stubs); -+ } else if (RelType == ELF::R_LARCH_GOT_PC_HI20 || -+ RelType == ELF::R_LARCH_GOT_PC_LO12) { -+ // FIXME: This will create redundant got entry. -+ uint64_t GOTOffset = allocateGOTEntries(1); -+ // Create relocation for newly created GOT entry. 
-+ RelocationEntry RE = -+ computeGOTOffsetRE(GOTOffset, Value.Offset, ELF::R_LARCH_64); -+ if (Value.SymbolName) -+ addRelocationForSymbol(RE, Value.SymbolName); -+ else -+ addRelocationForSection(RE, Value.SectionID); -+ resolveGOTOffsetRelocation(SectionID, Offset, GOTOffset + Addend, -+ RelType); -+ } else { -+ processSimpleRelocation(SectionID, Offset, RelType, Value); -+ } - } else if (IsMipsO32ABI) { - uint8_t *Placeholder = reinterpret_cast( - computePlaceholderAddress(SectionID, Offset)); -@@ -2214,6 +2407,7 @@ size_t RuntimeDyldELF::getGOTEntrySize() { - case Triple::x86_64: - case Triple::aarch64: - case Triple::aarch64_be: -+ case Triple::loongarch64: - case Triple::ppc64: - case Triple::ppc64le: - case Triple::systemz: -@@ -2525,6 +2719,10 @@ bool RuntimeDyldELF::relocationNeedsGot(const RelocationRef &R) const { - return RelTy == ELF::R_AARCH64_ADR_GOT_PAGE || - RelTy == ELF::R_AARCH64_LD64_GOT_LO12_NC; - -+ if (Arch == Triple::loongarch64) -+ return RelTy == ELF::R_LARCH_GOT_PC_HI20 || -+ RelTy == ELF::R_LARCH_GOT_PC_LO12; -+ - if (Arch == Triple::x86_64) - return RelTy == ELF::R_X86_64_GOTPCREL || - RelTy == ELF::R_X86_64_GOTPCRELX || -diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h -index dfdd98cb3..2c930219c 100644 ---- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h -+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h -@@ -48,6 +48,18 @@ class RuntimeDyldELF : public RuntimeDyldImpl { - void resolveARMRelocation(const SectionEntry &Section, uint64_t Offset, - uint32_t Value, uint32_t Type, int32_t Addend); - -+ void resolveLoongArch64Relocation(const SectionEntry &Section, -+ uint64_t Offset, uint64_t Value, -+ uint32_t Type, int64_t Addend); -+ -+ bool resolveLoongArch64ShortBranch(unsigned SectionID, -+ relocation_iterator RelI, -+ const RelocationValueRef &Value); -+ -+ void resolveLoongArch64Branch(unsigned SectionID, -+ const RelocationValueRef &Value, -+ relocation_iterator RelI, StubMap &Stubs); -+ - void resolvePPC32Relocation(const SectionEntry &Section, uint64_t Offset, - uint64_t Value, uint32_t Type, int64_t Addend); - -@@ -69,6 +81,8 @@ class RuntimeDyldELF : public RuntimeDyldImpl { - return 16; - else if (IsMipsN64ABI) - return 32; -+ if (Arch == Triple::loongarch64) -+ return 20; // lu12i.w; ori; lu32i.d; lu52i.d; jr - else if (Arch == Triple::ppc64 || Arch == Triple::ppc64le) - return 44; - else if (Arch == Triple::x86_64) -diff --git a/llvm/lib/Target/LoongArch/LoongArch.td b/llvm/lib/Target/LoongArch/LoongArch.td -index 7e5c3563f..3e9e8b251 100644 ---- a/llvm/lib/Target/LoongArch/LoongArch.td -+++ b/llvm/lib/Target/LoongArch/LoongArch.td -@@ -115,6 +115,11 @@ def HasLaLocalWithAbs - AssemblerPredicate<(all_of LaLocalWithAbs), - "Expand la.local as la.abs">; - -+// Unaligned memory access -+def FeatureUAL -+ : SubtargetFeature<"ual", "HasUAL", "true", -+ "Allow memory accesses to be unaligned">; -+ - //===----------------------------------------------------------------------===// - // Registers, instruction descriptions ... 
- //===----------------------------------------------------------------------===// -@@ -128,13 +133,19 @@ include "LoongArchInstrInfo.td" - //===----------------------------------------------------------------------===// - - def : ProcessorModel<"generic-la32", NoSchedModel, [Feature32Bit]>; --def : ProcessorModel<"generic-la64", NoSchedModel, [Feature64Bit]>; -+def : ProcessorModel<"generic-la64", NoSchedModel, [Feature64Bit, FeatureUAL]>; -+ -+// Generic 64-bit processor with double-precision floating-point support. -+def : ProcessorModel<"loongarch64", NoSchedModel, [Feature64Bit, -+ FeatureUAL, -+ FeatureBasicD]>; - - // Support generic for compatibility with other targets. The triple will be used - // to change to the appropriate la32/la64 version. - def : ProcessorModel<"generic", NoSchedModel, []>; - - def : ProcessorModel<"la464", NoSchedModel, [Feature64Bit, -+ FeatureUAL, - FeatureExtLASX, - FeatureExtLVZ, - FeatureExtLBT]>; -diff --git a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp -index 6d9cb5e17..04fdd41d6 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp -@@ -35,6 +35,12 @@ void LoongArchAsmPrinter::emitInstruction(const MachineInstr *MI) { - if (emitPseudoExpansionLowering(*OutStreamer, MI)) - return; - -+ switch (MI->getOpcode()) { -+ case TargetOpcode::PATCHABLE_FUNCTION_ENTER: -+ LowerPATCHABLE_FUNCTION_ENTER(*MI); -+ return; -+ } -+ - MCInst TmpInst; - if (!lowerLoongArchMachineInstrToMCInst(MI, TmpInst, *this)) - EmitToStreamer(*OutStreamer, TmpInst); -@@ -110,6 +116,22 @@ bool LoongArchAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, - return false; - } - -+void LoongArchAsmPrinter::LowerPATCHABLE_FUNCTION_ENTER( -+ const MachineInstr &MI) { -+ const Function &F = MF->getFunction(); -+ if (F.hasFnAttribute("patchable-function-entry")) { -+ unsigned Num; -+ if (F.getFnAttribute("patchable-function-entry") -+ .getValueAsString() -+ .getAsInteger(10, Num)) -+ return; -+ emitNops(Num); -+ return; -+ } -+ -+ // TODO: Emit sled here once we get support for XRay. -+} -+ - bool LoongArchAsmPrinter::runOnMachineFunction(MachineFunction &MF) { - AsmPrinter::runOnMachineFunction(MF); - return true; -diff --git a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h -index 23e293547..c8bf657f8 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h -+++ b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h -@@ -41,6 +41,8 @@ public: - bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, - const char *ExtraCode, raw_ostream &OS) override; - -+ void LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI); -+ - // tblgen'erated function. - bool emitPseudoExpansionLowering(MCStreamer &OutStreamer, - const MachineInstr *MI); -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -index a4a82bdef..19baa4b59 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -@@ -597,13 +597,12 @@ LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, - } - } - --// Helper function that emits error message for intrinsics with chain. -+// Helper function that emits error message for intrinsics with chain and return -+// merge values of a UNDEF and the chain. 
- static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op, - StringRef ErrorMsg, - SelectionDAG &DAG) { -- -- DAG.getContext()->emitError("argument to '" + Op->getOperationName(0) + "' " + -- ErrorMsg); -+ DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + "."); - return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)}, - SDLoc(Op)); - } -@@ -613,9 +612,11 @@ LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, - SelectionDAG &DAG) const { - SDLoc DL(Op); - MVT GRLenVT = Subtarget.getGRLenVT(); -- SDValue Op0 = Op.getOperand(0); -- std::string Name = Op->getOperationName(0); -- const StringRef ErrorMsgOOR = "out of range"; -+ EVT VT = Op.getValueType(); -+ SDValue Chain = Op.getOperand(0); -+ const StringRef ErrorMsgOOR = "argument out of range"; -+ const StringRef ErrorMsgReqLA64 = "requires loongarch64"; -+ const StringRef ErrorMsgReqF = "requires basic 'f' target feature"; - - switch (Op.getConstantOperandVal(1)) { - default: -@@ -627,115 +628,76 @@ LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, - case Intrinsic::loongarch_crcc_w_b_w: - case Intrinsic::loongarch_crcc_w_h_w: - case Intrinsic::loongarch_crcc_w_w_w: -- case Intrinsic::loongarch_crcc_w_d_w: { -- std::string Name = Op->getOperationName(0); -- DAG.getContext()->emitError(Name + " requires target: loongarch64"); -- return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op0}, DL); -- } -+ case Intrinsic::loongarch_crcc_w_d_w: -+ return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG); - case Intrinsic::loongarch_csrrd_w: - case Intrinsic::loongarch_csrrd_d: { - unsigned Imm = cast(Op.getOperand(2))->getZExtValue(); -- if (!isUInt<14>(Imm)) -- return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG); -- return DAG.getMergeValues( -- {DAG.getNode(LoongArchISD::CSRRD, DL, GRLenVT, Op0, -- DAG.getConstant(Imm, DL, GRLenVT)), -- Op0}, -- DL); -+ return !isUInt<14>(Imm) -+ ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG) -+ : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other}, -+ {Chain, DAG.getConstant(Imm, DL, GRLenVT)}); - } - case Intrinsic::loongarch_csrwr_w: - case Intrinsic::loongarch_csrwr_d: { - unsigned Imm = cast(Op.getOperand(3))->getZExtValue(); -- if (!isUInt<14>(Imm)) -- return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG); -- return DAG.getMergeValues( -- {DAG.getNode(LoongArchISD::CSRWR, DL, GRLenVT, Op0, Op.getOperand(2), -- DAG.getConstant(Imm, DL, GRLenVT)), -- Op0}, -- DL); -+ return !isUInt<14>(Imm) -+ ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG) -+ : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other}, -+ {Chain, Op.getOperand(2), -+ DAG.getConstant(Imm, DL, GRLenVT)}); - } - case Intrinsic::loongarch_csrxchg_w: - case Intrinsic::loongarch_csrxchg_d: { - unsigned Imm = cast(Op.getOperand(4))->getZExtValue(); -- if (!isUInt<14>(Imm)) -- return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG); -- return DAG.getMergeValues( -- {DAG.getNode(LoongArchISD::CSRXCHG, DL, GRLenVT, Op0, Op.getOperand(2), -- Op.getOperand(3), DAG.getConstant(Imm, DL, GRLenVT)), -- Op0}, -- DL); -+ return !isUInt<14>(Imm) -+ ? 
emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG) -+ : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other}, -+ {Chain, Op.getOperand(2), Op.getOperand(3), -+ DAG.getConstant(Imm, DL, GRLenVT)}); - } - case Intrinsic::loongarch_iocsrrd_d: { -- if (Subtarget.is64Bit()) -- return DAG.getMergeValues( -- {DAG.getNode( -- LoongArchISD::IOCSRRD_D, DL, GRLenVT, Op0, -- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))), -- Op0}, -- DL); -- else { -- DAG.getContext()->emitError( -- "llvm.loongarch.crc.w.d.w requires target: loongarch64"); -- return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op0}, DL); -- } -+ return DAG.getNode( -+ LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other}, -+ {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))}); - } - #define IOCSRRD_CASE(NAME, NODE) \ - case Intrinsic::loongarch_##NAME: { \ -- return DAG.getMergeValues( \ -- {DAG.getNode(LoongArchISD::NODE, DL, GRLenVT, Op0, Op.getOperand(2)), \ -- Op0}, \ -- DL); \ -+ return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \ -+ {Chain, Op.getOperand(2)}); \ - } - IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B); - IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H); - IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W); - #undef IOCSRRD_CASE - case Intrinsic::loongarch_cpucfg: { -- return DAG.getMergeValues( -- {DAG.getNode(LoongArchISD::CPUCFG, DL, GRLenVT, Op0, Op.getOperand(2)), -- Op0}, -- DL); -+ return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other}, -+ {Chain, Op.getOperand(2)}); - } - case Intrinsic::loongarch_lddir_d: { - unsigned Imm = cast(Op.getOperand(3))->getZExtValue(); -- if (!isUInt<8>(Imm)) { -- DAG.getContext()->emitError("argument to '" + Op->getOperationName(0) + -- "' out of range"); -- return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op0}, DL); -- } -- -- return Op; -+ return !isUInt<8>(Imm) -+ ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG) -+ : Op; - } - case Intrinsic::loongarch_movfcsr2gr: { -- if (!Subtarget.hasBasicF()) { -- DAG.getContext()->emitError( -- "llvm.loongarch.movfcsr2gr expects basic f target feature"); -- return DAG.getMergeValues( -- {DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)}, SDLoc(Op)); -- } -+ if (!Subtarget.hasBasicF()) -+ return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG); - unsigned Imm = cast(Op.getOperand(2))->getZExtValue(); -- if (!isUInt<2>(Imm)) { -- DAG.getContext()->emitError("argument to '" + Op->getOperationName(0) + -- "' " + ErrorMsgOOR); -- return DAG.getMergeValues( -- {DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)}, SDLoc(Op)); -- } -- return DAG.getMergeValues( -- {DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, Op.getValueType(), -- DAG.getConstant(Imm, DL, GRLenVT)), -- Op.getOperand(0)}, -- DL); -+ return !isUInt<2>(Imm) -+ ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG) -+ : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other}, -+ {Chain, DAG.getConstant(Imm, DL, GRLenVT)}); - } - } - } - - // Helper function that emits error message for intrinsics with void return --// value. -+// value and return the chain. 
- static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg, - SelectionDAG &DAG) { - -- DAG.getContext()->emitError("argument to '" + Op->getOperationName(0) + "' " + -- ErrorMsg); -+ DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + "."); - return Op.getOperand(0); - } - -@@ -743,10 +705,13 @@ SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op, - SelectionDAG &DAG) const { - SDLoc DL(Op); - MVT GRLenVT = Subtarget.getGRLenVT(); -- SDValue Op0 = Op.getOperand(0); -+ SDValue Chain = Op.getOperand(0); - uint64_t IntrinsicEnum = Op.getConstantOperandVal(1); - SDValue Op2 = Op.getOperand(2); -- const StringRef ErrorMsgOOR = "out of range"; -+ const StringRef ErrorMsgOOR = "argument out of range"; -+ const StringRef ErrorMsgReqLA64 = "requires loongarch64"; -+ const StringRef ErrorMsgReqLA32 = "requires loongarch32"; -+ const StringRef ErrorMsgReqF = "requires basic 'f' target feature"; - - switch (IntrinsicEnum) { - default: -@@ -754,122 +719,93 @@ SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op, - return SDValue(); - case Intrinsic::loongarch_cacop_d: - case Intrinsic::loongarch_cacop_w: { -- if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit()) { -- DAG.getContext()->emitError( -- "llvm.loongarch.cacop.d requires target: loongarch64"); -- return Op.getOperand(0); -- } -- if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit()) { -- DAG.getContext()->emitError( -- "llvm.loongarch.cacop.w requires target: loongarch32"); -- return Op.getOperand(0); -- } -+ if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit()) -+ return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG); -+ if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit()) -+ return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG); - // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12) - unsigned Imm1 = cast(Op2)->getZExtValue(); -- if (!isUInt<5>(Imm1)) -- return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG); -- SDValue Op4 = Op.getOperand(4); -- int Imm2 = cast(Op4)->getSExtValue(); -- if (!isInt<12>(Imm2)) -+ int Imm2 = cast(Op.getOperand(4))->getSExtValue(); -+ if (!isUInt<5>(Imm1) || !isInt<12>(Imm2)) - return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG); -- - return Op; - } -- - case Intrinsic::loongarch_dbar: { - unsigned Imm = cast(Op2)->getZExtValue(); -- if (!isUInt<15>(Imm)) -- return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG); -- -- return DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Op0, -- DAG.getConstant(Imm, DL, GRLenVT)); -+ return !isUInt<15>(Imm) -+ ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) -+ : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain, -+ DAG.getConstant(Imm, DL, GRLenVT)); - } - case Intrinsic::loongarch_ibar: { - unsigned Imm = cast(Op2)->getZExtValue(); -- if (!isUInt<15>(Imm)) -- return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG); -- -- return DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Op0, -- DAG.getConstant(Imm, DL, GRLenVT)); -+ return !isUInt<15>(Imm) -+ ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) -+ : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain, -+ DAG.getConstant(Imm, DL, GRLenVT)); - } - case Intrinsic::loongarch_break: { - unsigned Imm = cast(Op2)->getZExtValue(); -- if (!isUInt<15>(Imm)) -- return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG); -- -- return DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Op0, -- DAG.getConstant(Imm, DL, GRLenVT)); -+ return !isUInt<15>(Imm) -+ ? 
emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) -+ : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain, -+ DAG.getConstant(Imm, DL, GRLenVT)); - } - case Intrinsic::loongarch_movgr2fcsr: { -- if (!Subtarget.hasBasicF()) { -- DAG.getContext()->emitError( -- "llvm.loongarch.movgr2fcsr expects basic f target feature"); -- return Op0; -- } -+ if (!Subtarget.hasBasicF()) -+ return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG); - unsigned Imm = cast(Op2)->getZExtValue(); -- if (!isUInt<2>(Imm)) -- return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG); -- -- return DAG.getNode( -- LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Op0, -- DAG.getConstant(Imm, DL, GRLenVT), -- DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Op.getOperand(3))); -+ return !isUInt<2>(Imm) -+ ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) -+ : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain, -+ DAG.getConstant(Imm, DL, GRLenVT), -+ DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, -+ Op.getOperand(3))); - } - case Intrinsic::loongarch_syscall: { - unsigned Imm = cast(Op2)->getZExtValue(); -- if (!isUInt<15>(Imm)) -- return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG); -- -- return DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Op0, -- DAG.getConstant(Imm, DL, GRLenVT)); -+ return !isUInt<15>(Imm) -+ ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) -+ : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain, -+ DAG.getConstant(Imm, DL, GRLenVT)); - } - #define IOCSRWR_CASE(NAME, NODE) \ - case Intrinsic::loongarch_##NAME: { \ - SDValue Op3 = Op.getOperand(3); \ -- if (Subtarget.is64Bit()) \ -- return DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Op0, \ -- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \ -- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)); \ -- else \ -- return DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Op0, Op2, Op3); \ -+ return Subtarget.is64Bit() \ -+ ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \ -+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \ -+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \ -+ : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \ -+ Op3); \ - } - IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B); - IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H); - IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W); - #undef IOCSRWR_CASE - case Intrinsic::loongarch_iocsrwr_d: { -- if (Subtarget.is64Bit()) -- return DAG.getNode( -- LoongArchISD::IOCSRWR_D, DL, MVT::Other, Op0, Op2, -- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(3))); -- else { -- DAG.getContext()->emitError( -- "llvm.loongarch.iocsrwr.d requires target: loongarch64"); -- return Op.getOperand(0); -- } -+ return !Subtarget.is64Bit() -+ ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) -+ : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain, -+ Op2, -+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, -+ Op.getOperand(3))); - } - #define ASRT_LE_GT_CASE(NAME) \ - case Intrinsic::loongarch_##NAME: { \ -- if (!Subtarget.is64Bit()) { \ -- DAG.getContext()->emitError(Op->getOperationName(0) + \ -- " requires target: loongarch64"); \ -- return Op.getOperand(0); \ -- } \ -- return Op; \ -+ return !Subtarget.is64Bit() \ -+ ? 
emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \ -+ : Op; \ - } - ASRT_LE_GT_CASE(asrtle_d) - ASRT_LE_GT_CASE(asrtgt_d) - #undef ASRT_LE_GT_CASE - case Intrinsic::loongarch_ldpte_d: { - unsigned Imm = cast(Op.getOperand(3))->getZExtValue(); -- if (!isUInt<8>(Imm)) -- return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG); -- if (!Subtarget.is64Bit()) { -- DAG.getContext()->emitError(Op->getOperationName(0) + -- " requires target: loongarch64"); -- return Op.getOperand(0); -- } -- return Op; -+ return !Subtarget.is64Bit() -+ ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) -+ : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) -+ : Op; - } - } - } -@@ -1022,6 +958,16 @@ static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, - return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes); - } - -+// Helper function that emits error message for intrinsics with chain and return -+// a UNDEF and the chain as the results. -+static void emitErrorAndReplaceIntrinsicWithChainResults( -+ SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG, -+ StringRef ErrorMsg) { -+ DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + "."); -+ Results.push_back(DAG.getUNDEF(N->getValueType(0))); -+ Results.push_back(N->getOperand(0)); -+} -+ - void LoongArchTargetLowering::ReplaceNodeResults( - SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const { - SDLoc DL(N); -@@ -1142,50 +1088,44 @@ void LoongArchTargetLowering::ReplaceNodeResults( - break; - } - case ISD::INTRINSIC_W_CHAIN: { -- SDValue Op0 = N->getOperand(0); -- EVT VT = N->getValueType(0); -- uint64_t Op1 = N->getConstantOperandVal(1); -+ SDValue Chain = N->getOperand(0); -+ SDValue Op2 = N->getOperand(2); - MVT GRLenVT = Subtarget.getGRLenVT(); -- if (Op1 == Intrinsic::loongarch_movfcsr2gr) { -+ const StringRef ErrorMsgOOR = "argument out of range"; -+ const StringRef ErrorMsgReqLA64 = "requires loongarch64"; -+ const StringRef ErrorMsgReqF = "requires basic 'f' target feature"; -+ -+ switch (N->getConstantOperandVal(1)) { -+ default: -+ llvm_unreachable("Unexpected Intrinsic."); -+ case Intrinsic::loongarch_movfcsr2gr: { - if (!Subtarget.hasBasicF()) { -- DAG.getContext()->emitError( -- "llvm.loongarch.movfcsr2gr expects basic f target feature"); -- Results.push_back(DAG.getMergeValues( -- {DAG.getUNDEF(N->getValueType(0)), N->getOperand(0)}, SDLoc(N))); -- Results.push_back(N->getOperand(0)); -+ emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG, -+ ErrorMsgReqF); - return; - } -- unsigned Imm = cast(N->getOperand(2))->getZExtValue(); -+ unsigned Imm = cast(Op2)->getZExtValue(); - if (!isUInt<2>(Imm)) { -- DAG.getContext()->emitError("argument to '" + N->getOperationName(0) + -- "' " + "out of range"); -- Results.push_back(DAG.getMergeValues( -- {DAG.getUNDEF(N->getValueType(0)), N->getOperand(0)}, SDLoc(N))); -- Results.push_back(N->getOperand(0)); -+ emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG, -+ ErrorMsgOOR); - return; - } -+ SDValue MOVFCSR2GRResults = DAG.getNode( -+ LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other}, -+ {Chain, DAG.getConstant(Imm, DL, GRLenVT)}); - Results.push_back( -- DAG.getNode(ISD::TRUNCATE, DL, VT, -- DAG.getNode(LoongArchISD::MOVFCSR2GR, SDLoc(N), MVT::i64, -- DAG.getConstant(Imm, DL, GRLenVT)))); -- Results.push_back(N->getOperand(0)); -- return; -+ DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0))); -+ Results.push_back(MOVFCSR2GRResults.getValue(1)); -+ break; - } -- SDValue Op2 = 
N->getOperand(2); -- std::string Name = N->getOperationName(0); -- -- switch (Op1) { -- default: -- llvm_unreachable("Unexpected Intrinsic."); - #define CRC_CASE_EXT_BINARYOP(NAME, NODE) \ - case Intrinsic::loongarch_##NAME: { \ -- Results.push_back(DAG.getNode( \ -- ISD::TRUNCATE, DL, VT, \ -- DAG.getNode( \ -- LoongArchISD::NODE, DL, MVT::i64, \ -- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \ -- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))))); \ -- Results.push_back(N->getOperand(0)); \ -+ SDValue NODE = DAG.getNode( \ -+ LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \ -+ {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \ -+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \ -+ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \ -+ Results.push_back(NODE.getValue(1)); \ - break; \ - } - CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W) -@@ -1198,12 +1138,12 @@ void LoongArchTargetLowering::ReplaceNodeResults( - - #define CRC_CASE_EXT_UNARYOP(NAME, NODE) \ - case Intrinsic::loongarch_##NAME: { \ -- Results.push_back( \ -- DAG.getNode(ISD::TRUNCATE, DL, VT, \ -- DAG.getNode(LoongArchISD::NODE, DL, MVT::i64, Op2, \ -- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, \ -- N->getOperand(3))))); \ -- Results.push_back(N->getOperand(0)); \ -+ SDValue NODE = DAG.getNode( \ -+ LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \ -+ {Chain, Op2, \ -+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \ -+ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \ -+ Results.push_back(NODE.getValue(1)); \ - break; \ - } - CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W) -@@ -1211,11 +1151,9 @@ void LoongArchTargetLowering::ReplaceNodeResults( - #undef CRC_CASE_EXT_UNARYOP - #define CSR_CASE(ID) \ - case Intrinsic::loongarch_##ID: { \ -- if (!Subtarget.is64Bit()) { \ -- DAG.getContext()->emitError(Name + " requires target: loongarch64"); \ -- Results.push_back(DAG.getUNDEF(VT)); \ -- Results.push_back(N->getOperand(0)); \ -- } \ -+ if (!Subtarget.is64Bit()) \ -+ emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG, \ -+ ErrorMsgReqLA64); \ - break; \ - } - CSR_CASE(csrrd_d); -@@ -1226,62 +1164,59 @@ void LoongArchTargetLowering::ReplaceNodeResults( - case Intrinsic::loongarch_csrrd_w: { - unsigned Imm = cast(Op2)->getZExtValue(); - if (!isUInt<14>(Imm)) { -- DAG.getContext()->emitError("argument to '" + Name + "' out of range"); -- Results.push_back(DAG.getUNDEF(VT)); -- Results.push_back(N->getOperand(0)); -- break; -+ emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG, -+ ErrorMsgOOR); -+ return; - } -- -+ SDValue CSRRDResults = -+ DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other}, -+ {Chain, DAG.getConstant(Imm, DL, GRLenVT)}); - Results.push_back( -- DAG.getNode(ISD::TRUNCATE, DL, VT, -- DAG.getNode(LoongArchISD::CSRRD, DL, GRLenVT, Op0, -- DAG.getConstant(Imm, DL, GRLenVT)))); -- Results.push_back(N->getOperand(0)); -+ DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0))); -+ Results.push_back(CSRRDResults.getValue(1)); - break; - } - case Intrinsic::loongarch_csrwr_w: { - unsigned Imm = cast(N->getOperand(3))->getZExtValue(); - if (!isUInt<14>(Imm)) { -- DAG.getContext()->emitError("argument to '" + Name + "' out of range"); -- Results.push_back(DAG.getUNDEF(VT)); -- Results.push_back(N->getOperand(0)); -- break; -+ emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG, -+ ErrorMsgOOR); -+ return; - } -- -- Results.push_back(DAG.getNode( -- ISD::TRUNCATE, DL, VT, -- 
DAG.getNode(LoongArchISD::CSRWR, DL, GRLenVT, Op0, -- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), -- DAG.getConstant(Imm, DL, GRLenVT)))); -- Results.push_back(N->getOperand(0)); -+ SDValue CSRWRResults = -+ DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other}, -+ {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), -+ DAG.getConstant(Imm, DL, GRLenVT)}); -+ Results.push_back( -+ DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0))); -+ Results.push_back(CSRWRResults.getValue(1)); - break; - } - case Intrinsic::loongarch_csrxchg_w: { - unsigned Imm = cast(N->getOperand(4))->getZExtValue(); - if (!isUInt<14>(Imm)) { -- DAG.getContext()->emitError("argument to '" + Name + "' out of range"); -- Results.push_back(DAG.getUNDEF(VT)); -- Results.push_back(N->getOperand(0)); -- break; -+ emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG, -+ ErrorMsgOOR); -+ return; - } -- -- Results.push_back(DAG.getNode( -- ISD::TRUNCATE, DL, VT, -- DAG.getNode( -- LoongArchISD::CSRXCHG, DL, GRLenVT, Op0, -- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), -- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)), -- DAG.getConstant(Imm, DL, GRLenVT)))); -- Results.push_back(N->getOperand(0)); -+ SDValue CSRXCHGResults = DAG.getNode( -+ LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other}, -+ {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), -+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)), -+ DAG.getConstant(Imm, DL, GRLenVT)}); -+ Results.push_back( -+ DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0))); -+ Results.push_back(CSRXCHGResults.getValue(1)); - break; - } - #define IOCSRRD_CASE(NAME, NODE) \ - case Intrinsic::loongarch_##NAME: { \ -- Results.push_back(DAG.getNode( \ -- ISD::TRUNCATE, DL, N->getValueType(0), \ -- DAG.getNode(LoongArchISD::NODE, DL, MVT::i64, Op0, \ -- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)))); \ -- Results.push_back(N->getOperand(0)); \ -+ SDValue IOCSRRDResults = \ -+ DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \ -+ {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \ -+ Results.push_back( \ -+ DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \ -+ Results.push_back(IOCSRRDResults.getValue(1)); \ - break; \ - } - IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B); -@@ -1289,20 +1224,19 @@ void LoongArchTargetLowering::ReplaceNodeResults( - IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W); - #undef IOCSRRD_CASE - case Intrinsic::loongarch_cpucfg: { -- Results.push_back(DAG.getNode( -- ISD::TRUNCATE, DL, VT, -- DAG.getNode(LoongArchISD::CPUCFG, DL, GRLenVT, Op0, -- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)))); -- Results.push_back(Op0); -+ SDValue CPUCFGResults = -+ DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other}, -+ {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); -+ Results.push_back( -+ DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0))); -+ Results.push_back(CPUCFGResults.getValue(1)); - break; - } - case Intrinsic::loongarch_lddir_d: { - if (!Subtarget.is64Bit()) { -- DAG.getContext()->emitError(N->getOperationName(0) + -- " requires target: loongarch64"); -- Results.push_back(DAG.getUNDEF(VT)); -- Results.push_back(Op0); -- break; -+ emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG, -+ ErrorMsgReqLA64); -+ return; - } - break; - } -@@ -1764,6 +1698,18 @@ MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter( - } - } - -+bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses( -+ EVT VT, unsigned AddrSpace, Align 
Alignment, MachineMemOperand::Flags Flags, -+ unsigned *Fast) const { -+ if (!Subtarget.hasUAL()) -+ return false; -+ -+ // TODO: set reasonable speed number. -+ if (Fast) -+ *Fast = 1; -+ return true; -+} -+ - const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const { - switch ((LoongArchISD::NodeType)Opcode) { - case LoongArchISD::FIRST_NUMBER: -@@ -1907,7 +1853,6 @@ static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI, - default: - llvm_unreachable("Unexpected ABI"); - case LoongArchABI::ABI_ILP32S: -- case LoongArchABI::ABI_LP64S: - case LoongArchABI::ABI_ILP32F: - case LoongArchABI::ABI_LP64F: - report_fatal_error("Unimplemented ABI"); -@@ -1916,6 +1861,8 @@ static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI, - case LoongArchABI::ABI_LP64D: - UseGPRForFloat = !IsFixed; - break; -+ case LoongArchABI::ABI_LP64S: -+ break; - } - - // FPR32 and FPR64 alias each other. -diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h -index 0ddcda66d..62c83384c 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h -+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h -@@ -80,7 +80,22 @@ enum NodeType : unsigned { - CRCC_W_D_W, - - CSRRD, -+ -+ // Write new value to CSR and return old value. -+ // Operand 0: A chain pointer. -+ // Operand 1: The new value to write. -+ // Operand 2: The address of the required CSR. -+ // Result 0: The old value of the CSR. -+ // Result 1: The new chain pointer. - CSRWR, -+ -+ // Similar to CSRWR but with a write mask. -+ // Operand 0: A chain pointer. -+ // Operand 1: The new value to write. -+ // Operand 2: The write mask. -+ // Operand 3: The address of the required CSR. -+ // Result 0: The old value of the CSR. -+ // Result 1: The new chain pointer. - CSRXCHG, - - // IOCSR access operations -@@ -181,6 +196,11 @@ public: - bool decomposeMulByConstant(LLVMContext &Context, EVT VT, - SDValue C) const override; - -+ bool allowsMisalignedMemoryAccesses( -+ EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1), -+ MachineMemOperand::Flags Flags = MachineMemOperand::MONone, -+ unsigned *Fast = nullptr) const override; -+ - private: - /// Target-specific function used to lower LoongArch calling conventions. 
- typedef bool LoongArchCCAssignFn(const DataLayout &DL, LoongArchABI::ABI ABI, -diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp -index fbbb764b8..3e19f3e2f 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp -@@ -17,6 +17,7 @@ - #include "MCTargetDesc/LoongArchMCTargetDesc.h" - #include "MCTargetDesc/LoongArchMatInt.h" - #include "llvm/CodeGen/RegisterScavenging.h" -+#include "llvm/MC/MCInstBuilder.h" - - using namespace llvm; - -@@ -28,6 +29,13 @@ LoongArchInstrInfo::LoongArchInstrInfo(LoongArchSubtarget &STI) - LoongArch::ADJCALLSTACKUP), - STI(STI) {} - -+MCInst LoongArchInstrInfo::getNop() const { -+ return MCInstBuilder(LoongArch::ANDI) -+ .addReg(LoongArch::R0) -+ .addReg(LoongArch::R0) -+ .addImm(0); -+} -+ - void LoongArchInstrInfo::copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - const DebugLoc &DL, MCRegister DstReg, -diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h -index e2b80460f..cf83abf27 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h -+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h -@@ -27,6 +27,8 @@ class LoongArchInstrInfo : public LoongArchGenInstrInfo { - public: - explicit LoongArchInstrInfo(LoongArchSubtarget &STI); - -+ MCInst getNop() const override; -+ - void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - const DebugLoc &DL, MCRegister DstReg, MCRegister SrcReg, - bool KillSrc) const override; -diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td -index 75b2adc72..f20beee92 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td -+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td -@@ -75,21 +75,21 @@ def loongarch_srl_w : SDNode<"LoongArchISD::SRL_W", SDT_LoongArchIntBinOpW>; - def loongarch_rotr_w : SDNode<"LoongArchISD::ROTR_W", SDT_LoongArchIntBinOpW>; - def loongarch_rotl_w : SDNode<"LoongArchISD::ROTL_W", SDT_LoongArchIntBinOpW>; - def loongarch_crc_w_b_w -- : SDNode<"LoongArchISD::CRC_W_B_W", SDT_LoongArchIntBinOpW>; -+ : SDNode<"LoongArchISD::CRC_W_B_W", SDT_LoongArchIntBinOpW, [SDNPHasChain]>; - def loongarch_crc_w_h_w -- : SDNode<"LoongArchISD::CRC_W_H_W", SDT_LoongArchIntBinOpW>; -+ : SDNode<"LoongArchISD::CRC_W_H_W", SDT_LoongArchIntBinOpW, [SDNPHasChain]>; - def loongarch_crc_w_w_w -- : SDNode<"LoongArchISD::CRC_W_W_W", SDT_LoongArchIntBinOpW>; -+ : SDNode<"LoongArchISD::CRC_W_W_W", SDT_LoongArchIntBinOpW, [SDNPHasChain]>; - def loongarch_crc_w_d_w -- : SDNode<"LoongArchISD::CRC_W_D_W", SDT_LoongArchIntBinOpW>; -+ : SDNode<"LoongArchISD::CRC_W_D_W", SDT_LoongArchIntBinOpW, [SDNPHasChain]>; - def loongarch_crcc_w_b_w -- : SDNode<"LoongArchISD::CRCC_W_B_W", SDT_LoongArchIntBinOpW>; -+ : SDNode<"LoongArchISD::CRCC_W_B_W", SDT_LoongArchIntBinOpW, [SDNPHasChain]>; - def loongarch_crcc_w_h_w -- : SDNode<"LoongArchISD::CRCC_W_H_W", SDT_LoongArchIntBinOpW>; -+ : SDNode<"LoongArchISD::CRCC_W_H_W", SDT_LoongArchIntBinOpW, [SDNPHasChain]>; - def loongarch_crcc_w_w_w -- : SDNode<"LoongArchISD::CRCC_W_W_W", SDT_LoongArchIntBinOpW>; -+ : SDNode<"LoongArchISD::CRCC_W_W_W", SDT_LoongArchIntBinOpW, [SDNPHasChain]>; - def loongarch_crcc_w_d_w -- : SDNode<"LoongArchISD::CRCC_W_D_W", SDT_LoongArchIntBinOpW>; -+ : SDNode<"LoongArchISD::CRCC_W_D_W", SDT_LoongArchIntBinOpW, [SDNPHasChain]>; - def loongarch_bstrins - : SDNode<"LoongArchISD::BSTRINS", 
SDT_LoongArchBStrIns>; - def loongarch_bstrpick -@@ -106,7 +106,8 @@ def loongarch_ibar : SDNode<"LoongArchISD::IBAR", SDT_LoongArchVI, - [SDNPHasChain, SDNPSideEffect]>; - def loongarch_break : SDNode<"LoongArchISD::BREAK", SDT_LoongArchVI, - [SDNPHasChain, SDNPSideEffect]>; --def loongarch_movfcsr2gr : SDNode<"LoongArchISD::MOVFCSR2GR", SDT_LoongArchMovfcsr2gr>; -+def loongarch_movfcsr2gr : SDNode<"LoongArchISD::MOVFCSR2GR", SDT_LoongArchMovfcsr2gr, -+ [SDNPHasChain]>; - def loongarch_movgr2fcsr : SDNode<"LoongArchISD::MOVGR2FCSR", SDT_LoongArchMovgr2fcsr, - [SDNPHasChain, SDNPSideEffect]>; - def loongarch_syscall : SDNode<"LoongArchISD::SYSCALL", SDT_LoongArchVI, -@@ -139,7 +140,7 @@ def loongarch_iocsrwr_d : SDNode<"LoongArchISD::IOCSRWR_D", - SDT_LoongArchIocsrwr, - [SDNPHasChain, SDNPSideEffect]>; - def loongarch_cpucfg : SDNode<"LoongArchISD::CPUCFG", SDTUnaryOp, -- [SDNPHasChain, SDNPSideEffect]>; -+ [SDNPHasChain]>; - - //===----------------------------------------------------------------------===// - // Operand and SDNode transformation definitions. -diff --git a/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp b/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp -index d8850f656..a0136440e 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp -+++ b/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp -@@ -12,6 +12,7 @@ - - #include "LoongArchSubtarget.h" - #include "LoongArchFrameLowering.h" -+#include "MCTargetDesc/LoongArchBaseInfo.h" - - using namespace llvm; - -@@ -48,8 +49,8 @@ LoongArchSubtarget &LoongArchSubtarget::initializeSubtargetDependencies( - if (!Is64Bit && HasLA64) - report_fatal_error("Feature 64bit should be used for loongarch64 target."); - -- // TODO: ILP32{S,F} LP64{S,F} -- TargetABI = Is64Bit ? LoongArchABI::ABI_LP64D : LoongArchABI::ABI_ILP32D; -+ TargetABI = LoongArchABI::computeTargetABI(TT, ABIName); -+ - return *this; - } - -diff --git a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h -index aa87638e4..4ff42e3b1 100644 ---- a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h -+++ b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h -@@ -42,6 +42,7 @@ class LoongArchSubtarget : public LoongArchGenSubtargetInfo { - bool HasLaGlobalWithPcrel = false; - bool HasLaGlobalWithAbs = false; - bool HasLaLocalWithAbs = false; -+ bool HasUAL = false; - unsigned GRLen = 32; - MVT GRLenVT = MVT::i32; - LoongArchABI::ABI TargetABI = LoongArchABI::ABI_Unknown; -@@ -91,6 +92,7 @@ public: - bool hasLaGlobalWithPcrel() const { return HasLaGlobalWithPcrel; } - bool hasLaGlobalWithAbs() const { return HasLaGlobalWithAbs; } - bool hasLaLocalWithAbs() const { return HasLaLocalWithAbs; } -+ bool hasUAL() const { return HasUAL; } - MVT getGRLenVT() const { return GRLenVT; } - unsigned getGRLen() const { return GRLen; } - LoongArchABI::ABI getTargetABI() const { return TargetABI; } -diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp -index ff0804e2a..ecb68ff40 100644 ---- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp -+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp -@@ -202,5 +202,5 @@ MCAsmBackend *llvm::createLoongArchAsmBackend(const Target &T, - const MCTargetOptions &Options) { - const Triple &TT = STI.getTargetTriple(); - uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS()); -- return new LoongArchAsmBackend(STI, OSABI, TT.isArch64Bit()); -+ return new LoongArchAsmBackend(STI, OSABI, 
TT.isArch64Bit(), Options); - } -diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h -index 0d04cecc4..ae9bb8af0 100644 ---- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h -+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h -@@ -26,11 +26,13 @@ class LoongArchAsmBackend : public MCAsmBackend { - const MCSubtargetInfo &STI; - uint8_t OSABI; - bool Is64Bit; -+ const MCTargetOptions &TargetOptions; - - public: -- LoongArchAsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI, bool Is64Bit) -- : MCAsmBackend(support::little), STI(STI), OSABI(OSABI), -- Is64Bit(Is64Bit) {} -+ LoongArchAsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI, bool Is64Bit, -+ const MCTargetOptions &Options) -+ : MCAsmBackend(support::little), STI(STI), OSABI(OSABI), Is64Bit(Is64Bit), -+ TargetOptions(Options) {} - ~LoongArchAsmBackend() override {} - - void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, -@@ -63,6 +65,7 @@ public: - - std::unique_ptr - createObjectTargetWriter() const override; -+ const MCTargetOptions &getTargetOptions() const { return TargetOptions; } - }; - } // end namespace llvm - -diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.cpp -index de2ba2833..28404f04d 100644 ---- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.cpp -+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.cpp -@@ -15,11 +15,71 @@ - #include "llvm/ADT/ArrayRef.h" - #include "llvm/ADT/Triple.h" - #include "llvm/MC/MCSubtargetInfo.h" -+#include "llvm/Support/raw_ostream.h" - - namespace llvm { - - namespace LoongArchABI { - -+ABI computeTargetABI(const Triple &TT, StringRef ABIName) { -+ ABI ArgProvidedABI = getTargetABI(ABIName); -+ bool Is64Bit = TT.isArch64Bit(); -+ ABI TripleABI; -+ -+ // Figure out the ABI explicitly requested via the triple's environment type. -+ switch (TT.getEnvironment()) { -+ case llvm::Triple::EnvironmentType::GNUSF: -+ TripleABI = Is64Bit ? LoongArchABI::ABI_LP64S : LoongArchABI::ABI_ILP32S; -+ break; -+ case llvm::Triple::EnvironmentType::GNUF32: -+ TripleABI = Is64Bit ? LoongArchABI::ABI_LP64F : LoongArchABI::ABI_ILP32F; -+ break; -+ -+ // Let the fallback case behave like {ILP32,LP64}D. -+ case llvm::Triple::EnvironmentType::GNUF64: -+ default: -+ TripleABI = Is64Bit ? LoongArchABI::ABI_LP64D : LoongArchABI::ABI_ILP32D; -+ break; -+ } -+ -+ switch (ArgProvidedABI) { -+ case LoongArchABI::ABI_Unknown: -+ // Fallback to the triple-implied ABI if ABI name is not specified or -+ // invalid. 
-+ if (!ABIName.empty()) -+ errs() << "'" << ABIName -+ << "' is not a recognized ABI for this target, ignoring and using " -+ "triple-implied ABI\n"; -+ return TripleABI; -+ -+ case LoongArchABI::ABI_ILP32S: -+ case LoongArchABI::ABI_ILP32F: -+ case LoongArchABI::ABI_ILP32D: -+ if (Is64Bit) { -+ errs() << "32-bit ABIs are not supported for 64-bit targets, ignoring " -+ "target-abi and using triple-implied ABI\n"; -+ return TripleABI; -+ } -+ break; -+ -+ case LoongArchABI::ABI_LP64S: -+ case LoongArchABI::ABI_LP64F: -+ case LoongArchABI::ABI_LP64D: -+ if (!Is64Bit) { -+ errs() << "64-bit ABIs are not supported for 32-bit targets, ignoring " -+ "target-abi and using triple-implied ABI\n"; -+ return TripleABI; -+ } -+ break; -+ } -+ -+ if (!ABIName.empty() && TT.hasEnvironment() && ArgProvidedABI != TripleABI) -+ errs() << "warning: triple-implied ABI conflicts with provided target-abi '" -+ << ABIName << "', using target-abi\n"; -+ -+ return ArgProvidedABI; -+} -+ - ABI getTargetABI(StringRef ABIName) { - auto TargetABI = StringSwitch(ABIName) - .Case("ilp32s", ABI_ILP32S) -diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h -index c5f072677..cdbd1f569 100644 ---- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h -+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h -@@ -54,6 +54,7 @@ enum ABI { - ABI_Unknown - }; - -+ABI computeTargetABI(const Triple &TT, StringRef ABIName); - ABI getTargetABI(StringRef ABIName); - - // Returns the register used to hold the stack pointer after realignment. -diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp -index 57330dd31..a6b9c0652 100644 ---- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp -+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp -@@ -59,7 +59,7 @@ unsigned LoongArchELFObjectWriter::getRelocType(MCContext &Ctx, - case FK_Data_4: - return IsPCRel ? ELF::R_LARCH_32_PCREL : ELF::R_LARCH_32; - case FK_Data_8: -- return ELF::R_LARCH_64; -+ return IsPCRel ? ELF::R_LARCH_64_PCREL : ELF::R_LARCH_64; - case LoongArch::fixup_loongarch_b16: - return ELF::R_LARCH_B16; - case LoongArch::fixup_loongarch_b21: -diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.cpp -index 3410c8f42..a6e15e094 100644 ---- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.cpp -+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.cpp -@@ -12,6 +12,7 @@ - - #include "LoongArchELFStreamer.h" - #include "LoongArchAsmBackend.h" -+#include "LoongArchBaseInfo.h" - #include "llvm/BinaryFormat/ELF.h" - #include "llvm/MC/MCAssembler.h" - #include "llvm/MC/MCCodeEmitter.h" -@@ -23,9 +24,10 @@ using namespace llvm; - LoongArchTargetELFStreamer::LoongArchTargetELFStreamer( - MCStreamer &S, const MCSubtargetInfo &STI) - : LoongArchTargetStreamer(S) { -- // FIXME: select appropriate ABI. -- setTargetABI(STI.getTargetTriple().isArch64Bit() ? 
LoongArchABI::ABI_LP64D -- : LoongArchABI::ABI_ILP32D); -+ auto &MAB = static_cast<LoongArchAsmBackend &>( -+ getStreamer().getAssembler().getBackend()); -+ setTargetABI(LoongArchABI::computeTargetABI( -+ STI.getTargetTriple(), MAB.getTargetOptions().getABIName())); - } - - MCELFStreamer &LoongArchTargetELFStreamer::getStreamer() { -diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp -index 80ebe0fa5..f6ea8d290 100644 ---- a/llvm/lib/TargetParser/Host.cpp -+++ b/llvm/lib/TargetParser/Host.cpp -@@ -1448,6 +1448,20 @@ StringRef sys::getHostCPUName() { - return "generic"; - } - } -+#elif defined(__loongarch__) -+StringRef sys::getHostCPUName() { -+ // Use processor id to detect cpu name. -+ uint32_t processor_id; -+ __asm__("cpucfg %[prid], $zero\n\t" : [prid] "=r"(processor_id)); -+ switch (processor_id & 0xff00) { -+ case 0xc000: // Loongson 64bit, 4-issue -+ return "la464"; -+ // TODO: Others. -+ default: -+ break; -+ } -+ return "generic"; -+} - #elif defined(__riscv) - StringRef sys::getHostCPUName() { - #if defined(__linux__) -@@ -1842,6 +1856,23 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) { - - return true; - } -+#elif defined(__linux__) && defined(__loongarch__) -+#include <sys/auxv.h> -+bool sys::getHostCPUFeatures(StringMap<bool> &Features) { -+ unsigned long hwcap = getauxval(AT_HWCAP); -+ bool HasFPU = hwcap & (1UL << 3); // HWCAP_LOONGARCH_FPU -+ uint32_t cpucfg2 = 0x2; -+ __asm__("cpucfg %[cpucfg2], %[cpucfg2]\n\t" : [cpucfg2] "+r"(cpucfg2)); -+ -+ Features["f"] = HasFPU && (cpucfg2 & (1U << 1)); // CPUCFG.2.FP_SP -+ Features["d"] = HasFPU && (cpucfg2 & (1U << 2)); // CPUCFG.2.FP_DP -+ -+ Features["lsx"] = hwcap & (1UL << 4); // HWCAP_LOONGARCH_LSX -+ Features["lasx"] = hwcap & (1UL << 5); // HWCAP_LOONGARCH_LASX -+ Features["lvz"] = hwcap & (1UL << 9); // HWCAP_LOONGARCH_LVZ -+ -+ return true; -+} - #else - bool sys::getHostCPUFeatures(StringMap<bool> &Features) { return false; } - #endif -diff --git a/llvm/lib/TargetParser/LoongArchTargetParser.cpp b/llvm/lib/TargetParser/LoongArchTargetParser.cpp -index faa8c314f..772d24c5c 100644 ---- a/llvm/lib/TargetParser/LoongArchTargetParser.cpp -+++ b/llvm/lib/TargetParser/LoongArchTargetParser.cpp -@@ -1,4 +1,4 @@ --//==-- LoongArch64TargetParser - Parser for LoongArch64 features --*- C++ -*-=// -+//===-- LoongArchTargetParser - Parser for LoongArch features --*- C++ -*-====// - // - // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. - // See https://llvm.org/LICENSE.txt for license information.
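The Host.cpp hunk above probes the host CPU via CPUCFG and AT_HWCAP. As a rough, self-contained illustration of the same probing (not part of the patch; the bit positions simply mirror the constants used in that hunk, and the program only builds on a LoongArch Linux host):

  // probe_loongarch.cpp -- illustrative sketch only, mirrors the HWCAP/CPUCFG
  // checks the hunk above adds to llvm/lib/TargetParser/Host.cpp.
  #if defined(__linux__) && defined(__loongarch__)
  #include <sys/auxv.h>
  #include <cstdint>
  #include <cstdio>

  int main() {
    unsigned long hwcap = getauxval(AT_HWCAP);
    bool HasFPU = hwcap & (1UL << 3); // HWCAP_LOONGARCH_FPU

    uint32_t cpucfg2 = 0x2; // ask for CPUCFG word 2 (FP capability bits)
    __asm__("cpucfg %[val], %[val]\n\t" : [val] "+r"(cpucfg2));

    std::printf("f    %d\n", (int)(HasFPU && (cpucfg2 & (1U << 1)))); // FP_SP
    std::printf("d    %d\n", (int)(HasFPU && (cpucfg2 & (1U << 2)))); // FP_DP
    std::printf("lsx  %d\n", (int)((hwcap >> 4) & 1)); // HWCAP_LOONGARCH_LSX
    std::printf("lasx %d\n", (int)((hwcap >> 5) & 1)); // HWCAP_LOONGARCH_LASX
    std::printf("lvz  %d\n", (int)((hwcap >> 9) & 1)); // HWCAP_LOONGARCH_LVZ
    return 0;
  }
  #else
  int main() { return 0; } // not a LoongArch Linux host
  #endif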
-@@ -27,12 +27,11 @@ const ArchInfo AllArchs[] = { - #include "llvm/TargetParser/LoongArchTargetParser.def" - }; - --LoongArch::ArchKind LoongArch::parseArch(StringRef Arch) { -+bool LoongArch::isValidArchName(StringRef Arch) { - for (const auto A : AllArchs) - if (A.Name == Arch) -- return A.Kind; -- -- return LoongArch::ArchKind::AK_INVALID; -+ return true; -+ return false; - } - - bool LoongArch::getArchFeatures(StringRef Arch, -@@ -40,10 +39,22 @@ bool LoongArch::getArchFeatures(StringRef Arch, - for (const auto A : AllArchs) { - if (A.Name == Arch) { - for (const auto F : AllFeatures) -- if ((A.Features & F.Kind) == F.Kind && F.Kind != FK_INVALID) -+ if ((A.Features & F.Kind) == F.Kind) - Features.push_back(F.Name); - return true; - } - } - return false; - } -+ -+bool LoongArch::isValidCPUName(StringRef Name) { return isValidArchName(Name); } -+ -+void LoongArch::fillValidCPUList(SmallVectorImpl &Values) { -+ for (const auto A : AllArchs) -+ Values.emplace_back(A.Name); -+} -+ -+StringRef LoongArch::getDefaultArch(bool Is64Bit) { -+ // TODO: use a real 32-bit arch name. -+ return Is64Bit ? "loongarch64" : ""; -+} -diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp -index 599eeeabc..367a2bef2 100644 ---- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp -+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp -@@ -492,7 +492,7 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize, - bool IsMIPS64 = TargetTriple.isMIPS64(); - bool IsArmOrThumb = TargetTriple.isARM() || TargetTriple.isThumb(); - bool IsAArch64 = TargetTriple.getArch() == Triple::aarch64; -- bool IsLoongArch64 = TargetTriple.getArch() == Triple::loongarch64; -+ bool IsLoongArch64 = TargetTriple.isLoongArch64(); - bool IsRISCV64 = TargetTriple.getArch() == Triple::riscv64; - bool IsWindows = TargetTriple.isOSWindows(); - bool IsFuchsia = TargetTriple.isOSFuchsia(); -diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp -index fe8b8ce0d..603fa97e1 100644 ---- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp -+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp -@@ -434,6 +434,14 @@ static const MemoryMapParams Linux_AArch64_MemoryMapParams = { - 0x0200000000000, // OriginBase - }; - -+// loongarch64 Linux -+static const MemoryMapParams Linux_LoongArch64_MemoryMapParams = { -+ 0, // AndMask (not used) -+ 0x500000000000, // XorMask -+ 0, // ShadowBase (not used) -+ 0x100000000000, // OriginBase -+}; -+ - // aarch64 FreeBSD - static const MemoryMapParams FreeBSD_AArch64_MemoryMapParams = { - 0x1800000000000, // AndMask -@@ -491,6 +499,11 @@ static const PlatformMemoryMapParams Linux_ARM_MemoryMapParams = { - &Linux_AArch64_MemoryMapParams, - }; - -+static const PlatformMemoryMapParams Linux_LoongArch_MemoryMapParams = { -+ nullptr, -+ &Linux_LoongArch64_MemoryMapParams, -+}; -+ - static const PlatformMemoryMapParams FreeBSD_ARM_MemoryMapParams = { - nullptr, - &FreeBSD_AArch64_MemoryMapParams, -@@ -537,6 +550,7 @@ private: - friend struct VarArgAArch64Helper; - friend struct VarArgPowerPC64Helper; - friend struct VarArgSystemZHelper; -+ friend struct VarArgLoongArch64Helper; - - void initializeModule(Module &M); - void initializeCallbacks(Module &M, const TargetLibraryInfo &TLI); -@@ -986,6 +1000,9 @@ void MemorySanitizer::initializeModule(Module &M) { - case Triple::aarch64_be: - MapParams = 
Linux_ARM_MemoryMapParams.bits64; - break; -+ case Triple::loongarch64: -+ MapParams = Linux_LoongArch_MemoryMapParams.bits64; -+ break; - default: - report_fatal_error("unsupported architecture"); - } -@@ -5709,6 +5726,123 @@ struct VarArgSystemZHelper : public VarArgHelper { - } - }; - -+/// LoongArch64-specific implementation of VarArgHelper. -+struct VarArgLoongArch64Helper : public VarArgHelper { -+ Function &F; -+ MemorySanitizer &MS; -+ MemorySanitizerVisitor &MSV; -+ AllocaInst *VAArgTLSCopy = nullptr; -+ Value *VAArgSize = nullptr; -+ -+ SmallVector VAStartInstrumentationList; -+ -+ VarArgLoongArch64Helper(Function &F, MemorySanitizer &MS, -+ MemorySanitizerVisitor &MSV) -+ : F(F), MS(MS), MSV(MSV) {} -+ -+ void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override { -+ unsigned VAArgOffset = 0; -+ const DataLayout &DL = F.getParent()->getDataLayout(); -+ for (Value *A : -+ llvm::drop_begin(CB.args(), CB.getFunctionType()->getNumParams())) { -+ Value *Base; -+ uint64_t ArgSize = DL.getTypeAllocSize(A->getType()); -+ Base = getShadowPtrForVAArgument(A->getType(), IRB, VAArgOffset, ArgSize); -+ VAArgOffset += ArgSize; -+ VAArgOffset = alignTo(VAArgOffset, 8); -+ if (!Base) -+ continue; -+ IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment); -+ } -+ -+ Constant *TotalVAArgSize = ConstantInt::get(IRB.getInt64Ty(), VAArgOffset); -+ // Here using VAArgOverflowSizeTLS as VAArgSizeTLS to avoid creation of -+ // a new class member i.e. it is the total size of all VarArgs. -+ IRB.CreateStore(TotalVAArgSize, MS.VAArgOverflowSizeTLS); -+ } -+ -+ /// Compute the shadow address for a given va_arg. -+ Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB, -+ unsigned ArgOffset, unsigned ArgSize) { -+ // Make sure we don't overflow __msan_va_arg_tls. -+ if (ArgOffset + ArgSize > kParamTLSSize) -+ return nullptr; -+ Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy); -+ Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset)); -+ return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0), -+ "_msarg"); -+ } -+ -+ void visitVAStartInst(VAStartInst &I) override { -+ IRBuilder<> IRB(&I); -+ VAStartInstrumentationList.push_back(&I); -+ Value *VAListTag = I.getArgOperand(0); -+ Value *ShadowPtr, *OriginPtr; -+ const Align Alignment = Align(8); -+ std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr( -+ VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true); -+ IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()), -+ /* size */ 8, Alignment, false); -+ } -+ -+ void visitVACopyInst(VACopyInst &I) override { -+ IRBuilder<> IRB(&I); -+ VAStartInstrumentationList.push_back(&I); -+ Value *VAListTag = I.getArgOperand(0); -+ Value *ShadowPtr, *OriginPtr; -+ const Align Alignment = Align(8); -+ std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr( -+ VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true); -+ IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()), -+ /* size */ 8, Alignment, false); -+ } -+ -+ void finalizeInstrumentation() override { -+ assert(!VAArgSize && !VAArgTLSCopy && -+ "finalizeInstrumentation called twice"); -+ IRBuilder<> IRB(MSV.FnPrologueEnd); -+ VAArgSize = IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS); -+ Value *CopySize = -+ IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, 0), VAArgSize); -+ -+ if (!VAStartInstrumentationList.empty()) { -+ // If there is a va_start in this function, make a backup copy of -+ // va_arg_tls somewhere in the function entry block. 
-+ VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize); -+ VAArgTLSCopy->setAlignment(kShadowTLSAlignment); -+ IRB.CreateMemSet(VAArgTLSCopy, Constant::getNullValue(IRB.getInt8Ty()), -+ CopySize, kShadowTLSAlignment, false); -+ -+ Value *SrcSize = IRB.CreateBinaryIntrinsic( -+ Intrinsic::umin, CopySize, -+ ConstantInt::get(MS.IntptrTy, kParamTLSSize)); -+ IRB.CreateMemCpy(VAArgTLSCopy, kShadowTLSAlignment, MS.VAArgTLS, -+ kShadowTLSAlignment, SrcSize); -+ } -+ -+ // Instrument va_start. -+ // Copy va_list shadow from the backup copy of the TLS contents. -+ for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) { -+ CallInst *OrigInst = VAStartInstrumentationList[i]; -+ NextNodeIRBuilder IRB(OrigInst); -+ Value *VAListTag = OrigInst->getArgOperand(0); -+ Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C); -+ Value *RegSaveAreaPtrPtr = -+ IRB.CreateIntToPtr(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy), -+ PointerType::get(Type::getInt64PtrTy(*MS.C), 0)); -+ Value *RegSaveAreaPtr = -+ IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr); -+ Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr; -+ const Align Alignment = Align(8); -+ std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) = -+ MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(), -+ Alignment, /*isStore*/ true); -+ IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment, -+ CopySize); -+ } -+ } -+}; -+ - /// A no-op implementation of VarArgHelper. - struct VarArgNoOpHelper : public VarArgHelper { - VarArgNoOpHelper(Function &F, MemorySanitizer &MS, -@@ -5741,6 +5875,8 @@ static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan, - return new VarArgPowerPC64Helper(Func, Msan, Visitor); - else if (TargetTriple.getArch() == Triple::systemz) - return new VarArgSystemZHelper(Func, Msan, Visitor); -+ else if (TargetTriple.getArch() == Triple::loongarch64) -+ return new VarArgLoongArch64Helper(Func, Msan, Visitor); - else - return new VarArgNoOpHelper(Func, Msan, Visitor); - } -diff --git a/llvm/test/CodeGen/LoongArch/calling-conv-common.ll b/llvm/test/CodeGen/LoongArch/calling-conv-common.ll -new file mode 100644 -index 000000000..08fff9f8c ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/calling-conv-common.ll -@@ -0,0 +1,403 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 --target-abi=lp64s < %s | FileCheck %s -+; RUN: llc --mtriple=loongarch64 --mattr=+d --target-abi=lp64d < %s | FileCheck %s -+ -+;; This file contains tests that should have identical output for all ABIs, i.e. -+;; where no arguments are passed via floating point registers. -+ -+;; Check that on LA64, i128 is passed in a pair of GPRs. 
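For context on the new common calling-convention tests, a hedged C++-level sketch of the first case follows (illustrative only, not part of the patch); the register mapping in the comments is the one the CHECK lines that follow encode:

  // Illustration only: shape of callee_i128_in_regs under lp64s/lp64d.
  // An i128 after one i64 occupies the next two GPRs:
  //   a -> $a0, low half of b -> $a1, high half of b -> $a2,
  // which is what the caller test sets up (ori $a1, 2 / move $a2, $zero).
  extern "C" long long callee_i128_in_regs(long long a, __int128 b) {
    return a + (long long)b; // truncating b only reads the low half ($a1)
  }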
-+define i64 @callee_i128_in_regs(i64 %a, i128 %b) nounwind { -+; CHECK-LABEL: callee_i128_in_regs: -+; CHECK: # %bb.0: -+; CHECK-NEXT: add.d $a0, $a0, $a1 -+; CHECK-NEXT: ret -+ %b_trunc = trunc i128 %b to i64 -+ %1 = add i64 %a, %b_trunc -+ ret i64 %1 -+} -+ -+define i64 @caller_i128_in_regs() nounwind { -+; CHECK-LABEL: caller_i128_in_regs: -+; CHECK: # %bb.0: -+; CHECK-NEXT: addi.d $sp, $sp, -16 -+; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -+; CHECK-NEXT: ori $a0, $zero, 1 -+; CHECK-NEXT: ori $a1, $zero, 2 -+; CHECK-NEXT: move $a2, $zero -+; CHECK-NEXT: bl %plt(callee_i128_in_regs) -+; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -+; CHECK-NEXT: addi.d $sp, $sp, 16 -+; CHECK-NEXT: ret -+ %1 = call i64 @callee_i128_in_regs(i64 1, i128 2) -+ ret i64 %1 -+} -+ -+;; Check that the stack is used once the GPRs are exhausted. -+define i64 @callee_many_scalars(i8 %a, i16 %b, i32 %c, i64 %d, i128 %e, i64 %f, i128 %g, i64 %h) nounwind { -+; CHECK-LABEL: callee_many_scalars: -+; CHECK: # %bb.0: -+; CHECK-NEXT: ld.d $t0, $sp, 0 -+; CHECK-NEXT: xor $a5, $a5, $t0 -+; CHECK-NEXT: xor $a4, $a4, $a7 -+; CHECK-NEXT: or $a4, $a4, $a5 -+; CHECK-NEXT: bstrpick.d $a1, $a1, 15, 0 -+; CHECK-NEXT: andi $a0, $a0, 255 -+; CHECK-NEXT: add.d $a0, $a0, $a1 -+; CHECK-NEXT: bstrpick.d $a1, $a2, 31, 0 -+; CHECK-NEXT: add.d $a0, $a0, $a1 -+; CHECK-NEXT: add.d $a0, $a0, $a3 -+; CHECK-NEXT: sltui $a1, $a4, 1 -+; CHECK-NEXT: add.d $a0, $a1, $a0 -+; CHECK-NEXT: add.d $a0, $a0, $a6 -+; CHECK-NEXT: ld.d $a1, $sp, 8 -+; CHECK-NEXT: add.d $a0, $a0, $a1 -+; CHECK-NEXT: ret -+ %a_ext = zext i8 %a to i64 -+ %b_ext = zext i16 %b to i64 -+ %c_ext = zext i32 %c to i64 -+ %1 = add i64 %a_ext, %b_ext -+ %2 = add i64 %1, %c_ext -+ %3 = add i64 %2, %d -+ %4 = icmp eq i128 %e, %g -+ %5 = zext i1 %4 to i64 -+ %6 = add i64 %5, %3 -+ %7 = add i64 %6, %f -+ %8 = add i64 %7, %h -+ ret i64 %8 -+} -+ -+define i64 @caller_many_scalars() nounwind { -+; CHECK-LABEL: caller_many_scalars: -+; CHECK: # %bb.0: -+; CHECK-NEXT: addi.d $sp, $sp, -32 -+; CHECK-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill -+; CHECK-NEXT: ori $a0, $zero, 8 -+; CHECK-NEXT: st.d $a0, $sp, 8 -+; CHECK-NEXT: st.d $zero, $sp, 0 -+; CHECK-NEXT: ori $a0, $zero, 1 -+; CHECK-NEXT: ori $a1, $zero, 2 -+; CHECK-NEXT: ori $a2, $zero, 3 -+; CHECK-NEXT: ori $a3, $zero, 4 -+; CHECK-NEXT: ori $a4, $zero, 5 -+; CHECK-NEXT: ori $a6, $zero, 6 -+; CHECK-NEXT: ori $a7, $zero, 7 -+; CHECK-NEXT: move $a5, $zero -+; CHECK-NEXT: bl %plt(callee_many_scalars) -+; CHECK-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload -+; CHECK-NEXT: addi.d $sp, $sp, 32 -+; CHECK-NEXT: ret -+ %1 = call i64 @callee_many_scalars(i8 1, i16 2, i32 3, i64 4, i128 5, i64 6, i128 7, i64 8) -+ ret i64 %1 -+} -+ -+;; Check that i256 is passed indirectly. 
-+ -+define i64 @callee_large_scalars(i256 %a, i256 %b) nounwind { -+; CHECK-LABEL: callee_large_scalars: -+; CHECK: # %bb.0: -+; CHECK-NEXT: ld.d $a2, $a1, 24 -+; CHECK-NEXT: ld.d $a3, $a0, 24 -+; CHECK-NEXT: xor $a2, $a3, $a2 -+; CHECK-NEXT: ld.d $a3, $a1, 8 -+; CHECK-NEXT: ld.d $a4, $a0, 8 -+; CHECK-NEXT: xor $a3, $a4, $a3 -+; CHECK-NEXT: or $a2, $a3, $a2 -+; CHECK-NEXT: ld.d $a3, $a1, 16 -+; CHECK-NEXT: ld.d $a4, $a0, 16 -+; CHECK-NEXT: xor $a3, $a4, $a3 -+; CHECK-NEXT: ld.d $a1, $a1, 0 -+; CHECK-NEXT: ld.d $a0, $a0, 0 -+; CHECK-NEXT: xor $a0, $a0, $a1 -+; CHECK-NEXT: or $a0, $a0, $a3 -+; CHECK-NEXT: or $a0, $a0, $a2 -+; CHECK-NEXT: sltui $a0, $a0, 1 -+; CHECK-NEXT: ret -+ %1 = icmp eq i256 %a, %b -+ %2 = zext i1 %1 to i64 -+ ret i64 %2 -+} -+ -+define i64 @caller_large_scalars() nounwind { -+; CHECK-LABEL: caller_large_scalars: -+; CHECK: # %bb.0: -+; CHECK-NEXT: addi.d $sp, $sp, -80 -+; CHECK-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill -+; CHECK-NEXT: ori $a0, $zero, 2 -+; CHECK-NEXT: st.d $a0, $sp, 0 -+; CHECK-NEXT: st.d $zero, $sp, 24 -+; CHECK-NEXT: st.d $zero, $sp, 16 -+; CHECK-NEXT: st.d $zero, $sp, 8 -+; CHECK-NEXT: st.d $zero, $sp, 56 -+; CHECK-NEXT: st.d $zero, $sp, 48 -+; CHECK-NEXT: st.d $zero, $sp, 40 -+; CHECK-NEXT: ori $a0, $zero, 1 -+; CHECK-NEXT: st.d $a0, $sp, 32 -+; CHECK-NEXT: addi.d $a0, $sp, 32 -+; CHECK-NEXT: addi.d $a1, $sp, 0 -+; CHECK-NEXT: bl %plt(callee_large_scalars) -+; CHECK-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload -+; CHECK-NEXT: addi.d $sp, $sp, 80 -+; CHECK-NEXT: ret -+ %1 = call i64 @callee_large_scalars(i256 1, i256 2) -+ ret i64 %1 -+} -+ -+;; Check that arguments larger than 2*GRLen are handled correctly when their -+;; address is passed on the stack rather than in memory. -+ -+;; Must keep define on a single line due to an update_llc_test_checks.py limitation -+define i64 @callee_large_scalars_exhausted_regs(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64 %g, i256 %h, i64 %i, i256 %j) nounwind { -+; CHECK-LABEL: callee_large_scalars_exhausted_regs: -+; CHECK: # %bb.0: -+; CHECK-NEXT: ld.d $a0, $sp, 8 -+; CHECK-NEXT: ld.d $a1, $a0, 24 -+; CHECK-NEXT: ld.d $a2, $a7, 24 -+; CHECK-NEXT: xor $a1, $a2, $a1 -+; CHECK-NEXT: ld.d $a2, $a0, 8 -+; CHECK-NEXT: ld.d $a3, $a7, 8 -+; CHECK-NEXT: xor $a2, $a3, $a2 -+; CHECK-NEXT: or $a1, $a2, $a1 -+; CHECK-NEXT: ld.d $a2, $a0, 16 -+; CHECK-NEXT: ld.d $a3, $a7, 16 -+; CHECK-NEXT: xor $a2, $a3, $a2 -+; CHECK-NEXT: ld.d $a0, $a0, 0 -+; CHECK-NEXT: ld.d $a3, $a7, 0 -+; CHECK-NEXT: xor $a0, $a3, $a0 -+; CHECK-NEXT: or $a0, $a0, $a2 -+; CHECK-NEXT: or $a0, $a0, $a1 -+; CHECK-NEXT: sltui $a0, $a0, 1 -+; CHECK-NEXT: ret -+ %1 = icmp eq i256 %h, %j -+ %2 = zext i1 %1 to i64 -+ ret i64 %2 -+} -+ -+define i64 @caller_large_scalars_exhausted_regs() nounwind { -+; CHECK-LABEL: caller_large_scalars_exhausted_regs: -+; CHECK: # %bb.0: -+; CHECK-NEXT: addi.d $sp, $sp, -96 -+; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill -+; CHECK-NEXT: addi.d $a0, $sp, 16 -+; CHECK-NEXT: st.d $a0, $sp, 8 -+; CHECK-NEXT: ori $a0, $zero, 9 -+; CHECK-NEXT: st.d $a0, $sp, 0 -+; CHECK-NEXT: ori $a0, $zero, 10 -+; CHECK-NEXT: st.d $a0, $sp, 16 -+; CHECK-NEXT: st.d $zero, $sp, 40 -+; CHECK-NEXT: st.d $zero, $sp, 32 -+; CHECK-NEXT: st.d $zero, $sp, 24 -+; CHECK-NEXT: st.d $zero, $sp, 72 -+; CHECK-NEXT: st.d $zero, $sp, 64 -+; CHECK-NEXT: st.d $zero, $sp, 56 -+; CHECK-NEXT: ori $a0, $zero, 8 -+; CHECK-NEXT: st.d $a0, $sp, 48 -+; CHECK-NEXT: ori $a0, $zero, 1 -+; CHECK-NEXT: ori $a1, $zero, 2 -+; CHECK-NEXT: ori $a2, $zero, 3 -+; 
CHECK-NEXT: ori $a3, $zero, 4 -+; CHECK-NEXT: ori $a4, $zero, 5 -+; CHECK-NEXT: ori $a5, $zero, 6 -+; CHECK-NEXT: ori $a6, $zero, 7 -+; CHECK-NEXT: addi.d $a7, $sp, 48 -+; CHECK-NEXT: bl %plt(callee_large_scalars_exhausted_regs) -+; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload -+; CHECK-NEXT: addi.d $sp, $sp, 96 -+; CHECK-NEXT: ret -+ %1 = call i64 @callee_large_scalars_exhausted_regs( -+ i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i256 8, i64 9, -+ i256 10) -+ ret i64 %1 -+} -+ -+;; Check large struct arguments, which are passed byval -+ -+%struct.large = type { i64, i64, i64, i64 } -+ -+define i64 @callee_large_struct(ptr byval(%struct.large) align 8 %a) nounwind { -+; CHECK-LABEL: callee_large_struct: -+; CHECK: # %bb.0: -+; CHECK-NEXT: ld.d $a1, $a0, 24 -+; CHECK-NEXT: ld.d $a0, $a0, 0 -+; CHECK-NEXT: add.d $a0, $a0, $a1 -+; CHECK-NEXT: ret -+ %1 = getelementptr inbounds %struct.large, ptr %a, i64 0, i32 0 -+ %2 = getelementptr inbounds %struct.large, ptr %a, i64 0, i32 3 -+ %3 = load i64, ptr %1 -+ %4 = load i64, ptr %2 -+ %5 = add i64 %3, %4 -+ ret i64 %5 -+} -+ -+define i64 @caller_large_struct() nounwind { -+; CHECK-LABEL: caller_large_struct: -+; CHECK: # %bb.0: -+; CHECK-NEXT: addi.d $sp, $sp, -80 -+; CHECK-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill -+; CHECK-NEXT: ori $a0, $zero, 1 -+; CHECK-NEXT: st.d $a0, $sp, 40 -+; CHECK-NEXT: st.d $a0, $sp, 8 -+; CHECK-NEXT: ori $a0, $zero, 2 -+; CHECK-NEXT: st.d $a0, $sp, 48 -+; CHECK-NEXT: st.d $a0, $sp, 16 -+; CHECK-NEXT: ori $a0, $zero, 3 -+; CHECK-NEXT: st.d $a0, $sp, 56 -+; CHECK-NEXT: st.d $a0, $sp, 24 -+; CHECK-NEXT: ori $a0, $zero, 4 -+; CHECK-NEXT: st.d $a0, $sp, 64 -+; CHECK-NEXT: st.d $a0, $sp, 32 -+; CHECK-NEXT: addi.d $a0, $sp, 8 -+; CHECK-NEXT: bl %plt(callee_large_struct) -+; CHECK-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload -+; CHECK-NEXT: addi.d $sp, $sp, 80 -+; CHECK-NEXT: ret -+ %ls = alloca %struct.large, align 8 -+ %a = getelementptr inbounds %struct.large, ptr %ls, i64 0, i32 0 -+ store i64 1, ptr %a -+ %b = getelementptr inbounds %struct.large, ptr %ls, i64 0, i32 1 -+ store i64 2, ptr %b -+ %c = getelementptr inbounds %struct.large, ptr %ls, i64 0, i32 2 -+ store i64 3, ptr %c -+ %d = getelementptr inbounds %struct.large, ptr %ls, i64 0, i32 3 -+ store i64 4, ptr %d -+ %1 = call i64 @callee_large_struct(ptr byval(%struct.large) align 8 %ls) -+ ret i64 %1 -+} -+ -+;; Check return scalar which size is 2*GRLen. -+ -+define i128 @callee_small_scalar_ret() nounwind { -+; CHECK-LABEL: callee_small_scalar_ret: -+; CHECK: # %bb.0: -+; CHECK-NEXT: addi.w $a0, $zero, -1 -+; CHECK-NEXT: move $a1, $a0 -+; CHECK-NEXT: ret -+ ret i128 -1 -+} -+ -+define i64 @caller_small_scalar_ret() nounwind { -+; CHECK-LABEL: caller_small_scalar_ret: -+; CHECK: # %bb.0: -+; CHECK-NEXT: addi.d $sp, $sp, -16 -+; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -+; CHECK-NEXT: bl %plt(callee_small_scalar_ret) -+; CHECK-NEXT: addi.w $a2, $zero, -2 -+; CHECK-NEXT: xor $a0, $a0, $a2 -+; CHECK-NEXT: orn $a0, $a0, $a1 -+; CHECK-NEXT: sltui $a0, $a0, 1 -+; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -+; CHECK-NEXT: addi.d $sp, $sp, 16 -+; CHECK-NEXT: ret -+ %1 = call i128 @callee_small_scalar_ret() -+ %2 = icmp eq i128 -2, %1 -+ %3 = zext i1 %2 to i64 -+ ret i64 %3 -+} -+ -+;; Check return struct which size is 2*GRLen. 
-+ -+%struct.small = type { i64, ptr } -+ -+define %struct.small @callee_small_struct_ret() nounwind { -+; CHECK-LABEL: callee_small_struct_ret: -+; CHECK: # %bb.0: -+; CHECK-NEXT: ori $a0, $zero, 1 -+; CHECK-NEXT: move $a1, $zero -+; CHECK-NEXT: ret -+ ret %struct.small { i64 1, ptr null } -+} -+ -+define i64 @caller_small_struct_ret() nounwind { -+; CHECK-LABEL: caller_small_struct_ret: -+; CHECK: # %bb.0: -+; CHECK-NEXT: addi.d $sp, $sp, -16 -+; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -+; CHECK-NEXT: bl %plt(callee_small_struct_ret) -+; CHECK-NEXT: add.d $a0, $a0, $a1 -+; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -+; CHECK-NEXT: addi.d $sp, $sp, 16 -+; CHECK-NEXT: ret -+ %1 = call %struct.small @callee_small_struct_ret() -+ %2 = extractvalue %struct.small %1, 0 -+ %3 = extractvalue %struct.small %1, 1 -+ %4 = ptrtoint ptr %3 to i64 -+ %5 = add i64 %2, %4 -+ ret i64 %5 -+} -+ -+;; Check return scalar which size is more than 2*GRLen. -+ -+define i256 @callee_large_scalar_ret() nounwind { -+; CHECK-LABEL: callee_large_scalar_ret: -+; CHECK: # %bb.0: -+; CHECK-NEXT: addi.w $a1, $zero, -1 -+; CHECK-NEXT: st.d $a1, $a0, 24 -+; CHECK-NEXT: st.d $a1, $a0, 16 -+; CHECK-NEXT: st.d $a1, $a0, 8 -+; CHECK-NEXT: lu12i.w $a1, -30141 -+; CHECK-NEXT: ori $a1, $a1, 747 -+; CHECK-NEXT: st.d $a1, $a0, 0 -+; CHECK-NEXT: ret -+ ret i256 -123456789 -+} -+ -+define void @caller_large_scalar_ret() nounwind { -+; CHECK-LABEL: caller_large_scalar_ret: -+; CHECK: # %bb.0: -+; CHECK-NEXT: addi.d $sp, $sp, -48 -+; CHECK-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill -+; CHECK-NEXT: addi.d $a0, $sp, 0 -+; CHECK-NEXT: bl %plt(callee_large_scalar_ret) -+; CHECK-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload -+; CHECK-NEXT: addi.d $sp, $sp, 48 -+; CHECK-NEXT: ret -+ %1 = call i256 @callee_large_scalar_ret() -+ ret void -+} -+ -+;; Check return struct which size is more than 2*GRLen. 
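Before the indirect-return tests, a short C++-level sketch of what they exercise (illustrative only, not part of the patch):

  // Illustration only: aggregates larger than 2*GRLen are returned
  // indirectly. The caller reserves stack space and passes its address in
  // $a0 (sret); the callee stores through that pointer, as the
  // callee_large_struct_ret / caller_large_struct_ret CHECK lines below expect.
  struct Large { long long a, b, c, d; }; // 32 bytes > 2*GRLen on LA64

  extern "C" Large make_large() {   // lowered as void make_large(Large *sret)
    return Large{1, 2, 3, 4};
  }

  extern "C" long long sum_large() {
    Large l = make_large();         // &l is the hidden $a0 argument
    return l.a + l.d;
  }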
-+ -+define void @callee_large_struct_ret(ptr noalias sret(%struct.large) %agg.result) nounwind { -+; CHECK-LABEL: callee_large_struct_ret: -+; CHECK: # %bb.0: -+; CHECK-NEXT: ori $a1, $zero, 4 -+; CHECK-NEXT: st.d $a1, $a0, 24 -+; CHECK-NEXT: ori $a1, $zero, 3 -+; CHECK-NEXT: st.d $a1, $a0, 16 -+; CHECK-NEXT: ori $a1, $zero, 2 -+; CHECK-NEXT: st.d $a1, $a0, 8 -+; CHECK-NEXT: ori $a1, $zero, 1 -+; CHECK-NEXT: st.d $a1, $a0, 0 -+; CHECK-NEXT: ret -+ %a = getelementptr inbounds %struct.large, ptr %agg.result, i64 0, i32 0 -+ store i64 1, ptr %a, align 4 -+ %b = getelementptr inbounds %struct.large, ptr %agg.result, i64 0, i32 1 -+ store i64 2, ptr %b, align 4 -+ %c = getelementptr inbounds %struct.large, ptr %agg.result, i64 0, i32 2 -+ store i64 3, ptr %c, align 4 -+ %d = getelementptr inbounds %struct.large, ptr %agg.result, i64 0, i32 3 -+ store i64 4, ptr %d, align 4 -+ ret void -+} -+ -+define i64 @caller_large_struct_ret() nounwind { -+; CHECK-LABEL: caller_large_struct_ret: -+; CHECK: # %bb.0: -+; CHECK-NEXT: addi.d $sp, $sp, -48 -+; CHECK-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill -+; CHECK-NEXT: addi.d $a0, $sp, 8 -+; CHECK-NEXT: bl %plt(callee_large_struct_ret) -+; CHECK-NEXT: ld.d $a0, $sp, 32 -+; CHECK-NEXT: ld.d $a1, $sp, 8 -+; CHECK-NEXT: add.d $a0, $a1, $a0 -+; CHECK-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload -+; CHECK-NEXT: addi.d $sp, $sp, 48 -+; CHECK-NEXT: ret -+ %1 = alloca %struct.large -+ call void @callee_large_struct_ret(ptr sret(%struct.large) %1) -+ %2 = getelementptr inbounds %struct.large, ptr %1, i64 0, i32 0 -+ %3 = load i64, ptr %2 -+ %4 = getelementptr inbounds %struct.large, ptr %1, i64 0, i32 3 -+ %5 = load i64, ptr %4 -+ %6 = add i64 %3, %5 -+ ret i64 %6 -+} -diff --git a/llvm/test/CodeGen/LoongArch/calling-conv-lp64d.ll b/llvm/test/CodeGen/LoongArch/calling-conv-lp64d.ll -index ae2ce7291..ceb38876c 100644 ---- a/llvm/test/CodeGen/LoongArch/calling-conv-lp64d.ll -+++ b/llvm/test/CodeGen/LoongArch/calling-conv-lp64d.ll -@@ -2,406 +2,7 @@ - ; RUN: llc --mtriple=loongarch64 --mattr=+d --target-abi=lp64d < %s \ - ; RUN: | FileCheck %s - --;; Check that on LA64, i128 is passed in a pair of GPRs. --define i64 @callee_i128_in_regs(i64 %a, i128 %b) nounwind { --; CHECK-LABEL: callee_i128_in_regs: --; CHECK: # %bb.0: --; CHECK-NEXT: add.d $a0, $a0, $a1 --; CHECK-NEXT: ret -- %b_trunc = trunc i128 %b to i64 -- %1 = add i64 %a, %b_trunc -- ret i64 %1 --} -- --define i64 @caller_i128_in_regs() nounwind { --; CHECK-LABEL: caller_i128_in_regs: --; CHECK: # %bb.0: --; CHECK-NEXT: addi.d $sp, $sp, -16 --; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill --; CHECK-NEXT: ori $a0, $zero, 1 --; CHECK-NEXT: ori $a1, $zero, 2 --; CHECK-NEXT: move $a2, $zero --; CHECK-NEXT: bl %plt(callee_i128_in_regs) --; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload --; CHECK-NEXT: addi.d $sp, $sp, 16 --; CHECK-NEXT: ret -- %1 = call i64 @callee_i128_in_regs(i64 1, i128 2) -- ret i64 %1 --} -- --;; Check that the stack is used once the GPRs are exhausted. 
--define i64 @callee_many_scalars(i8 %a, i16 %b, i32 %c, i64 %d, i128 %e, i64 %f, i128 %g, i64 %h) nounwind { --; CHECK-LABEL: callee_many_scalars: --; CHECK: # %bb.0: --; CHECK-NEXT: ld.d $t0, $sp, 0 --; CHECK-NEXT: xor $a5, $a5, $t0 --; CHECK-NEXT: xor $a4, $a4, $a7 --; CHECK-NEXT: or $a4, $a4, $a5 --; CHECK-NEXT: bstrpick.d $a1, $a1, 15, 0 --; CHECK-NEXT: andi $a0, $a0, 255 --; CHECK-NEXT: add.d $a0, $a0, $a1 --; CHECK-NEXT: bstrpick.d $a1, $a2, 31, 0 --; CHECK-NEXT: add.d $a0, $a0, $a1 --; CHECK-NEXT: add.d $a0, $a0, $a3 --; CHECK-NEXT: sltui $a1, $a4, 1 --; CHECK-NEXT: add.d $a0, $a1, $a0 --; CHECK-NEXT: add.d $a0, $a0, $a6 --; CHECK-NEXT: ld.d $a1, $sp, 8 --; CHECK-NEXT: add.d $a0, $a0, $a1 --; CHECK-NEXT: ret -- %a_ext = zext i8 %a to i64 -- %b_ext = zext i16 %b to i64 -- %c_ext = zext i32 %c to i64 -- %1 = add i64 %a_ext, %b_ext -- %2 = add i64 %1, %c_ext -- %3 = add i64 %2, %d -- %4 = icmp eq i128 %e, %g -- %5 = zext i1 %4 to i64 -- %6 = add i64 %5, %3 -- %7 = add i64 %6, %f -- %8 = add i64 %7, %h -- ret i64 %8 --} -- --define i64 @caller_many_scalars() nounwind { --; CHECK-LABEL: caller_many_scalars: --; CHECK: # %bb.0: --; CHECK-NEXT: addi.d $sp, $sp, -32 --; CHECK-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill --; CHECK-NEXT: ori $a0, $zero, 8 --; CHECK-NEXT: st.d $a0, $sp, 8 --; CHECK-NEXT: st.d $zero, $sp, 0 --; CHECK-NEXT: ori $a0, $zero, 1 --; CHECK-NEXT: ori $a1, $zero, 2 --; CHECK-NEXT: ori $a2, $zero, 3 --; CHECK-NEXT: ori $a3, $zero, 4 --; CHECK-NEXT: ori $a4, $zero, 5 --; CHECK-NEXT: ori $a6, $zero, 6 --; CHECK-NEXT: ori $a7, $zero, 7 --; CHECK-NEXT: move $a5, $zero --; CHECK-NEXT: bl %plt(callee_many_scalars) --; CHECK-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload --; CHECK-NEXT: addi.d $sp, $sp, 32 --; CHECK-NEXT: ret -- %1 = call i64 @callee_many_scalars(i8 1, i16 2, i32 3, i64 4, i128 5, i64 6, i128 7, i64 8) -- ret i64 %1 --} -- --;; Check that i256 is passed indirectly. 
-- --define i64 @callee_large_scalars(i256 %a, i256 %b) nounwind { --; CHECK-LABEL: callee_large_scalars: --; CHECK: # %bb.0: --; CHECK-NEXT: ld.d $a2, $a1, 24 --; CHECK-NEXT: ld.d $a3, $a0, 24 --; CHECK-NEXT: xor $a2, $a3, $a2 --; CHECK-NEXT: ld.d $a3, $a1, 8 --; CHECK-NEXT: ld.d $a4, $a0, 8 --; CHECK-NEXT: xor $a3, $a4, $a3 --; CHECK-NEXT: or $a2, $a3, $a2 --; CHECK-NEXT: ld.d $a3, $a1, 16 --; CHECK-NEXT: ld.d $a4, $a0, 16 --; CHECK-NEXT: xor $a3, $a4, $a3 --; CHECK-NEXT: ld.d $a1, $a1, 0 --; CHECK-NEXT: ld.d $a0, $a0, 0 --; CHECK-NEXT: xor $a0, $a0, $a1 --; CHECK-NEXT: or $a0, $a0, $a3 --; CHECK-NEXT: or $a0, $a0, $a2 --; CHECK-NEXT: sltui $a0, $a0, 1 --; CHECK-NEXT: ret -- %1 = icmp eq i256 %a, %b -- %2 = zext i1 %1 to i64 -- ret i64 %2 --} -- --define i64 @caller_large_scalars() nounwind { --; CHECK-LABEL: caller_large_scalars: --; CHECK: # %bb.0: --; CHECK-NEXT: addi.d $sp, $sp, -80 --; CHECK-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill --; CHECK-NEXT: ori $a0, $zero, 2 --; CHECK-NEXT: st.d $a0, $sp, 0 --; CHECK-NEXT: st.d $zero, $sp, 24 --; CHECK-NEXT: st.d $zero, $sp, 16 --; CHECK-NEXT: st.d $zero, $sp, 8 --; CHECK-NEXT: st.d $zero, $sp, 56 --; CHECK-NEXT: st.d $zero, $sp, 48 --; CHECK-NEXT: st.d $zero, $sp, 40 --; CHECK-NEXT: ori $a0, $zero, 1 --; CHECK-NEXT: st.d $a0, $sp, 32 --; CHECK-NEXT: addi.d $a0, $sp, 32 --; CHECK-NEXT: addi.d $a1, $sp, 0 --; CHECK-NEXT: bl %plt(callee_large_scalars) --; CHECK-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload --; CHECK-NEXT: addi.d $sp, $sp, 80 --; CHECK-NEXT: ret -- %1 = call i64 @callee_large_scalars(i256 1, i256 2) -- ret i64 %1 --} -- --;; Check that arguments larger than 2*GRLen are handled correctly when their --;; address is passed on the stack rather than in memory. -- --;; Must keep define on a single line due to an update_llc_test_checks.py limitation --define i64 @callee_large_scalars_exhausted_regs(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64 %g, i256 %h, i64 %i, i256 %j) nounwind { --; CHECK-LABEL: callee_large_scalars_exhausted_regs: --; CHECK: # %bb.0: --; CHECK-NEXT: ld.d $a0, $sp, 8 --; CHECK-NEXT: ld.d $a1, $a0, 24 --; CHECK-NEXT: ld.d $a2, $a7, 24 --; CHECK-NEXT: xor $a1, $a2, $a1 --; CHECK-NEXT: ld.d $a2, $a0, 8 --; CHECK-NEXT: ld.d $a3, $a7, 8 --; CHECK-NEXT: xor $a2, $a3, $a2 --; CHECK-NEXT: or $a1, $a2, $a1 --; CHECK-NEXT: ld.d $a2, $a0, 16 --; CHECK-NEXT: ld.d $a3, $a7, 16 --; CHECK-NEXT: xor $a2, $a3, $a2 --; CHECK-NEXT: ld.d $a0, $a0, 0 --; CHECK-NEXT: ld.d $a3, $a7, 0 --; CHECK-NEXT: xor $a0, $a3, $a0 --; CHECK-NEXT: or $a0, $a0, $a2 --; CHECK-NEXT: or $a0, $a0, $a1 --; CHECK-NEXT: sltui $a0, $a0, 1 --; CHECK-NEXT: ret -- %1 = icmp eq i256 %h, %j -- %2 = zext i1 %1 to i64 -- ret i64 %2 --} -- --define i64 @caller_large_scalars_exhausted_regs() nounwind { --; CHECK-LABEL: caller_large_scalars_exhausted_regs: --; CHECK: # %bb.0: --; CHECK-NEXT: addi.d $sp, $sp, -96 --; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill --; CHECK-NEXT: addi.d $a0, $sp, 16 --; CHECK-NEXT: st.d $a0, $sp, 8 --; CHECK-NEXT: ori $a0, $zero, 9 --; CHECK-NEXT: st.d $a0, $sp, 0 --; CHECK-NEXT: ori $a0, $zero, 10 --; CHECK-NEXT: st.d $a0, $sp, 16 --; CHECK-NEXT: st.d $zero, $sp, 40 --; CHECK-NEXT: st.d $zero, $sp, 32 --; CHECK-NEXT: st.d $zero, $sp, 24 --; CHECK-NEXT: st.d $zero, $sp, 72 --; CHECK-NEXT: st.d $zero, $sp, 64 --; CHECK-NEXT: st.d $zero, $sp, 56 --; CHECK-NEXT: ori $a0, $zero, 8 --; CHECK-NEXT: st.d $a0, $sp, 48 --; CHECK-NEXT: ori $a0, $zero, 1 --; CHECK-NEXT: ori $a1, $zero, 2 --; CHECK-NEXT: ori $a2, $zero, 3 --; 
CHECK-NEXT: ori $a3, $zero, 4 --; CHECK-NEXT: ori $a4, $zero, 5 --; CHECK-NEXT: ori $a5, $zero, 6 --; CHECK-NEXT: ori $a6, $zero, 7 --; CHECK-NEXT: addi.d $a7, $sp, 48 --; CHECK-NEXT: bl %plt(callee_large_scalars_exhausted_regs) --; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload --; CHECK-NEXT: addi.d $sp, $sp, 96 --; CHECK-NEXT: ret -- %1 = call i64 @callee_large_scalars_exhausted_regs( -- i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i256 8, i64 9, -- i256 10) -- ret i64 %1 --} -- --;; Check large struct arguments, which are passed byval -- --%struct.large = type { i64, i64, i64, i64 } -- --define i64 @callee_large_struct(ptr byval(%struct.large) align 8 %a) nounwind { --; CHECK-LABEL: callee_large_struct: --; CHECK: # %bb.0: --; CHECK-NEXT: ld.d $a1, $a0, 24 --; CHECK-NEXT: ld.d $a0, $a0, 0 --; CHECK-NEXT: add.d $a0, $a0, $a1 --; CHECK-NEXT: ret -- %1 = getelementptr inbounds %struct.large, ptr %a, i64 0, i32 0 -- %2 = getelementptr inbounds %struct.large, ptr %a, i64 0, i32 3 -- %3 = load i64, ptr %1 -- %4 = load i64, ptr %2 -- %5 = add i64 %3, %4 -- ret i64 %5 --} -- --define i64 @caller_large_struct() nounwind { --; CHECK-LABEL: caller_large_struct: --; CHECK: # %bb.0: --; CHECK-NEXT: addi.d $sp, $sp, -80 --; CHECK-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill --; CHECK-NEXT: ori $a0, $zero, 1 --; CHECK-NEXT: st.d $a0, $sp, 40 --; CHECK-NEXT: st.d $a0, $sp, 8 --; CHECK-NEXT: ori $a0, $zero, 2 --; CHECK-NEXT: st.d $a0, $sp, 48 --; CHECK-NEXT: st.d $a0, $sp, 16 --; CHECK-NEXT: ori $a0, $zero, 3 --; CHECK-NEXT: st.d $a0, $sp, 56 --; CHECK-NEXT: st.d $a0, $sp, 24 --; CHECK-NEXT: ori $a0, $zero, 4 --; CHECK-NEXT: st.d $a0, $sp, 64 --; CHECK-NEXT: st.d $a0, $sp, 32 --; CHECK-NEXT: addi.d $a0, $sp, 8 --; CHECK-NEXT: bl %plt(callee_large_struct) --; CHECK-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload --; CHECK-NEXT: addi.d $sp, $sp, 80 --; CHECK-NEXT: ret -- %ls = alloca %struct.large, align 8 -- %a = getelementptr inbounds %struct.large, ptr %ls, i64 0, i32 0 -- store i64 1, ptr %a -- %b = getelementptr inbounds %struct.large, ptr %ls, i64 0, i32 1 -- store i64 2, ptr %b -- %c = getelementptr inbounds %struct.large, ptr %ls, i64 0, i32 2 -- store i64 3, ptr %c -- %d = getelementptr inbounds %struct.large, ptr %ls, i64 0, i32 3 -- store i64 4, ptr %d -- %1 = call i64 @callee_large_struct(ptr byval(%struct.large) align 8 %ls) -- ret i64 %1 --} -- --;; Check return scalar which size is 2*GRLen. -- --define i128 @callee_small_scalar_ret() nounwind { --; CHECK-LABEL: callee_small_scalar_ret: --; CHECK: # %bb.0: --; CHECK-NEXT: addi.w $a0, $zero, -1 --; CHECK-NEXT: move $a1, $a0 --; CHECK-NEXT: ret -- ret i128 -1 --} -- --define i64 @caller_small_scalar_ret() nounwind { --; CHECK-LABEL: caller_small_scalar_ret: --; CHECK: # %bb.0: --; CHECK-NEXT: addi.d $sp, $sp, -16 --; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill --; CHECK-NEXT: bl %plt(callee_small_scalar_ret) --; CHECK-NEXT: addi.w $a2, $zero, -2 --; CHECK-NEXT: xor $a0, $a0, $a2 --; CHECK-NEXT: orn $a0, $a0, $a1 --; CHECK-NEXT: sltui $a0, $a0, 1 --; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload --; CHECK-NEXT: addi.d $sp, $sp, 16 --; CHECK-NEXT: ret -- %1 = call i128 @callee_small_scalar_ret() -- %2 = icmp eq i128 -2, %1 -- %3 = zext i1 %2 to i64 -- ret i64 %3 --} -- --;; Check return struct which size is 2*GRLen. 
-- --%struct.small = type { i64, ptr } -- --define %struct.small @callee_small_struct_ret() nounwind { --; CHECK-LABEL: callee_small_struct_ret: --; CHECK: # %bb.0: --; CHECK-NEXT: ori $a0, $zero, 1 --; CHECK-NEXT: move $a1, $zero --; CHECK-NEXT: ret -- ret %struct.small { i64 1, ptr null } --} -- --define i64 @caller_small_struct_ret() nounwind { --; CHECK-LABEL: caller_small_struct_ret: --; CHECK: # %bb.0: --; CHECK-NEXT: addi.d $sp, $sp, -16 --; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill --; CHECK-NEXT: bl %plt(callee_small_struct_ret) --; CHECK-NEXT: add.d $a0, $a0, $a1 --; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload --; CHECK-NEXT: addi.d $sp, $sp, 16 --; CHECK-NEXT: ret -- %1 = call %struct.small @callee_small_struct_ret() -- %2 = extractvalue %struct.small %1, 0 -- %3 = extractvalue %struct.small %1, 1 -- %4 = ptrtoint ptr %3 to i64 -- %5 = add i64 %2, %4 -- ret i64 %5 --} -- --;; Check return scalar which size is more than 2*GRLen. -- --define i256 @callee_large_scalar_ret() nounwind { --; CHECK-LABEL: callee_large_scalar_ret: --; CHECK: # %bb.0: --; CHECK-NEXT: addi.w $a1, $zero, -1 --; CHECK-NEXT: st.d $a1, $a0, 24 --; CHECK-NEXT: st.d $a1, $a0, 16 --; CHECK-NEXT: st.d $a1, $a0, 8 --; CHECK-NEXT: lu12i.w $a1, -30141 --; CHECK-NEXT: ori $a1, $a1, 747 --; CHECK-NEXT: st.d $a1, $a0, 0 --; CHECK-NEXT: ret -- ret i256 -123456789 --} -- --define void @caller_large_scalar_ret() nounwind { --; CHECK-LABEL: caller_large_scalar_ret: --; CHECK: # %bb.0: --; CHECK-NEXT: addi.d $sp, $sp, -48 --; CHECK-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill --; CHECK-NEXT: addi.d $a0, $sp, 0 --; CHECK-NEXT: bl %plt(callee_large_scalar_ret) --; CHECK-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload --; CHECK-NEXT: addi.d $sp, $sp, 48 --; CHECK-NEXT: ret -- %1 = call i256 @callee_large_scalar_ret() -- ret void --} -- --;; Check return struct which size is more than 2*GRLen. 
-- --define void @callee_large_struct_ret(ptr noalias sret(%struct.large) %agg.result) nounwind { --; CHECK-LABEL: callee_large_struct_ret: --; CHECK: # %bb.0: --; CHECK-NEXT: ori $a1, $zero, 4 --; CHECK-NEXT: st.w $a1, $a0, 24 --; CHECK-NEXT: ori $a1, $zero, 3 --; CHECK-NEXT: st.w $a1, $a0, 16 --; CHECK-NEXT: ori $a1, $zero, 2 --; CHECK-NEXT: st.w $a1, $a0, 8 --; CHECK-NEXT: st.w $zero, $a0, 28 --; CHECK-NEXT: st.w $zero, $a0, 20 --; CHECK-NEXT: st.w $zero, $a0, 12 --; CHECK-NEXT: st.w $zero, $a0, 4 --; CHECK-NEXT: ori $a1, $zero, 1 --; CHECK-NEXT: st.w $a1, $a0, 0 --; CHECK-NEXT: ret -- %a = getelementptr inbounds %struct.large, ptr %agg.result, i64 0, i32 0 -- store i64 1, ptr %a, align 4 -- %b = getelementptr inbounds %struct.large, ptr %agg.result, i64 0, i32 1 -- store i64 2, ptr %b, align 4 -- %c = getelementptr inbounds %struct.large, ptr %agg.result, i64 0, i32 2 -- store i64 3, ptr %c, align 4 -- %d = getelementptr inbounds %struct.large, ptr %agg.result, i64 0, i32 3 -- store i64 4, ptr %d, align 4 -- ret void --} -- --define i64 @caller_large_struct_ret() nounwind { --; CHECK-LABEL: caller_large_struct_ret: --; CHECK: # %bb.0: --; CHECK-NEXT: addi.d $sp, $sp, -48 --; CHECK-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill --; CHECK-NEXT: addi.d $a0, $sp, 8 --; CHECK-NEXT: bl %plt(callee_large_struct_ret) --; CHECK-NEXT: ld.d $a0, $sp, 32 --; CHECK-NEXT: ld.d $a1, $sp, 8 --; CHECK-NEXT: add.d $a0, $a1, $a0 --; CHECK-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload --; CHECK-NEXT: addi.d $sp, $sp, 48 --; CHECK-NEXT: ret -- %1 = alloca %struct.large -- call void @callee_large_struct_ret(ptr sret(%struct.large) %1) -- %2 = getelementptr inbounds %struct.large, ptr %1, i64 0, i32 0 -- %3 = load i64, ptr %2 -- %4 = getelementptr inbounds %struct.large, ptr %1, i64 0, i32 3 -- %5 = load i64, ptr %4 -- %6 = add i64 %3, %5 -- ret i64 %6 --} -+;; This file contains specific tests for the lp64d ABI. - - ;; Check pass floating-point arguments whith FPRs. 
- -@@ -462,26 +63,26 @@ define i64 @caller_double_in_gpr_exhausted_fprs() nounwind { - ; CHECK: # %bb.0: - ; CHECK-NEXT: addi.d $sp, $sp, -16 - ; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill --; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI21_0) --; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI21_0) -+; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0) -+; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI3_0) - ; CHECK-NEXT: fld.d $fa1, $a0, 0 --; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI21_1) --; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI21_1) -+; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_1) -+; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI3_1) - ; CHECK-NEXT: fld.d $fa2, $a0, 0 --; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI21_2) --; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI21_2) -+; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_2) -+; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI3_2) - ; CHECK-NEXT: fld.d $fa3, $a0, 0 --; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI21_3) --; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI21_3) -+; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_3) -+; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI3_3) - ; CHECK-NEXT: fld.d $fa4, $a0, 0 --; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI21_4) --; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI21_4) -+; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_4) -+; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI3_4) - ; CHECK-NEXT: fld.d $fa5, $a0, 0 --; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI21_5) --; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI21_5) -+; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_5) -+; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI3_5) - ; CHECK-NEXT: fld.d $fa6, $a0, 0 --; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI21_6) --; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI21_6) -+; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_6) -+; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI3_6) - ; CHECK-NEXT: fld.d $fa7, $a0, 0 - ; CHECK-NEXT: addi.d $a0, $zero, 1 - ; CHECK-NEXT: movgr2fr.d $fa0, $a0 -diff --git a/llvm/test/CodeGen/LoongArch/calling-conv-lp64s.ll b/llvm/test/CodeGen/LoongArch/calling-conv-lp64s.ll -new file mode 100644 -index 000000000..d738c066e ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/calling-conv-lp64s.ll -@@ -0,0 +1,97 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -+; RUN: llc --mtriple=loongarch64 --target-abi=lp64s < %s | FileCheck %s -+ -+;; This file contains specific tests for the lp64s ABI. 
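A hedged C++-level sketch of what the first lp64s case compiles to (illustrative only, not part of the patch):

  // Illustration only: with --target-abi=lp64s there are no FPR arguments,
  // so the float below arrives as raw bits in $a1 and the conversion is
  // lowered to a __fixsfdi libcall, matching the callee_float_in_regs
  // CHECK lines that follow.
  extern "C" long long callee_float_in_regs(long long a, float b) {
    return a + (long long)b; // (long long)b becomes a call to __fixsfdi
  }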
-+ -+define i64 @callee_float_in_regs(i64 %a, float %b) nounwind { -+; CHECK-LABEL: callee_float_in_regs: -+; CHECK: # %bb.0: -+; CHECK-NEXT: addi.d $sp, $sp, -16 -+; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -+; CHECK-NEXT: st.d $fp, $sp, 0 # 8-byte Folded Spill -+; CHECK-NEXT: move $fp, $a0 -+; CHECK-NEXT: bstrpick.d $a0, $a1, 31, 0 -+; CHECK-NEXT: bl %plt(__fixsfdi) -+; CHECK-NEXT: add.d $a0, $fp, $a0 -+; CHECK-NEXT: ld.d $fp, $sp, 0 # 8-byte Folded Reload -+; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -+; CHECK-NEXT: addi.d $sp, $sp, 16 -+; CHECK-NEXT: ret -+ %b_fptosi = fptosi float %b to i64 -+ %1 = add i64 %a, %b_fptosi -+ ret i64 %1 -+} -+ -+define i64 @caller_float_in_regs() nounwind { -+; CHECK-LABEL: caller_float_in_regs: -+; CHECK: # %bb.0: -+; CHECK-NEXT: addi.d $sp, $sp, -16 -+; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -+; CHECK-NEXT: ori $a0, $zero, 1 -+; CHECK-NEXT: lu12i.w $a1, 262144 -+; CHECK-NEXT: bl %plt(callee_float_in_regs) -+; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -+; CHECK-NEXT: addi.d $sp, $sp, 16 -+; CHECK-NEXT: ret -+ %1 = call i64 @callee_float_in_regs(i64 1, float 2.0) -+ ret i64 %1 -+} -+ -+define i64 @callee_float_on_stack(i128 %a, i128 %b, i128 %c, i128 %d, float %e) nounwind { -+; CHECK-LABEL: callee_float_on_stack: -+; CHECK: # %bb.0: -+; CHECK-NEXT: ld.w $a0, $sp, 0 -+; CHECK-NEXT: ret -+ %1 = trunc i128 %d to i64 -+ %2 = bitcast float %e to i32 -+ %3 = sext i32 %2 to i64 -+ %4 = add i64 %1, %3 -+ ret i64 %3 -+} -+ -+define i64 @caller_float_on_stack() nounwind { -+; CHECK-LABEL: caller_float_on_stack: -+; CHECK: # %bb.0: -+; CHECK-NEXT: addi.d $sp, $sp, -16 -+; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -+; CHECK-NEXT: lu12i.w $a0, 264704 -+; CHECK-NEXT: st.d $a0, $sp, 0 -+; CHECK-NEXT: ori $a0, $zero, 1 -+; CHECK-NEXT: ori $a2, $zero, 2 -+; CHECK-NEXT: ori $a4, $zero, 3 -+; CHECK-NEXT: ori $a6, $zero, 4 -+; CHECK-NEXT: move $a1, $zero -+; CHECK-NEXT: move $a3, $zero -+; CHECK-NEXT: move $a5, $zero -+; CHECK-NEXT: move $a7, $zero -+; CHECK-NEXT: bl %plt(callee_float_on_stack) -+; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -+; CHECK-NEXT: addi.d $sp, $sp, 16 -+; CHECK-NEXT: ret -+ %1 = call i64 @callee_float_on_stack(i128 1, i128 2, i128 3, i128 4, float 5.0) -+ ret i64 %1 -+} -+ -+define float @callee_tiny_scalar_ret() nounwind { -+; CHECK-LABEL: callee_tiny_scalar_ret: -+; CHECK: # %bb.0: -+; CHECK-NEXT: lu12i.w $a0, 260096 -+; CHECK-NEXT: ret -+ ret float 1.0 -+} -+ -+define i64 @caller_tiny_scalar_ret() nounwind { -+; CHECK-LABEL: caller_tiny_scalar_ret: -+; CHECK: # %bb.0: -+; CHECK-NEXT: addi.d $sp, $sp, -16 -+; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -+; CHECK-NEXT: bl %plt(callee_tiny_scalar_ret) -+; CHECK-NEXT: addi.w $a0, $a0, 0 -+; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -+; CHECK-NEXT: addi.d $sp, $sp, 16 -+; CHECK-NEXT: ret -+ %1 = call float @callee_tiny_scalar_ret() -+ %2 = bitcast float %1 to i32 -+ %3 = sext i32 %2 to i64 -+ ret i64 %3 -+} -diff --git a/llvm/test/CodeGen/LoongArch/cpus-invalid.ll b/llvm/test/CodeGen/LoongArch/cpus-invalid.ll -new file mode 100644 -index 000000000..b5435fb90 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/cpus-invalid.ll -@@ -0,0 +1,7 @@ -+; RUN: llc < %s --mtriple=loongarch64 --mattr=+64bit --mcpu=invalidcpu 2>&1 | FileCheck %s -+ -+; CHECK: {{.*}} is not a recognized processor for this target -+ -+define void @f() { -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/cpus.ll b/llvm/test/CodeGen/LoongArch/cpus.ll 
-new file mode 100644 -index 000000000..35945ae4d ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/cpus.ll -@@ -0,0 +1,20 @@ -+;; This tests that llc accepts all valid LoongArch CPUs. -+;; Note the 'generic' names have been tested in cpu-name-generic.ll. -+ -+; RUN: llc < %s --mtriple=loongarch64 --mcpu=loongarch64 2>&1 | FileCheck %s -+; RUN: llc < %s --mtriple=loongarch64 --mcpu=la464 2>&1 | FileCheck %s -+; RUN: llc < %s --mtriple=loongarch64 2>&1 | FileCheck %s -+ -+; CHECK-NOT: {{.*}} is not a recognized processor for this target -+ -+define void @f() { -+ ret void -+} -+ -+define void @tune_cpu_loongarch64() "tune-cpu"="loongarch64" { -+ ret void -+} -+ -+define void @tune_cpu_la464() "tune-cpu"="la464" { -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/e_flags.ll b/llvm/test/CodeGen/LoongArch/e_flags.ll -index d55b9b726..c004d1f9c 100644 ---- a/llvm/test/CodeGen/LoongArch/e_flags.ll -+++ b/llvm/test/CodeGen/LoongArch/e_flags.ll -@@ -1,15 +1,32 @@ - ; RUN: llc --mtriple=loongarch32 --filetype=obj %s -o %t-la32 - ; RUN: llvm-readelf -h %t-la32 | FileCheck %s --check-prefixes=ILP32,ABI-D --match-full-lines -+ -+; RUN: llc --mtriple=loongarch32 --filetype=obj %s --target-abi=ilp32s -o %t-ilp32s -+; RUN: llvm-readelf -h %t-ilp32s | FileCheck %s --check-prefixes=ILP32,ABI-S --match-full-lines -+ -+; RUN: llc --mtriple=loongarch32 --filetype=obj %s --target-abi=ilp32f -o %t-ilp32f -+; RUN: llvm-readelf -h %t-ilp32f | FileCheck %s --check-prefixes=ILP32,ABI-F --match-full-lines -+ -+; RUN: llc --mtriple=loongarch32 --filetype=obj %s --target-abi=ilp32d -o %t-ilp32d -+; RUN: llvm-readelf -h %t-ilp32d | FileCheck %s --check-prefixes=ILP32,ABI-D --match-full-lines -+ - ; RUN: llc --mtriple=loongarch64 --filetype=obj %s -o %t-la64 - ; RUN: llvm-readelf -h %t-la64 | FileCheck %s --check-prefixes=LP64,ABI-D --match-full-lines - --;; Note that we have not support the -target-abi option to select specific ABI. --;; See comments in LoongArchELFStreamer.cpp. So here we only check the default behaviour. --;; After -target-abi is supported, we can add more tests. -+; RUN: llc --mtriple=loongarch64 --filetype=obj %s --target-abi=lp64s -o %t-lp64s -+; RUN: llvm-readelf -h %t-lp64s | FileCheck %s --check-prefixes=LP64,ABI-S --match-full-lines -+ -+; RUN: llc --mtriple=loongarch64 --filetype=obj %s --target-abi=lp64f -o %t-lp64f -+; RUN: llvm-readelf -h %t-lp64f | FileCheck %s --check-prefixes=LP64,ABI-F --match-full-lines -+ -+; RUN: llc --mtriple=loongarch64 --filetype=obj %s --mattr=+d --target-abi=lp64d -o %t-lp64d -+; RUN: llvm-readelf -h %t-lp64d | FileCheck %s --check-prefixes=LP64,ABI-D --match-full-lines - - ; LP64: Class: ELF64 - ; ILP32: Class: ELF32 - -+; ABI-S: Flags: 0x41, SOFT-FLOAT, OBJ-v1 -+; ABI-F: Flags: 0x42, SINGLE-FLOAT, OBJ-v1 - ; ABI-D: Flags: 0x43, DOUBLE-FLOAT, OBJ-v1 - - define void @foo() { -diff --git a/llvm/test/CodeGen/LoongArch/inline-asm-clobbers-fcc.mir b/llvm/test/CodeGen/LoongArch/inline-asm-clobbers-fcc.mir -new file mode 100644 -index 000000000..fa5fccb1a ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/inline-asm-clobbers-fcc.mir -@@ -0,0 +1,33 @@ -+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -+# RUN: llc --mtriple=loongarch64 --mattr=+d --run-pass=greedy %s -o - | FileCheck %s -+ -+## Check that fcc register clobbered by inlineasm is correctly saved by examing -+## a pair of pseudos (PseudoST_CFR and PseudoLD_CFR) are generated before and -+## after the INLINEASM. -+... 
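A hedged C++-level sketch of the situation the MIR test below models (illustrative only, not part of the patch; the "$fcc0" clobber spelling is an assumption about the front end, since the MIR test itself drives llc directly):

  // Illustration only: a floating-point compare leaves its result in a
  // condition-flag register; the inline asm declares $fcc0 clobbered, so the
  // compiler must spill and reload the compare result around it -- the
  // PseudoST_CFR / PseudoLD_CFR pair the MIR test checks for.
  extern "C" int fcc_clobber(double a, double b) {
    int lt = a < b;                      // FCMP_CLT_D producing an fcc value
    __asm__ volatile("nop" ::: "$fcc0"); // declares $fcc0 clobbered
    return lt;
  }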
-+--- -+name: test -+tracksRegLiveness: true -+body: | -+ bb.0.entry: -+ liveins: $f0_64, $f1_64 -+ -+ ; CHECK-LABEL: name: test -+ ; CHECK: liveins: $f0_64, $f1_64 -+ ; CHECK-NEXT: {{ $}} -+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $f1_64 -+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $f0_64 -+ ; CHECK-NEXT: [[FCMP_CLT_D:%[0-9]+]]:cfr = FCMP_CLT_D [[COPY]], [[COPY1]] -+ ; CHECK-NEXT: PseudoST_CFR [[FCMP_CLT_D]], %stack.0, 0 :: (store (s64) into %stack.0) -+ ; CHECK-NEXT: INLINEASM &nop, 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $fcc0 -+ ; CHECK-NEXT: [[PseudoLD_CFR:%[0-9]+]]:cfr = PseudoLD_CFR %stack.0, 0 :: (load (s64) from %stack.0) -+ ; CHECK-NEXT: $r4 = COPY [[PseudoLD_CFR]] -+ ; CHECK-NEXT: PseudoRET implicit killed $r4 -+ %1:fpr64 = COPY $f1_64 -+ %0:fpr64 = COPY $f0_64 -+ %2:cfr = FCMP_CLT_D %1, %0 -+ INLINEASM &"nop", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $fcc0 -+ $r4 = COPY %2 -+ PseudoRET implicit killed $r4 -+ -+... -diff --git a/llvm/test/CodeGen/LoongArch/intrinsic-csr-side-effects.ll b/llvm/test/CodeGen/LoongArch/intrinsic-csr-side-effects.ll -new file mode 100644 -index 000000000..e3e23e46b ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/intrinsic-csr-side-effects.ll -@@ -0,0 +1,47 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s -+; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s -+ -+declare i32 @llvm.loongarch.csrrd.w(i32 immarg) nounwind -+declare i32 @llvm.loongarch.csrwr.w(i32, i32 immarg) nounwind -+declare void @bug() -+ -+define dso_local void @foo(i32 noundef signext %flag) nounwind { -+; CHECK-LABEL: foo: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: beqz $a0, .LBB0_2 -+; CHECK-NEXT: # %bb.1: # %if.then -+; CHECK-NEXT: csrrd $a0, 2 -+; CHECK-NEXT: ori $a0, $a0, 1 -+; CHECK-NEXT: csrwr $a0, 2 -+; CHECK-NEXT: .LBB0_2: # %if.end -+; CHECK-NEXT: csrrd $a0, 2 -+; CHECK-NEXT: andi $a0, $a0, 1 -+; CHECK-NEXT: bnez $a0, .LBB0_4 -+; CHECK-NEXT: # %bb.3: # %if.then2 -+; CHECK-NEXT: b %plt(bug) -+; CHECK-NEXT: .LBB0_4: # %if.end3 -+; CHECK-NEXT: ret -+entry: -+ %tobool.not = icmp eq i32 %flag, 0 -+ br i1 %tobool.not, label %if.end, label %if.then -+ -+if.then: ; preds = %entry -+ %0 = tail call i32 @llvm.loongarch.csrrd.w(i32 2) -+ %or = or i32 %0, 1 -+ %1 = tail call i32 @llvm.loongarch.csrwr.w(i32 %or, i32 2) -+ br label %if.end -+ -+if.end: ; preds = %if.then, %entry -+ %2 = tail call i32 @llvm.loongarch.csrrd.w(i32 2) -+ %and = and i32 %2, 1 -+ %tobool1.not = icmp eq i32 %and, 0 -+ br i1 %tobool1.not, label %if.then2, label %if.end3 -+ -+if.then2: ; preds = %if.end -+ tail call void @bug() -+ br label %if.end3 -+ -+if.end3: ; preds = %if.then2, %if.end -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/intrinsic-error.ll b/llvm/test/CodeGen/LoongArch/intrinsic-error.ll -index 882e7f693..a839ab149 100644 ---- a/llvm/test/CodeGen/LoongArch/intrinsic-error.ll -+++ b/llvm/test/CodeGen/LoongArch/intrinsic-error.ll -@@ -1,4 +1,3 @@ --; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py - ; RUN: not llc --mtriple=loongarch32 < %s 2>&1 | FileCheck %s - ; RUN: not llc --mtriple=loongarch64 < %s 2>&1 | FileCheck %s - -@@ -13,140 +12,140 @@ declare i32 @llvm.loongarch.csrwr.w(i32, i32 immarg) - declare i32 @llvm.loongarch.csrxchg.w(i32, i32, i32 immarg) - - define void @dbar_imm_out_of_hi_range() #0 { --; CHECK: argument to 'llvm.loongarch.dbar' out of range -+; CHECK: 
llvm.loongarch.dbar: argument out of range. - entry: - call void @llvm.loongarch.dbar(i32 32769) - ret void - } - - define void @dbar_imm_out_of_lo_range() #0 { --; CHECK: argument to 'llvm.loongarch.dbar' out of range -+; CHECK: llvm.loongarch.dbar: argument out of range. - entry: - call void @llvm.loongarch.dbar(i32 -1) - ret void - } - - define void @ibar_imm_out_of_hi_range() #0 { --; CHECK: argument to 'llvm.loongarch.ibar' out of range -+; CHECK: llvm.loongarch.ibar: argument out of range. - entry: - call void @llvm.loongarch.ibar(i32 32769) - ret void - } - - define void @ibar_imm_out_of_lo_range() #0 { --; CHECK: argument to 'llvm.loongarch.ibar' out of range -+; CHECK: llvm.loongarch.ibar: argument out of range. - entry: - call void @llvm.loongarch.ibar(i32 -1) - ret void - } - - define void @break_imm_out_of_hi_range() #0 { --; CHECK: argument to 'llvm.loongarch.break' out of range -+; CHECK: llvm.loongarch.break: argument out of range. - entry: - call void @llvm.loongarch.break(i32 32769) - ret void - } - - define void @break_imm_out_of_lo_range() #0 { --; CHECK: argument to 'llvm.loongarch.break' out of range -+; CHECK: llvm.loongarch.break: argument out of range. - entry: - call void @llvm.loongarch.break(i32 -1) - ret void - } - - define void @movgr2fcsr(i32 %a) nounwind { --; CHECK: llvm.loongarch.movgr2fcsr expects basic f target feature -+; CHECK: llvm.loongarch.movgr2fcsr: requires basic 'f' target feature. - entry: - call void @llvm.loongarch.movgr2fcsr(i32 1, i32 %a) - ret void - } - - define void @movgr2fcsr_imm_out_of_hi_range(i32 %a) #0 { --; CHECK: argument to 'llvm.loongarch.movgr2fcsr' out of range -+; CHECK: llvm.loongarch.movgr2fcsr: argument out of range. - entry: - call void @llvm.loongarch.movgr2fcsr(i32 32, i32 %a) - ret void - } - - define void @movgr2fcsr_imm_out_of_lo_range(i32 %a) #0 { --; CHECK: argument to 'llvm.loongarch.movgr2fcsr' out of range -+; CHECK: llvm.loongarch.movgr2fcsr: argument out of range. - entry: - call void @llvm.loongarch.movgr2fcsr(i32 -1, i32 %a) - ret void - } - - define i32 @movfcsr2gr() nounwind { --; CHECK: llvm.loongarch.movfcsr2gr expects basic f target feature -+; CHECK: llvm.loongarch.movfcsr2gr: requires basic 'f' target feature. - entry: - %res = call i32 @llvm.loongarch.movfcsr2gr(i32 1) - ret i32 %res - } - - define i32 @movfcsr2gr_imm_out_of_hi_range() #0 { --; CHECK: argument to 'llvm.loongarch.movfcsr2gr' out of range -+; CHECK: llvm.loongarch.movfcsr2gr: argument out of range. - entry: - %res = call i32 @llvm.loongarch.movfcsr2gr(i32 32) - ret i32 %res - } - - define i32 @movfcsr2gr_imm_out_of_lo_range() #0 { --; CHECK: argument to 'llvm.loongarch.movfcsr2gr' out of range -+; CHECK: llvm.loongarch.movfcsr2gr: argument out of range. - entry: - %res = call i32 @llvm.loongarch.movfcsr2gr(i32 -1) - ret i32 %res - } - - define void @syscall_imm_out_of_hi_range() #0 { --; CHECK: argument to 'llvm.loongarch.syscall' out of range -+; CHECK: llvm.loongarch.syscall: argument out of range. - entry: - call void @llvm.loongarch.syscall(i32 32769) - ret void - } - - define void @syscall_imm_out_of_lo_range() #0 { --; CHECK: argument to 'llvm.loongarch.syscall' out of range -+; CHECK: llvm.loongarch.syscall: argument out of range. - entry: - call void @llvm.loongarch.syscall(i32 -1) - ret void - } - - define i32 @csrrd_w_imm_out_of_hi_range() #0 { --; CHECK: argument to 'llvm.loongarch.csrrd.w' out of range -+; CHECK: llvm.loongarch.csrrd.w: argument out of range. 
- entry: - %0 = call i32 @llvm.loongarch.csrrd.w(i32 16384) - ret i32 %0 - } - - define i32 @csrrd_w_imm_out_of_lo_range() #0 { --; CHECK: argument to 'llvm.loongarch.csrrd.w' out of range -+; CHECK: llvm.loongarch.csrrd.w: argument out of range. - entry: - %0 = call i32 @llvm.loongarch.csrrd.w(i32 -1) - ret i32 %0 - } - - define i32 @csrwr_w_imm_out_of_hi_range(i32 %a) #0 { --; CHECK: argument to 'llvm.loongarch.csrwr.w' out of range -+; CHECK: llvm.loongarch.csrwr.w: argument out of range. - entry: - %0 = call i32 @llvm.loongarch.csrwr.w(i32 %a, i32 16384) - ret i32 %0 - } - - define i32 @csrwr_w_imm_out_of_lo_range(i32 %a) #0 { --; CHECK: argument to 'llvm.loongarch.csrwr.w' out of range -+; CHECK: llvm.loongarch.csrwr.w: argument out of range. - entry: - %0 = call i32 @llvm.loongarch.csrwr.w(i32 %a, i32 -1) - ret i32 %0 - } - - define i32 @csrxchg_w_imm_out_of_hi_range(i32 %a, i32 %b) #0 { --; CHECK: argument to 'llvm.loongarch.csrxchg.w' out of range -+; CHECK: llvm.loongarch.csrxchg.w: argument out of range. - entry: - %0 = call i32 @llvm.loongarch.csrxchg.w(i32 %a, i32 %b, i32 16384) - ret i32 %0 - } - - define i32 @csrxchg_w_imm_out_of_lo_range(i32 %a, i32 %b) #0 { --; CHECK: argument to 'llvm.loongarch.csrxchg.w' out of range -+; CHECK: llvm.loongarch.csrxchg.w: argument out of range. - entry: - %0 = call i32 @llvm.loongarch.csrxchg.w(i32 %a, i32 %b, i32 -1) - ret i32 %0 -diff --git a/llvm/test/CodeGen/LoongArch/intrinsic-iocsr-side-effects.ll b/llvm/test/CodeGen/LoongArch/intrinsic-iocsr-side-effects.ll -new file mode 100644 -index 000000000..ad78f7f53 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/intrinsic-iocsr-side-effects.ll -@@ -0,0 +1,180 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s -+ -+declare i32 @llvm.loongarch.iocsrrd.b(i32) nounwind -+declare void @llvm.loongarch.iocsrwr.b(i32, i32) nounwind -+declare i32 @llvm.loongarch.iocsrrd.h(i32) nounwind -+declare void @llvm.loongarch.iocsrwr.h(i32, i32) nounwind -+declare i32 @llvm.loongarch.iocsrrd.w(i32) nounwind -+declare void @llvm.loongarch.iocsrwr.w(i32, i32) nounwind -+declare i64 @llvm.loongarch.iocsrrd.d(i32) nounwind -+declare void @llvm.loongarch.iocsrwr.d(i64, i32) nounwind -+declare void @bug() -+ -+define dso_local void @test_b(i32 noundef signext %flag) nounwind { -+; CHECK-LABEL: test_b: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: beqz $a0, .LBB0_2 -+; CHECK-NEXT: # %bb.1: # %if.then -+; CHECK-NEXT: ori $a0, $zero, 2 -+; CHECK-NEXT: iocsrrd.b $a1, $a0 -+; CHECK-NEXT: ori $a1, $a1, 1 -+; CHECK-NEXT: iocsrwr.b $a1, $a0 -+; CHECK-NEXT: .LBB0_2: # %if.end -+; CHECK-NEXT: ori $a0, $zero, 2 -+; CHECK-NEXT: iocsrrd.b $a0, $a0 -+; CHECK-NEXT: andi $a0, $a0, 1 -+; CHECK-NEXT: bnez $a0, .LBB0_4 -+; CHECK-NEXT: # %bb.3: # %if.then2 -+; CHECK-NEXT: b %plt(bug) -+; CHECK-NEXT: .LBB0_4: # %if.end3 -+; CHECK-NEXT: ret -+entry: -+ %tobool.not = icmp eq i32 %flag, 0 -+ br i1 %tobool.not, label %if.end, label %if.then -+ -+if.then: ; preds = %entry -+ %0 = tail call i32 @llvm.loongarch.iocsrrd.b(i32 2) -+ %or = or i32 %0, 1 -+ tail call void @llvm.loongarch.iocsrwr.b(i32 %or, i32 2) -+ br label %if.end -+ -+if.end: ; preds = %if.then, %entry -+ %1 = tail call i32 @llvm.loongarch.iocsrrd.b(i32 2) -+ %and = and i32 %1, 1 -+ %tobool1.not = icmp eq i32 %and, 0 -+ br i1 %tobool1.not, label %if.then2, label %if.end3 -+ -+if.then2: ; preds = %if.end -+ tail call void @bug() -+ br label %if.end3 -+ -+if.end3: ; preds = %if.then2, %if.end -+ 
ret void -+} -+ -+define dso_local void @test_h(i32 noundef signext %flag) nounwind { -+; CHECK-LABEL: test_h: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: beqz $a0, .LBB1_2 -+; CHECK-NEXT: # %bb.1: # %if.then -+; CHECK-NEXT: ori $a0, $zero, 2 -+; CHECK-NEXT: iocsrrd.h $a1, $a0 -+; CHECK-NEXT: ori $a1, $a1, 1 -+; CHECK-NEXT: iocsrwr.h $a1, $a0 -+; CHECK-NEXT: .LBB1_2: # %if.end -+; CHECK-NEXT: ori $a0, $zero, 2 -+; CHECK-NEXT: iocsrrd.h $a0, $a0 -+; CHECK-NEXT: andi $a0, $a0, 1 -+; CHECK-NEXT: bnez $a0, .LBB1_4 -+; CHECK-NEXT: # %bb.3: # %if.then2 -+; CHECK-NEXT: b %plt(bug) -+; CHECK-NEXT: .LBB1_4: # %if.end3 -+; CHECK-NEXT: ret -+entry: -+ %tobool.not = icmp eq i32 %flag, 0 -+ br i1 %tobool.not, label %if.end, label %if.then -+ -+if.then: ; preds = %entry -+ %0 = tail call i32 @llvm.loongarch.iocsrrd.h(i32 2) -+ %or = or i32 %0, 1 -+ tail call void @llvm.loongarch.iocsrwr.h(i32 %or, i32 2) -+ br label %if.end -+ -+if.end: ; preds = %if.then, %entry -+ %1 = tail call i32 @llvm.loongarch.iocsrrd.h(i32 2) -+ %and = and i32 %1, 1 -+ %tobool1.not = icmp eq i32 %and, 0 -+ br i1 %tobool1.not, label %if.then2, label %if.end3 -+ -+if.then2: ; preds = %if.end -+ tail call void @bug() -+ br label %if.end3 -+ -+if.end3: ; preds = %if.then2, %if.end -+ ret void -+} -+ -+define dso_local void @test_w(i32 noundef signext %flag) nounwind { -+; CHECK-LABEL: test_w: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: beqz $a0, .LBB2_2 -+; CHECK-NEXT: # %bb.1: # %if.then -+; CHECK-NEXT: ori $a0, $zero, 2 -+; CHECK-NEXT: iocsrrd.w $a1, $a0 -+; CHECK-NEXT: ori $a1, $a1, 1 -+; CHECK-NEXT: iocsrwr.w $a1, $a0 -+; CHECK-NEXT: .LBB2_2: # %if.end -+; CHECK-NEXT: ori $a0, $zero, 2 -+; CHECK-NEXT: iocsrrd.w $a0, $a0 -+; CHECK-NEXT: andi $a0, $a0, 1 -+; CHECK-NEXT: bnez $a0, .LBB2_4 -+; CHECK-NEXT: # %bb.3: # %if.then2 -+; CHECK-NEXT: b %plt(bug) -+; CHECK-NEXT: .LBB2_4: # %if.end3 -+; CHECK-NEXT: ret -+entry: -+ %tobool.not = icmp eq i32 %flag, 0 -+ br i1 %tobool.not, label %if.end, label %if.then -+ -+if.then: ; preds = %entry -+ %0 = tail call i32 @llvm.loongarch.iocsrrd.w(i32 2) -+ %or = or i32 %0, 1 -+ tail call void @llvm.loongarch.iocsrwr.w(i32 %or, i32 2) -+ br label %if.end -+ -+if.end: ; preds = %if.then, %entry -+ %1 = tail call i32 @llvm.loongarch.iocsrrd.w(i32 2) -+ %and = and i32 %1, 1 -+ %tobool1.not = icmp eq i32 %and, 0 -+ br i1 %tobool1.not, label %if.then2, label %if.end3 -+ -+if.then2: ; preds = %if.end -+ tail call void @bug() -+ br label %if.end3 -+ -+if.end3: ; preds = %if.then2, %if.end -+ ret void -+} -+ -+define dso_local void @test_d(i32 noundef signext %flag) nounwind { -+; CHECK-LABEL: test_d: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: beqz $a0, .LBB3_2 -+; CHECK-NEXT: # %bb.1: # %if.then -+; CHECK-NEXT: ori $a0, $zero, 2 -+; CHECK-NEXT: iocsrrd.d $a1, $a0 -+; CHECK-NEXT: ori $a1, $a1, 1 -+; CHECK-NEXT: iocsrwr.d $a1, $a0 -+; CHECK-NEXT: .LBB3_2: # %if.end -+; CHECK-NEXT: ori $a0, $zero, 2 -+; CHECK-NEXT: iocsrrd.d $a0, $a0 -+; CHECK-NEXT: andi $a0, $a0, 1 -+; CHECK-NEXT: bnez $a0, .LBB3_4 -+; CHECK-NEXT: # %bb.3: # %if.then2 -+; CHECK-NEXT: b %plt(bug) -+; CHECK-NEXT: .LBB3_4: # %if.end3 -+; CHECK-NEXT: ret -+entry: -+ %tobool.not = icmp eq i32 %flag, 0 -+ br i1 %tobool.not, label %if.end, label %if.then -+ -+if.then: ; preds = %entry -+ %0 = tail call i64 @llvm.loongarch.iocsrrd.d(i32 2) -+ %or = or i64 %0, 1 -+ tail call void @llvm.loongarch.iocsrwr.d(i64 %or, i32 2) -+ br label %if.end -+ -+if.end: ; preds = %if.then, %entry -+ %1 = tail call i64 @llvm.loongarch.iocsrrd.d(i32 2) -+ %and = 
and i64 %1, 1 -+ %tobool1.not = icmp eq i64 %and, 0 -+ br i1 %tobool1.not, label %if.then2, label %if.end3 -+ -+if.then2: ; preds = %if.end -+ tail call void @bug() -+ br label %if.end3 -+ -+if.end3: ; preds = %if.then2, %if.end -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/intrinsic-la32-error.ll b/llvm/test/CodeGen/LoongArch/intrinsic-la32-error.ll -index c91516149..5302ba558 100644 ---- a/llvm/test/CodeGen/LoongArch/intrinsic-la32-error.ll -+++ b/llvm/test/CodeGen/LoongArch/intrinsic-la32-error.ll -@@ -20,147 +20,147 @@ declare i64 @llvm.loongarch.lddir.d(i64, i64 immarg) - declare void @llvm.loongarch.ldpte.d(i64, i64 immarg) - - define void @cacop_arg0_out_of_hi_range(i32 %a) nounwind { --; CHECK: argument to 'llvm.loongarch.cacop.w' out of range -+; CHECK: llvm.loongarch.cacop.w: argument out of range - entry: - call void @llvm.loongarch.cacop.w(i32 32, i32 %a, i32 1024) - ret void - } - - define void @cacop_arg0_out_of_lo_range(i32 %a) nounwind { --; CHECK: argument to 'llvm.loongarch.cacop.w' out of range -+; CHECK: llvm.loongarch.cacop.w: argument out of range - entry: - call void @llvm.loongarch.cacop.w(i32 -1, i32 %a, i32 1024) - ret void - } - - define void @cacop_arg2_out_of_hi_range(i32 %a) nounwind { --; CHECK: argument to 'llvm.loongarch.cacop.w' out of range -+; CHECK: llvm.loongarch.cacop.w: argument out of range - entry: - call void @llvm.loongarch.cacop.w(i32 1, i32 %a, i32 4096) - ret void - } - - define void @cacop_arg2_out_of_lo_range(i32 %a) nounwind { --; CHECK: argument to 'llvm.loongarch.cacop.w' out of range -+; CHECK: llvm.loongarch.cacop.w: argument out of range - entry: - call void @llvm.loongarch.cacop.w(i32 1, i32 %a, i32 -4096) - ret void - } - - define i32 @crc_w_b_w(i32 %a, i32 %b) nounwind { --; CHECK: llvm.loongarch.crc.w.b.w requires target: loongarch64 -+; CHECK: llvm.loongarch.crc.w.b.w: requires loongarch64 - entry: - %res = call i32 @llvm.loongarch.crc.w.b.w(i32 %a, i32 %b) - ret i32 %res - } - - define i32 @crc_w_h_w(i32 %a, i32 %b) nounwind { --; CHECK: llvm.loongarch.crc.w.h.w requires target: loongarch64 -+; CHECK: llvm.loongarch.crc.w.h.w: requires loongarch64 - entry: - %res = call i32 @llvm.loongarch.crc.w.h.w(i32 %a, i32 %b) - ret i32 %res - } - - define i32 @crc_w_w_w(i32 %a, i32 %b) nounwind { --; CHECK: llvm.loongarch.crc.w.w.w requires target: loongarch64 -+; CHECK: llvm.loongarch.crc.w.w.w: requires loongarch64 - entry: - %res = call i32 @llvm.loongarch.crc.w.w.w(i32 %a, i32 %b) - ret i32 %res - } - - define i32 @crc_w_d_w(i64 %a, i32 %b) nounwind { --; CHECK: llvm.loongarch.crc.w.d.w requires target: loongarch64 -+; CHECK: llvm.loongarch.crc.w.d.w: requires loongarch64 - entry: - %res = call i32 @llvm.loongarch.crc.w.d.w(i64 %a, i32 %b) - ret i32 %res - } - - define i32 @crcc_w_b_w(i32 %a, i32 %b) nounwind { --; CHECK: llvm.loongarch.crcc.w.b.w requires target: loongarch64 -+; CHECK: llvm.loongarch.crcc.w.b.w: requires loongarch64 - entry: - %res = call i32 @llvm.loongarch.crcc.w.b.w(i32 %a, i32 %b) - ret i32 %res - } - - define i32 @crcc_w_h_w(i32 %a, i32 %b) nounwind { --; CHECK: llvm.loongarch.crcc.w.h.w requires target: loongarch64 -+; CHECK: llvm.loongarch.crcc.w.h.w: requires loongarch64 - entry: - %res = call i32 @llvm.loongarch.crcc.w.h.w(i32 %a, i32 %b) - ret i32 %res - } - - define i32 @crcc_w_w_w(i32 %a, i32 %b) nounwind { --; CHECK: llvm.loongarch.crcc.w.w.w requires target: loongarch64 -+; CHECK: llvm.loongarch.crcc.w.w.w: requires loongarch64 - entry: - %res = call i32 @llvm.loongarch.crcc.w.w.w(i32 %a, i32 
%b) - ret i32 %res - } - - define i32 @crcc_w_d_w(i64 %a, i32 %b) nounwind { --; CHECK: llvm.loongarch.crcc.w.d.w requires target: loongarch64 -+; CHECK: llvm.loongarch.crcc.w.d.w: requires loongarch64 - entry: - %res = call i32 @llvm.loongarch.crcc.w.d.w(i64 %a, i32 %b) - ret i32 %res - } - - define i64 @csrrd_d() { --; CHECK: llvm.loongarch.csrrd.d requires target: loongarch64 -+; CHECK: llvm.loongarch.csrrd.d: requires loongarch64 - entry: - %0 = tail call i64 @llvm.loongarch.csrrd.d(i32 1) - ret i64 %0 - } - - define i64 @csrwr_d(i64 %a) { --; CHECK: llvm.loongarch.csrwr.d requires target: loongarch64 -+; CHECK: llvm.loongarch.csrwr.d: requires loongarch64 - entry: - %0 = tail call i64 @llvm.loongarch.csrwr.d(i64 %a, i32 1) - ret i64 %0 - } - - define i64 @csrxchg_d(i64 %a, i64 %b) { --; CHECK: llvm.loongarch.csrxchg.d requires target: loongarch64 -+; CHECK: llvm.loongarch.csrxchg.d: requires loongarch64 - entry: - %0 = tail call i64 @llvm.loongarch.csrxchg.d(i64 %a, i64 %b, i32 1) - ret i64 %0 - } - - define i64 @iocsrrd_d(i32 %a) { --; CHECK: llvm.loongarch.iocsrrd.d requires target: loongarch64 -+; CHECK: llvm.loongarch.iocsrrd.d: requires loongarch64 - entry: - %0 = tail call i64 @llvm.loongarch.iocsrrd.d(i32 %a) - ret i64 %0 - } - - define void @iocsrwr_d(i64 %a, i32 signext %b) { --; CHECK: llvm.loongarch.iocsrwr.d requires target: loongarch64 -+; CHECK: llvm.loongarch.iocsrwr.d: requires loongarch64 - entry: - tail call void @llvm.loongarch.iocsrwr.d(i64 %a, i32 %b) - ret void - } - - define void @asrtle_d(i64 %a, i64 %b) { --; CHECK: llvm.loongarch.asrtle.d requires target: loongarch64 -+; CHECK: llvm.loongarch.asrtle.d: requires loongarch64 - entry: - tail call void @llvm.loongarch.asrtle.d(i64 %a, i64 %b) - ret void - } - - define void @asrtgt_d(i64 %a, i64 %b) { --; CHECK: llvm.loongarch.asrtgt.d requires target: loongarch64 -+; CHECK: llvm.loongarch.asrtgt.d: requires loongarch64 - entry: - tail call void @llvm.loongarch.asrtgt.d(i64 %a, i64 %b) - ret void - } - - define i64 @lddir_d(i64 %a) { --; CHECK: llvm.loongarch.lddir.d requires target: loongarch64 -+; CHECK: llvm.loongarch.lddir.d: requires loongarch64 - entry: - %0 = tail call i64 @llvm.loongarch.lddir.d(i64 %a, i64 1) - ret i64 %0 - } - - define void @ldpte_d(i64 %a) { --; CHECK: llvm.loongarch.ldpte.d requires target: loongarch64 -+; CHECK: llvm.loongarch.ldpte.d: requires loongarch64 - entry: - tail call void @llvm.loongarch.ldpte.d(i64 %a, i64 1) - ret void -diff --git a/llvm/test/CodeGen/LoongArch/intrinsic-la64-error.ll b/llvm/test/CodeGen/LoongArch/intrinsic-la64-error.ll -index 51f6c4453..4716d401d 100644 ---- a/llvm/test/CodeGen/LoongArch/intrinsic-la64-error.ll -+++ b/llvm/test/CodeGen/LoongArch/intrinsic-la64-error.ll -@@ -8,76 +8,76 @@ declare i64 @llvm.loongarch.csrwr.d(i64, i32 immarg) - declare i64 @llvm.loongarch.csrxchg.d(i64, i64, i32 immarg) - - define i64 @csrrd_d_imm_out_of_hi_range() nounwind { --; CHECK: argument to 'llvm.loongarch.csrrd.d' out of range -+; CHECK: llvm.loongarch.csrrd.d: argument out of range - entry: - %0 = call i64 @llvm.loongarch.csrrd.d(i32 16384) - ret i64 %0 - } - - define i64 @csrrd_d_imm_out_of_lo_range() nounwind { --; CHECK: argument to 'llvm.loongarch.csrrd.d' out of range -+; CHECK: llvm.loongarch.csrrd.d: argument out of range - entry: - %0 = call i64 @llvm.loongarch.csrrd.d(i32 -1) - ret i64 %0 - } - - define i64 @csrwr_d_imm_out_of_hi_range(i64 %a) nounwind { --; CHECK: argument to 'llvm.loongarch.csrwr.d' out of range -+; CHECK: llvm.loongarch.csrwr.d: 
argument out of range - entry: - %0 = call i64 @llvm.loongarch.csrwr.d(i64 %a, i32 16384) - ret i64 %0 - } - - define i64 @csrwr_d_imm_out_of_lo_range(i64 %a) nounwind { --; CHECK: argument to 'llvm.loongarch.csrwr.d' out of range -+; CHECK: llvm.loongarch.csrwr.d: argument out of range - entry: - %0 = call i64 @llvm.loongarch.csrwr.d(i64 %a, i32 -1) - ret i64 %0 - } - - define i64 @csrxchg_d_imm_out_of_hi_range(i64 %a, i64 %b) nounwind { --; CHECK: argument to 'llvm.loongarch.csrxchg.d' out of range -+; CHECK: llvm.loongarch.csrxchg.d: argument out of range - entry: - %0 = call i64 @llvm.loongarch.csrxchg.d(i64 %a, i64 %b, i32 16384) - ret i64 %0 - } - - define i64 @csrxchg_d_imm_out_of_lo_range(i64 %a, i64 %b) nounwind { --; CHECK: argument to 'llvm.loongarch.csrxchg.d' out of range -+; CHECK: llvm.loongarch.csrxchg.d: argument out of range - entry: - %0 = call i64 @llvm.loongarch.csrxchg.d(i64 %a, i64 %b, i32 -1) - ret i64 %0 - } - - define void @cacop_w(i32 %a) nounwind { --; CHECK: llvm.loongarch.cacop.w requires target: loongarch32 -+; CHECK: llvm.loongarch.cacop.w: requires loongarch32 - call void @llvm.loongarch.cacop.w(i32 1, i32 %a, i32 4) - ret void - } - - define void @cacop_arg0_out_of_hi_range(i64 %a) nounwind { --; CHECK: argument to 'llvm.loongarch.cacop.d' out of range -+; CHECK: llvm.loongarch.cacop.d: argument out of range - entry: - call void @llvm.loongarch.cacop.d(i64 32, i64 %a, i64 1024) - ret void - } - - define void @cacop_arg0_out_of_lo_range(i64 %a) nounwind { --; CHECK: argument to 'llvm.loongarch.cacop.d' out of range -+; CHECK: llvm.loongarch.cacop.d: argument out of range - entry: - call void @llvm.loongarch.cacop.d(i64 -1, i64 %a, i64 1024) - ret void - } - - define void @cacop_arg2_out_of_hi_range(i64 %a) nounwind { --; CHECK: argument to 'llvm.loongarch.cacop.d' out of range -+; CHECK: llvm.loongarch.cacop.d: argument out of range - entry: - call void @llvm.loongarch.cacop.d(i64 1, i64 %a, i64 4096) - ret void - } - - define void @cacop_arg2_out_of_lo_range(i64 %a) nounwind { --; CHECK: argument to 'llvm.loongarch.cacop.d' out of range -+; CHECK: llvm.loongarch.cacop.d: argument out of range - entry: - call void @llvm.loongarch.cacop.d(i64 1, i64 %a, i64 -4096) - ret void -diff --git a/llvm/test/CodeGen/LoongArch/intrinsic-la64.ll b/llvm/test/CodeGen/LoongArch/intrinsic-la64.ll -index 7b28682b5..f0ebd8508 100644 ---- a/llvm/test/CodeGen/LoongArch/intrinsic-la64.ll -+++ b/llvm/test/CodeGen/LoongArch/intrinsic-la64.ll -@@ -29,6 +29,14 @@ define i32 @crc_w_b_w(i32 %a, i32 %b) nounwind { - ret i32 %res - } - -+define void @crc_w_b_w_noret(i32 %a, i32 %b) nounwind { -+; CHECK-LABEL: crc_w_b_w_noret: -+; CHECK: # %bb.0: -+; CHECK-NEXT: ret -+ %res = call i32 @llvm.loongarch.crc.w.b.w(i32 %a, i32 %b) -+ ret void -+} -+ - define i32 @crc_w_h_w(i32 %a, i32 %b) nounwind { - ; CHECK-LABEL: crc_w_h_w: - ; CHECK: # %bb.0: -@@ -38,6 +46,14 @@ define i32 @crc_w_h_w(i32 %a, i32 %b) nounwind { - ret i32 %res - } - -+define void @crc_w_h_w_noret(i32 %a, i32 %b) nounwind { -+; CHECK-LABEL: crc_w_h_w_noret: -+; CHECK: # %bb.0: -+; CHECK-NEXT: ret -+ %res = call i32 @llvm.loongarch.crc.w.h.w(i32 %a, i32 %b) -+ ret void -+} -+ - define i32 @crc_w_w_w(i32 %a, i32 %b) nounwind { - ; CHECK-LABEL: crc_w_w_w: - ; CHECK: # %bb.0: -@@ -47,6 +63,14 @@ define i32 @crc_w_w_w(i32 %a, i32 %b) nounwind { - ret i32 %res - } - -+define void @crc_w_w_w_noret(i32 %a, i32 %b) nounwind { -+; CHECK-LABEL: crc_w_w_w_noret: -+; CHECK: # %bb.0: -+; CHECK-NEXT: ret -+ %res = call i32 
@llvm.loongarch.crc.w.w.w(i32 %a, i32 %b) -+ ret void -+} -+ - define void @cacop_d(i64 %a) nounwind { - ; CHECK-LABEL: cacop_d: - ; CHECK: # %bb.0: -@@ -65,6 +89,14 @@ define i32 @crc_w_d_w(i64 %a, i32 %b) nounwind { - ret i32 %res - } - -+define void @crc_w_d_w_noret(i64 %a, i32 %b) nounwind { -+; CHECK-LABEL: crc_w_d_w_noret: -+; CHECK: # %bb.0: -+; CHECK-NEXT: ret -+ %res = call i32 @llvm.loongarch.crc.w.d.w(i64 %a, i32 %b) -+ ret void -+} -+ - define i32 @crcc_w_b_w(i32 %a, i32 %b) nounwind { - ; CHECK-LABEL: crcc_w_b_w: - ; CHECK: # %bb.0: -@@ -74,6 +106,14 @@ define i32 @crcc_w_b_w(i32 %a, i32 %b) nounwind { - ret i32 %res - } - -+define void @crcc_w_b_w_noret(i32 %a, i32 %b) nounwind { -+; CHECK-LABEL: crcc_w_b_w_noret: -+; CHECK: # %bb.0: -+; CHECK-NEXT: ret -+ %res = call i32 @llvm.loongarch.crcc.w.b.w(i32 %a, i32 %b) -+ ret void -+} -+ - define i32 @crcc_w_h_w(i32 %a, i32 %b) nounwind { - ; CHECK-LABEL: crcc_w_h_w: - ; CHECK: # %bb.0: -@@ -83,6 +123,14 @@ define i32 @crcc_w_h_w(i32 %a, i32 %b) nounwind { - ret i32 %res - } - -+define void @crcc_w_h_w_noret(i32 %a, i32 %b) nounwind { -+; CHECK-LABEL: crcc_w_h_w_noret: -+; CHECK: # %bb.0: -+; CHECK-NEXT: ret -+ %res = call i32 @llvm.loongarch.crcc.w.h.w(i32 %a, i32 %b) -+ ret void -+} -+ - define i32 @crcc_w_w_w(i32 %a, i32 %b) nounwind { - ; CHECK-LABEL: crcc_w_w_w: - ; CHECK: # %bb.0: -@@ -92,6 +140,14 @@ define i32 @crcc_w_w_w(i32 %a, i32 %b) nounwind { - ret i32 %res - } - -+define void @crcc_w_w_w_noret(i32 %a, i32 %b) nounwind { -+; CHECK-LABEL: crcc_w_w_w_noret: -+; CHECK: # %bb.0: -+; CHECK-NEXT: ret -+ %res = call i32 @llvm.loongarch.crcc.w.w.w(i32 %a, i32 %b) -+ ret void -+} -+ - define i32 @crcc_w_d_w(i64 %a, i32 %b) nounwind { - ; CHECK-LABEL: crcc_w_d_w: - ; CHECK: # %bb.0: -@@ -101,6 +157,14 @@ define i32 @crcc_w_d_w(i64 %a, i32 %b) nounwind { - ret i32 %res - } - -+define void @crcc_w_d_w_noret(i64 %a, i32 %b) nounwind { -+; CHECK-LABEL: crcc_w_d_w_noret: -+; CHECK: # %bb.0: -+; CHECK-NEXT: ret -+ %res = call i32 @llvm.loongarch.crcc.w.d.w(i64 %a, i32 %b) -+ ret void -+} -+ - define i64 @csrrd_d() { - ; CHECK-LABEL: csrrd_d: - ; CHECK: # %bb.0: # %entry -@@ -111,6 +175,16 @@ entry: - ret i64 %0 - } - -+define void @csrrd_d_noret() { -+; CHECK-LABEL: csrrd_d_noret: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: csrrd $a0, 1 -+; CHECK-NEXT: ret -+entry: -+ %0 = tail call i64 @llvm.loongarch.csrrd.d(i32 1) -+ ret void -+} -+ - define i64 @csrwr_d(i64 %a) { - ; CHECK-LABEL: csrwr_d: - ; CHECK: # %bb.0: # %entry -@@ -121,6 +195,17 @@ entry: - ret i64 %0 - } - -+;; Check that csrwr is emitted even if the return value of the intrinsic is not used. -+define void @csrwr_d_noret(i64 %a) { -+; CHECK-LABEL: csrwr_d_noret: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: csrwr $a0, 1 -+; CHECK-NEXT: ret -+entry: -+ %0 = tail call i64 @llvm.loongarch.csrwr.d(i64 %a, i32 1) -+ ret void -+} -+ - define i64 @csrxchg_d(i64 %a, i64 %b) { - ; CHECK-LABEL: csrxchg_d: - ; CHECK: # %bb.0: # %entry -@@ -131,6 +216,17 @@ entry: - ret i64 %0 - } - -+;; Check that csrxchg is emitted even if the return value of the intrinsic is not used. 
-+define void @csrxchg_d_noret(i64 %a, i64 %b) { -+; CHECK-LABEL: csrxchg_d_noret: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: csrxchg $a0, $a1, 1 -+; CHECK-NEXT: ret -+entry: -+ %0 = tail call i64 @llvm.loongarch.csrxchg.d(i64 %a, i64 %b, i32 1) -+ ret void -+} -+ - define i64 @iocsrrd_d(i32 %a) { - ; CHECK-LABEL: iocsrrd_d: - ; CHECK: # %bb.0: # %entry -@@ -141,6 +237,16 @@ entry: - ret i64 %0 - } - -+define void @iocsrrd_d_noret(i32 %a) { -+; CHECK-LABEL: iocsrrd_d_noret: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: iocsrrd.d $a0, $a0 -+; CHECK-NEXT: ret -+entry: -+ %0 = tail call i64 @llvm.loongarch.iocsrrd.d(i32 %a) -+ ret void -+} -+ - define void @iocsrwr_d(i64 %a, i32 signext %b) { - ; CHECK-LABEL: iocsrwr_d: - ; CHECK: # %bb.0: # %entry -@@ -181,6 +287,16 @@ entry: - ret i64 %0 - } - -+define void @lddir_d_noret(i64 %a) { -+; CHECK-LABEL: lddir_d_noret: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: lddir $a0, $a0, 1 -+; CHECK-NEXT: ret -+entry: -+ %0 = tail call i64 @llvm.loongarch.lddir.d(i64 %a, i64 1) -+ ret void -+} -+ - define void @ldpte_d(i64 %a) { - ; CHECK-LABEL: ldpte_d: - ; CHECK: # %bb.0: # %entry -diff --git a/llvm/test/CodeGen/LoongArch/intrinsic.ll b/llvm/test/CodeGen/LoongArch/intrinsic.ll -index cfd54e17d..f49a2500a 100644 ---- a/llvm/test/CodeGen/LoongArch/intrinsic.ll -+++ b/llvm/test/CodeGen/LoongArch/intrinsic.ll -@@ -69,6 +69,17 @@ entry: - ret i32 %res - } - -+;; TODO: Optimize out `movfcsr2gr` without data-dependency. -+define void @movfcsr2gr_noret() nounwind { -+; CHECK-LABEL: movfcsr2gr_noret: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: movfcsr2gr $a0, $fcsr1 -+; CHECK-NEXT: ret -+entry: -+ %res = call i32 @llvm.loongarch.movfcsr2gr(i32 1) -+ ret void -+} -+ - define void @syscall() nounwind { - ; CHECK-LABEL: syscall: - ; CHECK: # %bb.0: # %entry -@@ -89,6 +100,16 @@ entry: - ret i32 %0 - } - -+define void @csrrd_w_noret() { -+; CHECK-LABEL: csrrd_w_noret: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: csrrd $a0, 1 -+; CHECK-NEXT: ret -+entry: -+ %0 = tail call i32 @llvm.loongarch.csrrd.w(i32 1) -+ ret void -+} -+ - define i32 @csrwr_w(i32 signext %a) { - ; CHECK-LABEL: csrwr_w: - ; CHECK: # %bb.0: # %entry -@@ -99,6 +120,17 @@ entry: - ret i32 %0 - } - -+;; Check that csrwr is emitted even if the return value of the intrinsic is not used. -+define void @csrwr_w_noret(i32 signext %a) { -+; CHECK-LABEL: csrwr_w_noret: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: csrwr $a0, 1 -+; CHECK-NEXT: ret -+entry: -+ %0 = tail call i32 @llvm.loongarch.csrwr.w(i32 %a, i32 1) -+ ret void -+} -+ - define i32 @csrxchg_w(i32 signext %a, i32 signext %b) { - ; CHECK-LABEL: csrxchg_w: - ; CHECK: # %bb.0: # %entry -@@ -109,6 +141,17 @@ entry: - ret i32 %0 - } - -+;; Check that csrxchg is emitted even if the return value of the intrinsic is not used. 
-+define void @csrxchg_w_noret(i32 signext %a, i32 signext %b) { -+; CHECK-LABEL: csrxchg_w_noret: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: csrxchg $a0, $a1, 1 -+; CHECK-NEXT: ret -+entry: -+ %0 = tail call i32 @llvm.loongarch.csrxchg.w(i32 %a, i32 %b, i32 1) -+ ret void -+} -+ - define i32 @iocsrrd_b(i32 %a) { - ; CHECK-LABEL: iocsrrd_b: - ; CHECK: # %bb.0: # %entry -@@ -139,6 +182,36 @@ entry: - ret i32 %0 - } - -+define void @iocsrrd_b_noret(i32 %a) { -+; CHECK-LABEL: iocsrrd_b_noret: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: iocsrrd.b $a0, $a0 -+; CHECK-NEXT: ret -+entry: -+ %0 = tail call i32 @llvm.loongarch.iocsrrd.b(i32 %a) -+ ret void -+} -+ -+define void @iocsrrd_h_noret(i32 %a) { -+; CHECK-LABEL: iocsrrd_h_noret: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: iocsrrd.h $a0, $a0 -+; CHECK-NEXT: ret -+entry: -+ %0 = tail call i32 @llvm.loongarch.iocsrrd.h(i32 %a) -+ ret void -+} -+ -+define void @iocsrrd_w_noret(i32 %a) { -+; CHECK-LABEL: iocsrrd_w_noret: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: iocsrrd.w $a0, $a0 -+; CHECK-NEXT: ret -+entry: -+ %0 = tail call i32 @llvm.loongarch.iocsrrd.w(i32 %a) -+ ret void -+} -+ - define void @iocsrwr_b(i32 %a, i32 %b) { - ; CHECK-LABEL: iocsrwr_b: - ; CHECK: # %bb.0: # %entry -@@ -178,3 +251,12 @@ entry: - %0 = tail call i32 @llvm.loongarch.cpucfg(i32 %a) - ret i32 %0 - } -+ -+define void @cpucfg_noret(i32 %a) { -+; CHECK-LABEL: cpucfg_noret: -+; CHECK: # %bb.0: # %entry -+; CHECK-NEXT: ret -+entry: -+ %0 = tail call i32 @llvm.loongarch.cpucfg(i32 %a) -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll -index 01f96688f..3d7aa871b 100644 ---- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll -+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll -@@ -315,10 +315,7 @@ define double @double_fadd_acquire(ptr %p) nounwind { - ; LA64F-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill - ; LA64F-NEXT: st.d $s3, $sp, 16 # 8-byte Folded Spill - ; LA64F-NEXT: move $fp, $a0 --; LA64F-NEXT: ld.wu $a0, $a0, 0 --; LA64F-NEXT: ld.wu $a1, $fp, 4 --; LA64F-NEXT: slli.d $a1, $a1, 32 --; LA64F-NEXT: or $a0, $a1, $a0 -+; LA64F-NEXT: ld.d $a0, $a0, 0 - ; LA64F-NEXT: ori $s0, $zero, 8 - ; LA64F-NEXT: addi.d $s1, $sp, 8 - ; LA64F-NEXT: addi.d $s2, $sp, 0 -@@ -360,11 +357,7 @@ define double @double_fadd_acquire(ptr %p) nounwind { - ; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill - ; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill - ; LA64D-NEXT: move $fp, $a0 --; LA64D-NEXT: ld.wu $a0, $a0, 0 --; LA64D-NEXT: ld.wu $a1, $fp, 4 --; LA64D-NEXT: slli.d $a1, $a1, 32 --; LA64D-NEXT: or $a0, $a1, $a0 --; LA64D-NEXT: movgr2fr.d $fa0, $a0 -+; LA64D-NEXT: fld.d $fa0, $a0, 0 - ; LA64D-NEXT: addi.d $a0, $zero, 1 - ; LA64D-NEXT: movgr2fr.d $fs0, $a0 - ; LA64D-NEXT: ori $s0, $zero, 8 -@@ -411,10 +404,7 @@ define double @double_fsub_acquire(ptr %p) nounwind { - ; LA64F-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill - ; LA64F-NEXT: st.d $s3, $sp, 16 # 8-byte Folded Spill - ; LA64F-NEXT: move $fp, $a0 --; LA64F-NEXT: ld.wu $a0, $a0, 0 --; LA64F-NEXT: ld.wu $a1, $fp, 4 --; LA64F-NEXT: slli.d $a1, $a1, 32 --; LA64F-NEXT: or $a0, $a1, $a0 -+; LA64F-NEXT: ld.d $a0, $a0, 0 - ; LA64F-NEXT: ori $s0, $zero, 8 - ; LA64F-NEXT: addi.d $s1, $sp, 8 - ; LA64F-NEXT: addi.d $s2, $sp, 0 -@@ -456,11 +446,7 @@ define double @double_fsub_acquire(ptr %p) nounwind { - ; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill - ; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded 
Spill - ; LA64D-NEXT: move $fp, $a0 --; LA64D-NEXT: ld.wu $a0, $a0, 0 --; LA64D-NEXT: ld.wu $a1, $fp, 4 --; LA64D-NEXT: slli.d $a1, $a1, 32 --; LA64D-NEXT: or $a0, $a1, $a0 --; LA64D-NEXT: movgr2fr.d $fa0, $a0 -+; LA64D-NEXT: fld.d $fa0, $a0, 0 - ; LA64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0) - ; LA64D-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI5_0) - ; LA64D-NEXT: fld.d $fs0, $a0, 0 -@@ -507,10 +493,7 @@ define double @double_fmin_acquire(ptr %p) nounwind { - ; LA64F-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill - ; LA64F-NEXT: st.d $s3, $sp, 16 # 8-byte Folded Spill - ; LA64F-NEXT: move $fp, $a0 --; LA64F-NEXT: ld.wu $a0, $a0, 0 --; LA64F-NEXT: ld.wu $a1, $fp, 4 --; LA64F-NEXT: slli.d $a1, $a1, 32 --; LA64F-NEXT: or $a0, $a1, $a0 -+; LA64F-NEXT: ld.d $a0, $a0, 0 - ; LA64F-NEXT: ori $s0, $zero, 8 - ; LA64F-NEXT: addi.d $s1, $sp, 8 - ; LA64F-NEXT: addi.d $s2, $sp, 0 -@@ -552,11 +535,7 @@ define double @double_fmin_acquire(ptr %p) nounwind { - ; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill - ; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill - ; LA64D-NEXT: move $fp, $a0 --; LA64D-NEXT: ld.wu $a0, $a0, 0 --; LA64D-NEXT: ld.wu $a1, $fp, 4 --; LA64D-NEXT: slli.d $a1, $a1, 32 --; LA64D-NEXT: or $a0, $a1, $a0 --; LA64D-NEXT: movgr2fr.d $fa0, $a0 -+; LA64D-NEXT: fld.d $fa0, $a0, 0 - ; LA64D-NEXT: addi.d $a0, $zero, 1 - ; LA64D-NEXT: movgr2fr.d $fs0, $a0 - ; LA64D-NEXT: ori $s0, $zero, 8 -@@ -604,10 +583,7 @@ define double @double_fmax_acquire(ptr %p) nounwind { - ; LA64F-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill - ; LA64F-NEXT: st.d $s3, $sp, 16 # 8-byte Folded Spill - ; LA64F-NEXT: move $fp, $a0 --; LA64F-NEXT: ld.wu $a0, $a0, 0 --; LA64F-NEXT: ld.wu $a1, $fp, 4 --; LA64F-NEXT: slli.d $a1, $a1, 32 --; LA64F-NEXT: or $a0, $a1, $a0 -+; LA64F-NEXT: ld.d $a0, $a0, 0 - ; LA64F-NEXT: ori $s0, $zero, 8 - ; LA64F-NEXT: addi.d $s1, $sp, 8 - ; LA64F-NEXT: addi.d $s2, $sp, 0 -@@ -649,11 +625,7 @@ define double @double_fmax_acquire(ptr %p) nounwind { - ; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill - ; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill - ; LA64D-NEXT: move $fp, $a0 --; LA64D-NEXT: ld.wu $a0, $a0, 0 --; LA64D-NEXT: ld.wu $a1, $fp, 4 --; LA64D-NEXT: slli.d $a1, $a1, 32 --; LA64D-NEXT: or $a0, $a1, $a0 --; LA64D-NEXT: movgr2fr.d $fa0, $a0 -+; LA64D-NEXT: fld.d $fa0, $a0, 0 - ; LA64D-NEXT: addi.d $a0, $zero, 1 - ; LA64D-NEXT: movgr2fr.d $fs0, $a0 - ; LA64D-NEXT: ori $s0, $zero, 8 -diff --git a/llvm/test/CodeGen/LoongArch/patchable-function-entry.ll b/llvm/test/CodeGen/LoongArch/patchable-function-entry.ll -new file mode 100644 -index 000000000..12d4bfb50 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/patchable-function-entry.ll -@@ -0,0 +1,63 @@ -+;; Test the function attribute "patchable-function-entry". -+;; Adapted from the RISCV test case. 
-+; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefixes=CHECK,LA32 -+; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefixes=CHECK,LA64 -+ -+define void @f0() "patchable-function-entry"="0" { -+; CHECK-LABEL: f0: -+; CHECK-NEXT: .Lfunc_begin0: -+; CHECK-NOT: nop -+; CHECK: ret -+; CHECK-NOT: .section __patchable_function_entries -+ ret void -+} -+ -+define void @f1() "patchable-function-entry"="1" { -+; CHECK-LABEL: f1: -+; CHECK-NEXT: .Lfunc_begin1: -+; CHECK: nop -+; CHECK-NEXT: ret -+; CHECK: .section __patchable_function_entries,"awo",@progbits,f1{{$}} -+; LA32: .p2align 2 -+; LA32-NEXT: .word .Lfunc_begin1 -+; LA64: .p2align 3 -+; LA64-NEXT: .dword .Lfunc_begin1 -+ ret void -+} -+ -+$f5 = comdat any -+define void @f5() "patchable-function-entry"="5" comdat { -+; CHECK-LABEL: f5: -+; CHECK-NEXT: .Lfunc_begin2: -+; CHECK-COUNT-5: nop -+; CHECK-NEXT: ret -+; CHECK: .section __patchable_function_entries,"aGwo",@progbits,f5,comdat,f5{{$}} -+; LA32: .p2align 2 -+; LA32-NEXT: .word .Lfunc_begin2 -+; LA64: .p2align 3 -+; LA64-NEXT: .dword .Lfunc_begin2 -+ ret void -+} -+ -+;; -fpatchable-function-entry=3,2 -+;; "patchable-function-prefix" emits data before the function entry label. -+define void @f3_2() "patchable-function-entry"="1" "patchable-function-prefix"="2" { -+; CHECK-LABEL: .type f3_2,@function -+; CHECK-NEXT: .Ltmp0: # @f3_2 -+; CHECK-COUNT-2: nop -+; CHECK-NEXT: f3_2: -+; CHECK: # %bb.0: -+; CHECK-NEXT: nop -+; LA32-NEXT: addi.w $sp, $sp, -16 -+; LA64-NEXT: addi.d $sp, $sp, -16 -+;; .size does not include the prefix. -+; CHECK: .Lfunc_end3: -+; CHECK-NEXT: .size f3_2, .Lfunc_end3-f3_2 -+; CHECK: .section __patchable_function_entries,"awo",@progbits,f3_2{{$}} -+; LA32: .p2align 2 -+; LA32-NEXT: .word .Ltmp0 -+; LA64: .p2align 3 -+; LA64-NEXT: .dword .Ltmp0 -+ %frame = alloca i8, i32 16 -+ ret void -+} -diff --git a/llvm/test/CodeGen/LoongArch/tail-calls.ll b/llvm/test/CodeGen/LoongArch/tail-calls.ll -index f09b49688..ff5476317 100644 ---- a/llvm/test/CodeGen/LoongArch/tail-calls.ll -+++ b/llvm/test/CodeGen/LoongArch/tail-calls.ll -@@ -13,6 +13,7 @@ entry: - } - - ;; Perform tail call optimization for external symbol. -+;; Bytes copied should be large enough, otherwise the memcpy call would be optimized to multiple ld/st insns. - @dest = global [2 x i8] zeroinitializer - declare void @llvm.memcpy.p0i8.p0i8.i32(ptr, ptr, i32, i1) - define void @caller_extern(ptr %src) optsize { -@@ -21,10 +22,10 @@ define void @caller_extern(ptr %src) optsize { - ; CHECK-NEXT: move $a1, $a0 - ; CHECK-NEXT: pcalau12i $a0, %got_pc_hi20(dest) - ; CHECK-NEXT: ld.d $a0, $a0, %got_pc_lo12(dest) --; CHECK-NEXT: ori $a2, $zero, 7 -+; CHECK-NEXT: ori $a2, $zero, 33 - ; CHECK-NEXT: b %plt(memcpy) - entry: -- tail call void @llvm.memcpy.p0i8.p0i8.i32(ptr getelementptr inbounds ([2 x i8], ptr @dest, i32 0, i32 0), ptr %src, i32 7, i1 false) -+ tail call void @llvm.memcpy.p0i8.p0i8.i32(ptr getelementptr inbounds ([2 x i8], ptr @dest, i32 0, i32 0), ptr %src, i32 33, i1 false) - ret void - } - -diff --git a/llvm/test/CodeGen/LoongArch/target-abi-from-triple-edge-cases.ll b/llvm/test/CodeGen/LoongArch/target-abi-from-triple-edge-cases.ll -new file mode 100644 -index 000000000..1d5ed089c ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/target-abi-from-triple-edge-cases.ll -@@ -0,0 +1,74 @@ -+;; Check that an unknown --target-abi is ignored and the triple-implied ABI is -+;; used. 
-+; RUN: llc --mtriple=loongarch32-linux-gnu --target-abi=foo --mattr=+d < %s 2>&1 \ -+; RUN: | FileCheck %s --check-prefixes=ILP32D,UNKNOWN -+; RUN: llc --mtriple=loongarch64-linux-gnu --target-abi=foo --mattr=+d < %s 2>&1 \ -+; RUN: | FileCheck %s --check-prefixes=LP64D,UNKNOWN -+ -+; UNKNOWN: 'foo' is not a recognized ABI for this target, ignoring and using triple-implied ABI -+ -+;; Check that --target-abi takes precedence over triple-supplied ABI modifiers. -+; RUN: llc --mtriple=loongarch32-linux-gnusf --target-abi=ilp32d --mattr=+d < %s 2>&1 \ -+; RUN: | FileCheck %s --check-prefixes=ILP32D,CONFLICT-ILP32D -+; RUN: llc --mtriple=loongarch64-linux-gnusf --target-abi=lp64d --mattr=+d < %s 2>&1 \ -+; RUN: | FileCheck %s --check-prefixes=LP64D,CONFLICT-LP64D -+ -+; CONFLICT-ILP32D: warning: triple-implied ABI conflicts with provided target-abi 'ilp32d', using target-abi -+; CONFLICT-LP64D: warning: triple-implied ABI conflicts with provided target-abi 'lp64d', using target-abi -+ -+;; Check that no warning is reported when there is no environment component in -+;; triple-supplied ABI modifiers and --target-abi is used. -+; RUN: llc --mtriple=loongarch64-linux --target-abi=lp64d --mattr=+d < %s 2>&1 \ -+; RUN: | FileCheck %s --check-prefixes=LP64D,NO-WARNING -+ -+; NO-WARNING-NOT: warning: triple-implied ABI conflicts with provided target-abi 'lp64d', using target-abi -+ -+;; Check that ILP32-on-LA64 and LP64-on-LA32 combinations are handled properly. -+; RUN: llc --mtriple=loongarch64 --target-abi=ilp32d --mattr=+d < %s 2>&1 \ -+; RUN: | FileCheck %s --check-prefixes=LP64D,32ON64 -+; RUN: llc --mtriple=loongarch32 --target-abi=lp64d --mattr=+d < %s 2>&1 \ -+; RUN: | FileCheck %s --check-prefixes=ILP32D,64ON32 -+ -+; 32ON64: 32-bit ABIs are not supported for 64-bit targets, ignoring target-abi and using triple-implied ABI -+; 64ON32: 64-bit ABIs are not supported for 32-bit targets, ignoring target-abi and using triple-implied ABI -+ -+define float @f(float %a) { -+; ILP32D-LABEL: f: -+; ILP32D: # %bb.0: -+; ILP32D-NEXT: addi.w $a0, $zero, 1 -+; ILP32D-NEXT: movgr2fr.w $fa1, $a0 -+; ILP32D-NEXT: ffint.s.w $fa1, $fa1 -+; ILP32D-NEXT: fadd.s $fa0, $fa0, $fa1 -+; ILP32D-NEXT: ret -+; -+; LP64D-LABEL: f: -+; LP64D: # %bb.0: -+; LP64D-NEXT: addi.w $a0, $zero, 1 -+; LP64D-NEXT: movgr2fr.w $fa1, $a0 -+; LP64D-NEXT: ffint.s.w $fa1, $fa1 -+; LP64D-NEXT: fadd.s $fa0, $fa0, $fa1 -+; LP64D-NEXT: ret -+ %1 = fadd float %a, 1.0 -+ ret float %1 -+} -+ -+define double @g(double %a) { -+; ILP32D-LABEL: g: -+; ILP32D: # %bb.0: -+; ILP32D-NEXT: addi.w $a0, $zero, 1 -+; ILP32D-NEXT: movgr2fr.w $fa1, $a0 -+; ILP32D-NEXT: ffint.s.w $fa1, $fa1 -+; ILP32D-NEXT: fcvt.d.s $fa1, $fa1 -+; ILP32D-NEXT: fadd.d $fa0, $fa0, $fa1 -+; ILP32D-NEXT: ret -+; -+; LP64D-LABEL: g: -+; LP64D: # %bb.0: -+; LP64D-NEXT: addi.d $a0, $zero, 1 -+; LP64D-NEXT: movgr2fr.d $fa1, $a0 -+; LP64D-NEXT: ffint.d.l $fa1, $fa1 -+; LP64D-NEXT: fadd.d $fa0, $fa0, $fa1 -+; LP64D-NEXT: ret -+ %1 = fadd double %a, 1.0 -+ ret double %1 -+} -diff --git a/llvm/test/CodeGen/LoongArch/target-abi-from-triple.ll b/llvm/test/CodeGen/LoongArch/target-abi-from-triple.ll -new file mode 100644 -index 000000000..0aca33903 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/target-abi-from-triple.ll -@@ -0,0 +1,49 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+ -+;; Check that the correct ABI is chosen based on the triple given. -+;; TODO: enable the S and F ABIs once support is wired up. 
-+; RUN: llc --mtriple=loongarch32-linux-gnuf64 --mattr=+d < %s \ -+; RUN: | FileCheck %s --check-prefix=ILP32D -+; RUN: llc --mtriple=loongarch64-linux-gnuf64 --mattr=+d < %s \ -+; RUN: | FileCheck %s --check-prefix=LP64D -+ -+define float @f(float %a) { -+; ILP32D-LABEL: f: -+; ILP32D: # %bb.0: -+; ILP32D-NEXT: addi.w $a0, $zero, 1 -+; ILP32D-NEXT: movgr2fr.w $fa1, $a0 -+; ILP32D-NEXT: ffint.s.w $fa1, $fa1 -+; ILP32D-NEXT: fadd.s $fa0, $fa0, $fa1 -+; ILP32D-NEXT: ret -+; -+; LP64D-LABEL: f: -+; LP64D: # %bb.0: -+; LP64D-NEXT: addi.w $a0, $zero, 1 -+; LP64D-NEXT: movgr2fr.w $fa1, $a0 -+; LP64D-NEXT: ffint.s.w $fa1, $fa1 -+; LP64D-NEXT: fadd.s $fa0, $fa0, $fa1 -+; LP64D-NEXT: ret -+ %1 = fadd float %a, 1.0 -+ ret float %1 -+} -+ -+define double @g(double %a) { -+; ILP32D-LABEL: g: -+; ILP32D: # %bb.0: -+; ILP32D-NEXT: addi.w $a0, $zero, 1 -+; ILP32D-NEXT: movgr2fr.w $fa1, $a0 -+; ILP32D-NEXT: ffint.s.w $fa1, $fa1 -+; ILP32D-NEXT: fcvt.d.s $fa1, $fa1 -+; ILP32D-NEXT: fadd.d $fa0, $fa0, $fa1 -+; ILP32D-NEXT: ret -+; -+; LP64D-LABEL: g: -+; LP64D: # %bb.0: -+; LP64D-NEXT: addi.d $a0, $zero, 1 -+; LP64D-NEXT: movgr2fr.d $fa1, $a0 -+; LP64D-NEXT: ffint.d.l $fa1, $fa1 -+; LP64D-NEXT: fadd.d $fa0, $fa0, $fa1 -+; LP64D-NEXT: ret -+ %1 = fadd double %a, 1.0 -+ ret double %1 -+} -diff --git a/llvm/test/CodeGen/LoongArch/unaligned-access.ll b/llvm/test/CodeGen/LoongArch/unaligned-access.ll -new file mode 100644 -index 000000000..871c17f06 ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/unaligned-access.ll -@@ -0,0 +1,72 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -+ -+;; Test the ual feature which is similar to AArch64/arm64-strict-align.ll. -+ -+; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32-ALIGNED -+; RUN: llc --mtriple=loongarch32 --mattr=+ual < %s | FileCheck %s --check-prefix=LA32-UNALIGNED -+; RUN: llc --mtriple=loongarch32 --mattr=-ual < %s | FileCheck %s --check-prefix=LA32-ALIGNED -+ -+; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64-UNALIGNED -+; RUN: llc --mtriple=loongarch64 --mattr=+ual < %s | FileCheck %s --check-prefix=LA64-UNALIGNED -+; RUN: llc --mtriple=loongarch64 --mattr=-ual < %s | FileCheck %s --check-prefix=LA64-ALIGNED -+ -+define i32 @f0(ptr %p) nounwind { -+; LA32-ALIGNED-LABEL: f0: -+; LA32-ALIGNED: # %bb.0: -+; LA32-ALIGNED-NEXT: ld.hu $a1, $a0, 0 -+; LA32-ALIGNED-NEXT: ld.hu $a0, $a0, 2 -+; LA32-ALIGNED-NEXT: slli.w $a0, $a0, 16 -+; LA32-ALIGNED-NEXT: or $a0, $a0, $a1 -+; LA32-ALIGNED-NEXT: ret -+; -+; LA32-UNALIGNED-LABEL: f0: -+; LA32-UNALIGNED: # %bb.0: -+; LA32-UNALIGNED-NEXT: ld.w $a0, $a0, 0 -+; LA32-UNALIGNED-NEXT: ret -+; -+; LA64-UNALIGNED-LABEL: f0: -+; LA64-UNALIGNED: # %bb.0: -+; LA64-UNALIGNED-NEXT: ld.w $a0, $a0, 0 -+; LA64-UNALIGNED-NEXT: ret -+; -+; LA64-ALIGNED-LABEL: f0: -+; LA64-ALIGNED: # %bb.0: -+; LA64-ALIGNED-NEXT: ld.hu $a1, $a0, 0 -+; LA64-ALIGNED-NEXT: ld.h $a0, $a0, 2 -+; LA64-ALIGNED-NEXT: slli.d $a0, $a0, 16 -+; LA64-ALIGNED-NEXT: or $a0, $a0, $a1 -+; LA64-ALIGNED-NEXT: ret -+ %tmp = load i32, ptr %p, align 2 -+ ret i32 %tmp -+} -+ -+define i64 @f1(ptr %p) nounwind { -+; LA32-ALIGNED-LABEL: f1: -+; LA32-ALIGNED: # %bb.0: -+; LA32-ALIGNED-NEXT: ld.w $a2, $a0, 0 -+; LA32-ALIGNED-NEXT: ld.w $a1, $a0, 4 -+; LA32-ALIGNED-NEXT: move $a0, $a2 -+; LA32-ALIGNED-NEXT: ret -+; -+; LA32-UNALIGNED-LABEL: f1: -+; LA32-UNALIGNED: # %bb.0: -+; LA32-UNALIGNED-NEXT: ld.w $a2, $a0, 0 -+; LA32-UNALIGNED-NEXT: ld.w $a1, $a0, 4 -+; LA32-UNALIGNED-NEXT: 
move $a0, $a2 -+; LA32-UNALIGNED-NEXT: ret -+; -+; LA64-UNALIGNED-LABEL: f1: -+; LA64-UNALIGNED: # %bb.0: -+; LA64-UNALIGNED-NEXT: ld.d $a0, $a0, 0 -+; LA64-UNALIGNED-NEXT: ret -+; -+; LA64-ALIGNED-LABEL: f1: -+; LA64-ALIGNED: # %bb.0: -+; LA64-ALIGNED-NEXT: ld.wu $a1, $a0, 0 -+; LA64-ALIGNED-NEXT: ld.wu $a0, $a0, 4 -+; LA64-ALIGNED-NEXT: slli.d $a0, $a0, 32 -+; LA64-ALIGNED-NEXT: or $a0, $a0, $a1 -+; LA64-ALIGNED-NEXT: ret -+ %tmp = load i64, ptr %p, align 4 -+ ret i64 %tmp -+} -diff --git a/llvm/test/CodeGen/LoongArch/unaligned-memcpy-inline.ll b/llvm/test/CodeGen/LoongArch/unaligned-memcpy-inline.ll -new file mode 100644 -index 000000000..37afe7e3e ---- /dev/null -+++ b/llvm/test/CodeGen/LoongArch/unaligned-memcpy-inline.ll -@@ -0,0 +1,97 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -+ -+;; Test how memcpy is optimized when ual is turned off which is similar to AArch64/arm64-misaligned-memcpy-inline.ll. -+ -+; RUN: llc --mtriple=loongarch32 --mattr=-ual < %s | FileCheck %s --check-prefix=LA32 -+; RUN: llc --mtriple=loongarch64 --mattr=-ual < %s | FileCheck %s --check-prefix=LA64 -+ -+;; Small (16 bytes here) unaligned memcpy() should be a function call if -+;; ual is turned off. -+define void @t0(ptr %out, ptr %in) { -+; LA32-LABEL: t0: -+; LA32: # %bb.0: # %entry -+; LA32-NEXT: addi.w $sp, $sp, -16 -+; LA32-NEXT: .cfi_def_cfa_offset 16 -+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -+; LA32-NEXT: .cfi_offset 1, -4 -+; LA32-NEXT: ori $a2, $zero, 16 -+; LA32-NEXT: bl %plt(memcpy) -+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -+; LA32-NEXT: addi.w $sp, $sp, 16 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: t0: -+; LA64: # %bb.0: # %entry -+; LA64-NEXT: addi.d $sp, $sp, -16 -+; LA64-NEXT: .cfi_def_cfa_offset 16 -+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -+; LA64-NEXT: .cfi_offset 1, -8 -+; LA64-NEXT: ori $a2, $zero, 16 -+; LA64-NEXT: bl %plt(memcpy) -+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -+; LA64-NEXT: addi.d $sp, $sp, 16 -+; LA64-NEXT: ret -+entry: -+ call void @llvm.memcpy.p0.p0.i64(ptr %out, ptr %in, i64 16, i1 false) -+ ret void -+} -+ -+;; Small (16 bytes here) aligned memcpy() should be inlined even if -+;; ual is turned off. -+define void @t1(ptr align 8 %out, ptr align 8 %in) { -+; LA32-LABEL: t1: -+; LA32: # %bb.0: # %entry -+; LA32-NEXT: ld.w $a2, $a1, 12 -+; LA32-NEXT: st.w $a2, $a0, 12 -+; LA32-NEXT: ld.w $a2, $a1, 8 -+; LA32-NEXT: st.w $a2, $a0, 8 -+; LA32-NEXT: ld.w $a2, $a1, 4 -+; LA32-NEXT: st.w $a2, $a0, 4 -+; LA32-NEXT: ld.w $a1, $a1, 0 -+; LA32-NEXT: st.w $a1, $a0, 0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: t1: -+; LA64: # %bb.0: # %entry -+; LA64-NEXT: ld.d $a2, $a1, 8 -+; LA64-NEXT: st.d $a2, $a0, 8 -+; LA64-NEXT: ld.d $a1, $a1, 0 -+; LA64-NEXT: st.d $a1, $a0, 0 -+; LA64-NEXT: ret -+entry: -+ call void @llvm.memcpy.p0.p0.i64(ptr align 8 %out, ptr align 8 %in, i64 16, i1 false) -+ ret void -+} -+ -+;; Tiny (4 bytes here) unaligned memcpy() should be inlined with byte sized -+;; loads and stores if ual is turned off. 
-+define void @t2(ptr %out, ptr %in) { -+; LA32-LABEL: t2: -+; LA32: # %bb.0: # %entry -+; LA32-NEXT: ld.b $a2, $a1, 3 -+; LA32-NEXT: st.b $a2, $a0, 3 -+; LA32-NEXT: ld.b $a2, $a1, 2 -+; LA32-NEXT: st.b $a2, $a0, 2 -+; LA32-NEXT: ld.b $a2, $a1, 1 -+; LA32-NEXT: st.b $a2, $a0, 1 -+; LA32-NEXT: ld.b $a1, $a1, 0 -+; LA32-NEXT: st.b $a1, $a0, 0 -+; LA32-NEXT: ret -+; -+; LA64-LABEL: t2: -+; LA64: # %bb.0: # %entry -+; LA64-NEXT: ld.b $a2, $a1, 3 -+; LA64-NEXT: st.b $a2, $a0, 3 -+; LA64-NEXT: ld.b $a2, $a1, 2 -+; LA64-NEXT: st.b $a2, $a0, 2 -+; LA64-NEXT: ld.b $a2, $a1, 1 -+; LA64-NEXT: st.b $a2, $a0, 1 -+; LA64-NEXT: ld.b $a1, $a1, 0 -+; LA64-NEXT: st.b $a1, $a0, 0 -+; LA64-NEXT: ret -+entry: -+ call void @llvm.memcpy.p0.p0.i64(ptr %out, ptr %in, i64 4, i1 false) -+ ret void -+} -+ -+declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture readonly, i64, i1) -diff --git a/llvm/test/DebugInfo/X86/dbg-value-no-crash.ll b/llvm/test/DebugInfo/X86/dbg-value-no-crash.ll -new file mode 100644 -index 000000000..8ff055f13 ---- /dev/null -+++ b/llvm/test/DebugInfo/X86/dbg-value-no-crash.ll -@@ -0,0 +1,39 @@ -+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=avx512bw,avx512vl -o - %s -+ -+;; Check this won't result in crash. -+define <8 x i32> @foo(ptr %0, <8 x i32> %1, i8 %2, i8 %3) { -+ %5 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> %1, <8 x i32> zeroinitializer) -+ %6 = add nsw <8 x i32> %1, -+ call void @llvm.dbg.value(metadata <8 x i32> %6, metadata !4, metadata !DIExpression()), !dbg !15 -+ %7 = bitcast i8 %2 to <8 x i1> -+ %8 = select <8 x i1> %7, <8 x i32> %6, <8 x i32> %5 -+ %9 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> %8, <8 x i32> zeroinitializer) -+ %10 = bitcast i8 %3 to <8 x i1> -+ %11 = select <8 x i1> %10, <8 x i32> %9, <8 x i32> -+ ret <8 x i32> %11 -+} -+ -+declare <8 x i32> @llvm.smax.v8i32(<8 x i32>, <8 x i32>) -+declare void @llvm.dbg.value(metadata, metadata, metadata) -+ -+!llvm.dbg.cu = !{!0} -+!llvm.module.flags = !{!3} -+ -+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 16.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !2) -+!1 = !DIFile(filename: "a.cpp", directory: "/") -+!2 = !{} -+!3 = !{i32 2, !"Debug Info Version", i32 3} -+!4 = !DILocalVariable(name: "a", arg: 2, scope: !5, file: !1, line: 12, type: !11) -+!5 = distinct !DISubprogram(name: "foo", scope: !6, file: !1, line: 12, type: !7, scopeLine: 12, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, declaration: !9, retainedNodes: !10) -+!6 = !DINamespace(name: "ns1", scope: null) -+!7 = !DISubroutineType(types: !8) -+!8 = !{null} -+!9 = !DISubprogram(name: "foo", scope: !6, file: !1, line: 132, type: !7, scopeLine: 12, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized) -+!10 = !{!4} -+!11 = !DICompositeType(tag: DW_TAG_array_type, baseType: !12, size: 256, flags: DIFlagVector, elements: !13) -+!12 = !DIBasicType(name: "long long", size: 64, encoding: DW_ATE_signed) -+!13 = !{!14} -+!14 = !DISubrange(count: 4) -+!15 = !DILocation(line: 0, scope: !5, inlinedAt: !16) -+!16 = !DILocation(line: 18, scope: !17) -+!17 = distinct !DISubprogram(name: "foo", scope: null, file: !1, type: !7, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) -diff --git a/llvm/test/ExecutionEngine/OrcLazy/emulated-tls.ll b/llvm/test/ExecutionEngine/OrcLazy/emulated-tls.ll -index 4b603cd29..51a5905fe 100644 ---- a/llvm/test/ExecutionEngine/OrcLazy/emulated-tls.ll -+++ 
b/llvm/test/ExecutionEngine/OrcLazy/emulated-tls.ll -@@ -1,3 +1,6 @@ -+; LoongArch does not support emulated tls. -+; UNSUPPORTED: target=loongarch{{.*}} -+ - ; RUN: not lli -no-process-syms -emulated-tls -jit-kind=orc-lazy %s 2>&1 \ - ; RUN: | FileCheck %s - ; -diff --git a/llvm/test/ExecutionEngine/OrcLazy/lit.local.cfg b/llvm/test/ExecutionEngine/OrcLazy/lit.local.cfg -index 4161b4f3c..3a3d23f2b 100644 ---- a/llvm/test/ExecutionEngine/OrcLazy/lit.local.cfg -+++ b/llvm/test/ExecutionEngine/OrcLazy/lit.local.cfg -@@ -1,6 +1,8 @@ - import sys - --if config.root.host_arch not in ['i386', 'x86', 'x86_64', 'AMD64', 'mips', 'mipsel', 'mips64', 'mips64el']: -+if config.root.host_arch not in ['i386', 'x86', 'x86_64', 'AMD64', -+ 'mips', 'mipsel', 'mips64', 'mips64el', -+ 'loongarch64']: - config.unsupported = True - - # FIXME: These tests don't pass with the COFF rtld. -diff --git a/llvm/test/ExecutionEngine/frem.ll b/llvm/test/ExecutionEngine/frem.ll -index b8739c249..d33e4fca8 100644 ---- a/llvm/test/ExecutionEngine/frem.ll -+++ b/llvm/test/ExecutionEngine/frem.ll -@@ -1,3 +1,6 @@ -+; LoongArch does not support mcjit. -+; UNSUPPORTED: target=loongarch{{.*}} -+ - ; LLI.exe used to crash on Windows\X86 when certain single precession - ; floating point intrinsics (defined as macros) are used. - ; This unit test guards against the failure. -diff --git a/llvm/test/ExecutionEngine/lit.local.cfg b/llvm/test/ExecutionEngine/lit.local.cfg -index e71e7cf3c..b00ef0dcb 100644 ---- a/llvm/test/ExecutionEngine/lit.local.cfg -+++ b/llvm/test/ExecutionEngine/lit.local.cfg -@@ -1,4 +1,4 @@ --if config.root.native_target in ['Sparc', 'PowerPC', 'SystemZ', 'Hexagon', 'RISCV', 'LoongArch']: -+if config.root.native_target in ['Sparc', 'PowerPC', 'SystemZ', 'Hexagon', 'RISCV']: - config.unsupported = True - - # ExecutionEngine tests are not expected to pass in a cross-compilation setup. -diff --git a/llvm/test/ExecutionEngine/mov64zext32.ll b/llvm/test/ExecutionEngine/mov64zext32.ll -index bba1a1987..43bd0fb2f 100644 ---- a/llvm/test/ExecutionEngine/mov64zext32.ll -+++ b/llvm/test/ExecutionEngine/mov64zext32.ll -@@ -1,3 +1,6 @@ -+; LoongArch does not support mcjit. -+; UNSUPPORTED: target=loongarch{{.*}} -+ - ; RUN: %lli -jit-kind=mcjit %s > /dev/null - ; RUN: %lli %s > /dev/null - -diff --git a/llvm/test/ExecutionEngine/test-interp-vec-arithm_float.ll b/llvm/test/ExecutionEngine/test-interp-vec-arithm_float.ll -index 6f784265a..99d95791c 100644 ---- a/llvm/test/ExecutionEngine/test-interp-vec-arithm_float.ll -+++ b/llvm/test/ExecutionEngine/test-interp-vec-arithm_float.ll -@@ -1,3 +1,6 @@ -+; LoongArch does not support mcjit. -+; UNSUPPORTED: target=loongarch{{.*}} -+ - ; RUN: %lli -jit-kind=mcjit %s > /dev/null - ; RUN: %lli %s > /dev/null - -diff --git a/llvm/test/ExecutionEngine/test-interp-vec-arithm_int.ll b/llvm/test/ExecutionEngine/test-interp-vec-arithm_int.ll -index 6896af83c..2e5592d4d 100644 ---- a/llvm/test/ExecutionEngine/test-interp-vec-arithm_int.ll -+++ b/llvm/test/ExecutionEngine/test-interp-vec-arithm_int.ll -@@ -1,3 +1,6 @@ -+; LoongArch does not support mcjit. 
-+; UNSUPPORTED: target=loongarch{{.*}} -+ - ; RUN: %lli -jit-kind=mcjit %s > /dev/null - ; RUN: %lli %s > /dev/null - -diff --git a/llvm/test/ExecutionEngine/test-interp-vec-logical.ll b/llvm/test/ExecutionEngine/test-interp-vec-logical.ll -index f654120ea..1e11659b1 100644 ---- a/llvm/test/ExecutionEngine/test-interp-vec-logical.ll -+++ b/llvm/test/ExecutionEngine/test-interp-vec-logical.ll -@@ -1,3 +1,6 @@ -+; LoongArch does not support mcjit. -+; UNSUPPORTED: target=loongarch{{.*}} -+ - ; RUN: %lli -jit-kind=mcjit %s > /dev/null - ; RUN: %lli %s > /dev/null - -diff --git a/llvm/test/ExecutionEngine/test-interp-vec-setcond-fp.ll b/llvm/test/ExecutionEngine/test-interp-vec-setcond-fp.ll -index 84bdec1cf..e919550de 100644 ---- a/llvm/test/ExecutionEngine/test-interp-vec-setcond-fp.ll -+++ b/llvm/test/ExecutionEngine/test-interp-vec-setcond-fp.ll -@@ -1,3 +1,6 @@ -+; LoongArch does not support mcjit. -+; UNSUPPORTED: target=loongarch{{.*}} -+ - ; RUN: %lli -jit-kind=mcjit %s > /dev/null - ; RUN: %lli %s > /dev/null - -diff --git a/llvm/test/ExecutionEngine/test-interp-vec-setcond-int.ll b/llvm/test/ExecutionEngine/test-interp-vec-setcond-int.ll -index 5a20fc4f1..9862d6af1 100644 ---- a/llvm/test/ExecutionEngine/test-interp-vec-setcond-int.ll -+++ b/llvm/test/ExecutionEngine/test-interp-vec-setcond-int.ll -@@ -1,3 +1,6 @@ -+; LoongArch does not support mcjit. -+; UNSUPPORTED: target=loongarch{{.*}} -+ - ; RUN: %lli -jit-kind=mcjit %s > /dev/null - ; RUN: %lli %s > /dev/null - -diff --git a/llvm/test/Instrumentation/MemorySanitizer/LoongArch/vararg-loongarch64.ll b/llvm/test/Instrumentation/MemorySanitizer/LoongArch/vararg-loongarch64.ll -new file mode 100644 -index 000000000..8a4ab5958 ---- /dev/null -+++ b/llvm/test/Instrumentation/MemorySanitizer/LoongArch/vararg-loongarch64.ll -@@ -0,0 +1,78 @@ -+; RUN: opt < %s -S -passes=msan 2>&1 | FileCheck %s -+ -+target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" -+target triple = "loongarch64-unknown-linux-gnu" -+ -+;; First, check allocation of the save area. -+declare void @llvm.lifetime.start.p0(i64, ptr nocapture) #1 -+declare void @llvm.va_start(ptr) #2 -+declare void @llvm.va_end(ptr) #2 -+declare void @llvm.lifetime.end.p0(i64, ptr nocapture) #1 -+define i32 @foo(i32 %guard, ...) { -+; CHECK-LABEL: @foo -+; CHECK: [[TMP1:%.*]] = load {{.*}} @__msan_va_arg_overflow_size_tls -+; CHECK: [[TMP2:%.*]] = add i64 0, [[TMP1]] -+; CHECK: [[TMP3:%.*]] = alloca {{.*}} [[TMP2]] -+; CHECK: call void @llvm.memset.p0.i64(ptr align 8 [[TMP3]], i8 0, i64 [[TMP2]], i1 false) -+; CHECK: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP2]], i64 800) -+; CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP3]], ptr align 8 @__msan_va_arg_tls, i64 [[TMP4]], i1 false) -+; -+ %vl = alloca ptr, align 8 -+ call void @llvm.lifetime.start.p0(i64 32, ptr %vl) -+ call void @llvm.va_start(ptr %vl) -+ call void @llvm.va_end(ptr %vl) -+ call void @llvm.lifetime.end.p0(i64 32, ptr %vl) -+ ret i32 0 -+} -+ -+;; Save the incoming shadow value from the arguments in the __msan_va_arg_tls -+;; array. -+define i32 @bar() { -+; CHECK-LABEL: @bar -+; CHECK: store i32 0, ptr @__msan_va_arg_tls, align 8 -+; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 -+; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 16) to ptr), align 8 -+; CHECK: store {{.*}} 24, {{.*}} @__msan_va_arg_overflow_size_tls -+; -+ %1 = call i32 (i32, ...) 
@foo(i32 0, i32 1, i64 2, double 3.000000e+00) -+ ret i32 %1 -+} -+ -+;; Check multiple fixed arguments. -+declare i32 @foo2(i32 %g1, i32 %g2, ...) -+define i32 @bar2() { -+; CHECK-LABEL: @bar2 -+; CHECK: store i64 0, ptr @__msan_va_arg_tls, align 8 -+; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 -+; CHECK: store {{.*}} 16, {{.*}} @__msan_va_arg_overflow_size_tls -+; -+ %1 = call i32 (i32, i32, ...) @foo2(i32 0, i32 1, i64 2, double 3.000000e+00) -+ ret i32 %1 -+} -+ -+;; Test that MSan doesn't generate code overflowing __msan_va_arg_tls when too many arguments are -+;; passed to a variadic function. -+declare i64 @sum(i64 %n, ...) -+define dso_local i64 @many_args() { -+;; If the size of __msan_va_arg_tls changes the second argument of `add` must also be changed. -+; CHECK-LABEL: @many_args -+; CHECK: i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 792) -+; CHECK-NOT: i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 800) -+; -+entry: -+ %ret = call i64 (i64, ...) @sum(i64 120, -+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, -+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, -+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, -+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, -+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, -+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, -+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, -+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, -+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, -+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, -+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, -+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1 -+ ) -+ ret i64 %ret -+} -diff --git a/llvm/test/Instrumentation/MemorySanitizer/LoongArch/vararg.ll b/llvm/test/Instrumentation/MemorySanitizer/LoongArch/vararg.ll -new file mode 100644 -index 000000000..dcbe2a242 ---- /dev/null -+++ b/llvm/test/Instrumentation/MemorySanitizer/LoongArch/vararg.ll -@@ -0,0 +1,14 @@ -+; RUN: opt < %s -msan-check-access-address=0 -S -passes=msan 2>&1 -+; Test that code using va_start can be compiled on LoongArch. -+ -+target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" -+target triple = "loongarch64-unknown-linux-gnu" -+ -+define void @VaStart(ptr %s, ...) { -+entry: -+ %vl = alloca ptr, align 4 -+ call void @llvm.va_start(ptr %vl) -+ ret void -+} -+ -+declare void @llvm.va_start(ptr) -diff --git a/llvm/test/MC/LoongArch/Relocations/sub-expr.s b/llvm/test/MC/LoongArch/Relocations/sub-expr.s -new file mode 100644 -index 000000000..0179e1027 ---- /dev/null -+++ b/llvm/test/MC/LoongArch/Relocations/sub-expr.s -@@ -0,0 +1,28 @@ -+# RUN: llvm-mc --filetype=obj --triple=loongarch64 %s -o %t -+# RUN: llvm-readobj -r %t | FileCheck %s -+ -+## Check that subtraction expressions emit R_LARCH_32_PCREL and R_LARCH_64_PCREL relocations. -+ -+## TODO: 1- or 2-byte data relocations are not supported for now. -+ -+# CHECK: Relocations [ -+# CHECK-NEXT: Section ({{.*}}) .rela.data { -+# CHECK-NEXT: 0x0 R_LARCH_64_PCREL sx 0x0 -+# CHECK-NEXT: 0x8 R_LARCH_64_PCREL sy 0x0 -+# CHECK-NEXT: 0x10 R_LARCH_32_PCREL sx 0x0 -+# CHECK-NEXT: 0x14 R_LARCH_32_PCREL sy 0x0 -+# CHECK-NEXT: } -+ -+.section sx,"a" -+x: -+nop -+ -+.data -+.8byte x-. -+.8byte y-. 
-+.4byte x-. -+.4byte y-. -+ -+.section sy,"a" -+y: -+nop -diff --git a/llvm/test/tools/llvm-objcopy/ELF/binary-output-target.test b/llvm/test/tools/llvm-objcopy/ELF/binary-output-target.test -index 78fc14355..fc5856691 100644 ---- a/llvm/test/tools/llvm-objcopy/ELF/binary-output-target.test -+++ b/llvm/test/tools/llvm-objcopy/ELF/binary-output-target.test -@@ -42,6 +42,12 @@ - # RUN: llvm-objcopy -I binary -O elf32-hexagon %t.txt %t.hexagon.o - # RUN: llvm-readobj --file-headers %t.hexagon.o | FileCheck %s --check-prefixes=CHECK,LE,HEXAGON,32 - -+# RUN: llvm-objcopy -I binary -O elf32-loongarch %t.txt %t.la32.o -+# RUN: llvm-readobj --file-headers %t.la32.o | FileCheck %s --check-prefixes=CHECK,LE,LA32,32 -+ -+# RUN: llvm-objcopy -I binary -O elf64-loongarch %t.txt %t.la64.o -+# RUN: llvm-readobj --file-headers %t.la64.o | FileCheck %s --check-prefixes=CHECK,LE,LA64,64 -+ - # CHECK: Format: - # 32-SAME: elf32- - # 64-SAME: elf64- -@@ -49,6 +55,8 @@ - # ARM-SAME: littlearm - # HEXAGON-SAME: hexagon - # I386-SAME: i386 -+# LA32-SAME: loongarch{{$}} -+# LA64-SAME: loongarch{{$}} - # MIPS-SAME: mips{{$}} - # RISCV32-SAME: riscv{{$}} - # RISCV64-SAME: riscv{{$}} -@@ -62,6 +70,8 @@ - # ARM-NEXT: Arch: arm - # HEXAGON-NEXT: Arch: hexagon - # I386-NEXT: Arch: i386 -+# LA32-NEXT: Arch: loongarch32 -+# LA64-NEXT: Arch: loongarch64 - # MIPS-NEXT: Arch: mips{{$}} - # PPC32BE-NEXT: Arch: powerpc{{$}} - # PPC32LE-NEXT: Arch: powerpcle{{$}} -@@ -97,6 +107,8 @@ - # ARM-NEXT: Machine: EM_ARM (0x28) - # HEXAGON-NEXT: Machine: EM_HEXAGON (0xA4) - # I386-NEXT: Machine: EM_386 (0x3) -+# LA32-NEXT: Machine: EM_LOONGARCH (0x102) -+# LA64-NEXT: Machine: EM_LOONGARCH (0x102) - # MIPS-NEXT: Machine: EM_MIPS (0x8) - # PPC32-NEXT: Machine: EM_PPC (0x14) - # PPC64-NEXT: Machine: EM_PPC64 (0x15) -diff --git a/llvm/test/tools/llvm-objcopy/ELF/cross-arch-headers.test b/llvm/test/tools/llvm-objcopy/ELF/cross-arch-headers.test -index 98f1b3c64..882940c05 100644 ---- a/llvm/test/tools/llvm-objcopy/ELF/cross-arch-headers.test -+++ b/llvm/test/tools/llvm-objcopy/ELF/cross-arch-headers.test -@@ -109,6 +109,14 @@ - # RUN: llvm-readobj --file-headers %t.elf32_hexagon.o | FileCheck %s --check-prefixes=CHECK,LE,HEXAGON,32,SYSV - # RUN: llvm-readobj --file-headers %t.elf32_hexagon.dwo | FileCheck %s --check-prefixes=CHECK,LE,HEXAGON,32,SYSV - -+# RUN: llvm-objcopy %t.o -O elf32-loongarch %t.elf32_loongarch.o --split-dwo=%t.elf32_loongarch.dwo -+# RUN: llvm-readobj --file-headers %t.elf32_loongarch.o | FileCheck %s --check-prefixes=CHECK,LE,LA32,32,SYSV -+# RUN: llvm-readobj --file-headers %t.elf32_loongarch.dwo | FileCheck %s --check-prefixes=CHECK,LE,LA32,32,SYSV -+ -+# RUN: llvm-objcopy %t.o -O elf64-loongarch %t.elf64_loongarch.o --split-dwo=%t.elf64_loongarch.dwo -+# RUN: llvm-readobj --file-headers %t.elf64_loongarch.o | FileCheck %s --check-prefixes=CHECK,LE,LA64,64,SYSV -+# RUN: llvm-readobj --file-headers %t.elf64_loongarch.dwo | FileCheck %s --check-prefixes=CHECK,LE,LA64,64,SYSV -+ - !ELF - FileHeader: - Class: ELFCLASS32 -@@ -144,6 +152,8 @@ Symbols: - # AARCH-SAME: aarch64 - # ARM-SAME: littlearm - # HEXAGON-SAME: hexagon -+# LA32-SAME: loongarch{{$}} -+# LA64-SAME: loongarch{{$}} - # MIPS-SAME: mips - # PPCBE-SAME: powerpc{{$}} - # PPCLE-SAME: powerpcle{{$}} -@@ -158,6 +168,8 @@ Symbols: - # AARCH-NEXT: Arch: aarch64 - # ARM-NEXT: Arch: arm - # HEXAGON-NEXT: Arch: hexagon -+# LA32-NEXT: Arch: loongarch32 -+# LA64-NEXT: Arch: loongarch64 - # MIPSBE-NEXT: Arch: mips{{$}} - # MIPSLE-NEXT: Arch: mipsel{{$}} - # 
MIPS64BE-NEXT: Arch: mips64{{$}} -@@ -190,6 +202,8 @@ Symbols: - # HEXAGON: Machine: EM_HEXAGON (0xA4) - # I386: Machine: EM_386 (0x3) - # IAMCU: Machine: EM_IAMCU (0x6) -+# LA32: Machine: EM_LOONGARCH (0x102) -+# LA64: Machine: EM_LOONGARCH (0x102) - # MIPS: Machine: EM_MIPS (0x8) - # PPC32: Machine: EM_PPC (0x14) - # PPC64: Machine: EM_PPC64 (0x15) -diff --git a/llvm/test/tools/llvm-readobj/ELF/reloc-types-loongarch64.test b/llvm/test/tools/llvm-readobj/ELF/reloc-types-loongarch64.test -index c26fae7e8..e32dc893f 100644 ---- a/llvm/test/tools/llvm-readobj/ELF/reloc-types-loongarch64.test -+++ b/llvm/test/tools/llvm-readobj/ELF/reloc-types-loongarch64.test -@@ -93,6 +93,15 @@ - # CHECK: Type: R_LARCH_TLS_GD_HI20 (98) - # CHECK: Type: R_LARCH_32_PCREL (99) - # CHECK: Type: R_LARCH_RELAX (100) -+# CHECK: Type: R_LARCH_DELETE (101) -+# CHECK: Type: R_LARCH_ALIGN (102) -+# CHECK: Type: R_LARCH_PCREL20_S2 (103) -+# CHECK: Type: R_LARCH_CFA (104) -+# CHECK: Type: R_LARCH_ADD6 (105) -+# CHECK: Type: R_LARCH_SUB6 (106) -+# CHECK: Type: R_LARCH_ADD_ULEB128 (107) -+# CHECK: Type: R_LARCH_SUB_ULEB128 (108) -+# CHECK: Type: R_LARCH_64_PCREL (109) - - --- !ELF - FileHeader: -@@ -193,3 +202,12 @@ Sections: - - Type: R_LARCH_TLS_GD_HI20 - - Type: R_LARCH_32_PCREL - - Type: R_LARCH_RELAX -+ - Type: R_LARCH_DELETE -+ - Type: R_LARCH_ALIGN -+ - Type: R_LARCH_PCREL20_S2 -+ - Type: R_LARCH_CFA -+ - Type: R_LARCH_ADD6 -+ - Type: R_LARCH_SUB6 -+ - Type: R_LARCH_ADD_ULEB128 -+ - Type: R_LARCH_SUB_ULEB128 -+ - Type: R_LARCH_64_PCREL -diff --git a/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp b/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp -index 577b83732..42bd8371d 100644 ---- a/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp -+++ b/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp -@@ -331,7 +331,11 @@ static const StringMap TargetMap{ - // SPARC - {"elf32-sparc", {ELF::EM_SPARC, false, false}}, - {"elf32-sparcel", {ELF::EM_SPARC, false, true}}, -+ // Hexagon - {"elf32-hexagon", {ELF::EM_HEXAGON, false, true}}, -+ // LoongArch -+ {"elf32-loongarch", {ELF::EM_LOONGARCH, false, true}}, -+ {"elf64-loongarch", {ELF::EM_LOONGARCH, true, true}}, - }; - - static Expected -diff --git a/llvm/unittests/Object/ELFTest.cpp b/llvm/unittests/Object/ELFTest.cpp -index 9cf8feb0e..35fc2ec69 100644 ---- a/llvm/unittests/Object/ELFTest.cpp -+++ b/llvm/unittests/Object/ELFTest.cpp -@@ -233,6 +233,24 @@ TEST(ELFTest, getELFRelocationTypeNameForLoongArch) { - getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_32_PCREL)); - EXPECT_EQ("R_LARCH_RELAX", - getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_RELAX)); -+ EXPECT_EQ("R_LARCH_DELETE", -+ getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_DELETE)); -+ EXPECT_EQ("R_LARCH_ALIGN", -+ getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_ALIGN)); -+ EXPECT_EQ("R_LARCH_PCREL20_S2", -+ getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_PCREL20_S2)); -+ EXPECT_EQ("R_LARCH_CFA", -+ getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_CFA)); -+ EXPECT_EQ("R_LARCH_ADD6", -+ getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_ADD6)); -+ EXPECT_EQ("R_LARCH_SUB6", -+ getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_SUB6)); -+ EXPECT_EQ("R_LARCH_ADD_ULEB128", -+ getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_ADD_ULEB128)); -+ EXPECT_EQ("R_LARCH_SUB_ULEB128", -+ getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_SUB_ULEB128)); -+ EXPECT_EQ("R_LARCH_64_PCREL", -+ getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_64_PCREL)); - } - - TEST(ELFTest, getELFRelativeRelocationType) { -diff --git a/llvm/unittests/TargetParser/TripleTest.cpp 
b/llvm/unittests/TargetParser/TripleTest.cpp -index 77de43a16..b19699fc0 100644 ---- a/llvm/unittests/TargetParser/TripleTest.cpp -+++ b/llvm/unittests/TargetParser/TripleTest.cpp -@@ -1225,12 +1225,14 @@ TEST(TripleTest, BitWidthPredicates) { - EXPECT_TRUE(T.isArch32Bit()); - EXPECT_FALSE(T.isArch64Bit()); - EXPECT_TRUE(T.isLoongArch()); -+ EXPECT_TRUE(T.isLoongArch32()); - - T.setArch(Triple::loongarch64); - EXPECT_FALSE(T.isArch16Bit()); - EXPECT_FALSE(T.isArch32Bit()); - EXPECT_TRUE(T.isArch64Bit()); - EXPECT_TRUE(T.isLoongArch()); -+ EXPECT_TRUE(T.isLoongArch64()); - - T.setArch(Triple::dxil); - EXPECT_FALSE(T.isArch16Bit()); diff --git a/0001-llvm-Add-install-targets-for-gtest.patch b/0001-llvm-Add-install-targets-for-gtest.patch deleted file mode 100644 index e84c44402ffbd5c0262f0399c61d687de2d42a80..0000000000000000000000000000000000000000 --- a/0001-llvm-Add-install-targets-for-gtest.patch +++ /dev/null @@ -1,32 +0,0 @@ -From 8cc3870f09d728d9017c72eba9520117a4283fee Mon Sep 17 00:00:00 2001 -From: Tom Stellard -Date: Thu, 17 Nov 2022 09:01:10 +0000 -Subject: Add install targets for gtest - -Stand-alone builds need an installed version of gtest in order to run -the unittests. - -Differential Revision: https://reviews.llvm.org/D137890 ---- - llvm/CMakeLists.txt | 5 +++++ - 1 file changed, 5 insertions(+) - -diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt -index 60e1f29620af..d91338532815 100644 ---- a/llvm/CMakeLists.txt -+++ b/llvm/CMakeLists.txt -@@ -693,6 +693,11 @@ option(LLVM_BUILD_TESTS - "Build LLVM unit tests. If OFF, just generate build targets." OFF) - option(LLVM_INCLUDE_TESTS "Generate build targets for the LLVM unit tests." ON) - -+option(LLVM_INSTALL_GTEST -+ "Install the llvm gtest library. This should be on if you want to do -+ stand-alone builds of the other projects and run their unit tests." OFF) -+ -+ - option(LLVM_BUILD_BENCHMARKS "Add LLVM benchmark targets to the list of default - targets. If OFF, benchmarks still could be built using Benchmarks target." OFF) - option(LLVM_INCLUDE_BENCHMARKS "Generate benchmark targets. If OFF, benchmarks can't be built." ON) --- -2.34.3 - diff --git a/0002-LoongArch-Allow-delayed-decision-for-ADD-SUB-relocat.patch b/0002-LoongArch-Allow-delayed-decision-for-ADD-SUB-relocat.patch new file mode 100644 index 0000000000000000000000000000000000000000..b08aea765515c8b0aca1c362f9951ffbdce5282e --- /dev/null +++ b/0002-LoongArch-Allow-delayed-decision-for-ADD-SUB-relocat.patch @@ -0,0 +1,299 @@ +From 77d74b8fa071fa2695c9782e2e63e7b930895b1b Mon Sep 17 00:00:00 2001 +From: Jinyang He +Date: Wed, 20 Dec 2023 10:54:51 +0800 +Subject: [PATCH 02/66] [LoongArch] Allow delayed decision for ADD/SUB + relocations (#72960) + +Refer to RISCV [1], LoongArch also need delayed decision for ADD/SUB +relocations. In handleAddSubRelocations, just return directly if SecA != +SecB, handleFixup usually will finish the the rest of creating PCRel +relocations works. Otherwise we emit relocs depends on whether +relaxation is enabled. If not, we return true and avoid record ADD/SUB +relocations. +Now the two symbols separated by alignment directive will return without +folding symbol offset in AttemptToFoldSymbolOffsetDifference, which has +the same effect when relaxation is enabled. 
+ +[1] https://reviews.llvm.org/D155357 + +(cherry picked from commit a8081ed8ff0fd11fb8d5f4c83df49da909e49612) +Change-Id: Ic4c6a3eb11b576cb0c6ed0eba02150ad67c33cf2 +--- + llvm/lib/MC/MCExpr.cpp | 3 +- + .../MCTargetDesc/LoongArchAsmBackend.cpp | 78 +++++++++++++++++++ + .../MCTargetDesc/LoongArchAsmBackend.h | 9 ++- + .../MCTargetDesc/LoongArchFixupKinds.h | 4 +- + llvm/test/MC/LoongArch/Misc/subsection.s | 38 +++++++++ + .../MC/LoongArch/Relocations/relax-addsub.s | 68 ++++++++++++++++ + 6 files changed, 196 insertions(+), 4 deletions(-) + create mode 100644 llvm/test/MC/LoongArch/Misc/subsection.s + create mode 100644 llvm/test/MC/LoongArch/Relocations/relax-addsub.s + +diff --git a/llvm/lib/MC/MCExpr.cpp b/llvm/lib/MC/MCExpr.cpp +index a7b980553af0..5a6596f93824 100644 +--- a/llvm/lib/MC/MCExpr.cpp ++++ b/llvm/lib/MC/MCExpr.cpp +@@ -635,7 +635,8 @@ static void AttemptToFoldSymbolOffsetDifference( + // instructions and InSet is false (not expressions in directive like + // .size/.fill), disable the fast path. + if (Layout && (InSet || !SecA.hasInstructions() || +- !Asm->getContext().getTargetTriple().isRISCV())) { ++ !(Asm->getContext().getTargetTriple().isRISCV() || ++ Asm->getContext().getTargetTriple().isLoongArch()))) { + // If both symbols are in the same fragment, return the difference of their + // offsets. canGetFragmentOffset(FA) may be false. + if (FA == FB && !SA.isVariable() && !SB.isVariable()) { +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp +index aae3e544d326..1ed047a8e632 100644 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp +@@ -177,6 +177,34 @@ bool LoongArchAsmBackend::shouldForceRelocation(const MCAssembler &Asm, + } + } + ++static inline std::pair ++getRelocPairForSize(unsigned Size) { ++ switch (Size) { ++ default: ++ llvm_unreachable("unsupported fixup size"); ++ case 6: ++ return std::make_pair( ++ MCFixupKind(FirstLiteralRelocationKind + ELF::R_LARCH_ADD6), ++ MCFixupKind(FirstLiteralRelocationKind + ELF::R_LARCH_SUB6)); ++ case 8: ++ return std::make_pair( ++ MCFixupKind(FirstLiteralRelocationKind + ELF::R_LARCH_ADD8), ++ MCFixupKind(FirstLiteralRelocationKind + ELF::R_LARCH_SUB8)); ++ case 16: ++ return std::make_pair( ++ MCFixupKind(FirstLiteralRelocationKind + ELF::R_LARCH_ADD16), ++ MCFixupKind(FirstLiteralRelocationKind + ELF::R_LARCH_SUB16)); ++ case 32: ++ return std::make_pair( ++ MCFixupKind(FirstLiteralRelocationKind + ELF::R_LARCH_ADD32), ++ MCFixupKind(FirstLiteralRelocationKind + ELF::R_LARCH_SUB32)); ++ case 64: ++ return std::make_pair( ++ MCFixupKind(FirstLiteralRelocationKind + ELF::R_LARCH_ADD64), ++ MCFixupKind(FirstLiteralRelocationKind + ELF::R_LARCH_SUB64)); ++ } ++} ++ + bool LoongArchAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count, + const MCSubtargetInfo *STI) const { + // We mostly follow binutils' convention here: align to 4-byte boundary with a +@@ -191,6 +219,56 @@ bool LoongArchAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count, + return true; + } + ++bool LoongArchAsmBackend::handleAddSubRelocations(const MCAsmLayout &Layout, ++ const MCFragment &F, ++ const MCFixup &Fixup, ++ const MCValue &Target, ++ uint64_t &FixedValue) const { ++ std::pair FK; ++ uint64_t FixedValueA, FixedValueB; ++ const MCSection &SecA = Target.getSymA()->getSymbol().getSection(); ++ const MCSection &SecB = Target.getSymB()->getSymbol().getSection(); ++ 
++ // We need record relocation if SecA != SecB. Usually SecB is same as the ++ // section of Fixup, which will be record the relocation as PCRel. If SecB ++ // is not same as the section of Fixup, it will report error. Just return ++ // false and then this work can be finished by handleFixup. ++ if (&SecA != &SecB) ++ return false; ++ ++ // In SecA == SecB case. If the linker relaxation is enabled, we need record ++ // the ADD, SUB relocations. Otherwise the FixedValue has already been ++ // calculated out in evaluateFixup, return true and avoid record relocations. ++ if (!STI.hasFeature(LoongArch::FeatureRelax)) ++ return true; ++ ++ switch (Fixup.getKind()) { ++ case llvm::FK_Data_1: ++ FK = getRelocPairForSize(8); ++ break; ++ case llvm::FK_Data_2: ++ FK = getRelocPairForSize(16); ++ break; ++ case llvm::FK_Data_4: ++ FK = getRelocPairForSize(32); ++ break; ++ case llvm::FK_Data_8: ++ FK = getRelocPairForSize(64); ++ break; ++ default: ++ llvm_unreachable("unsupported fixup size"); ++ } ++ MCValue A = MCValue::get(Target.getSymA(), nullptr, Target.getConstant()); ++ MCValue B = MCValue::get(Target.getSymB()); ++ auto FA = MCFixup::create(Fixup.getOffset(), nullptr, std::get<0>(FK)); ++ auto FB = MCFixup::create(Fixup.getOffset(), nullptr, std::get<1>(FK)); ++ auto &Asm = Layout.getAssembler(); ++ Asm.getWriter().recordRelocation(Asm, Layout, &F, FA, A, FixedValueA); ++ Asm.getWriter().recordRelocation(Asm, Layout, &F, FB, B, FixedValueB); ++ FixedValue = FixedValueA - FixedValueB; ++ return true; ++} ++ + std::unique_ptr + LoongArchAsmBackend::createObjectTargetWriter() const { + return createLoongArchELFObjectWriter( +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h +index ae9bb8af0419..20f25b5cf53b 100644 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h +@@ -31,10 +31,15 @@ class LoongArchAsmBackend : public MCAsmBackend { + public: + LoongArchAsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI, bool Is64Bit, + const MCTargetOptions &Options) +- : MCAsmBackend(support::little), STI(STI), OSABI(OSABI), Is64Bit(Is64Bit), +- TargetOptions(Options) {} ++ : MCAsmBackend(support::little, ++ LoongArch::fixup_loongarch_relax), ++ STI(STI), OSABI(OSABI), Is64Bit(Is64Bit), TargetOptions(Options) {} + ~LoongArchAsmBackend() override {} + ++ bool handleAddSubRelocations(const MCAsmLayout &Layout, const MCFragment &F, ++ const MCFixup &Fixup, const MCValue &Target, ++ uint64_t &FixedValue) const override; ++ + void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, MutableArrayRef Data, + uint64_t Value, bool IsResolved, +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h +index ba2d6718cdf9..178fa6e5262b 100644 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h +@@ -106,7 +106,9 @@ enum Fixups { + // 20-bit fixup corresponding to %gd_pc_hi20(foo) for instruction pcalau12i. + fixup_loongarch_tls_gd_pc_hi20, + // 20-bit fixup corresponding to %gd_hi20(foo) for instruction lu12i.w. +- fixup_loongarch_tls_gd_hi20 ++ fixup_loongarch_tls_gd_hi20, ++ // Generate an R_LARCH_RELAX which indicates the linker may relax here. 
++ fixup_loongarch_relax = FirstLiteralRelocationKind + ELF::R_LARCH_RELAX + }; + } // end namespace LoongArch + } // end namespace llvm +diff --git a/llvm/test/MC/LoongArch/Misc/subsection.s b/llvm/test/MC/LoongArch/Misc/subsection.s +new file mode 100644 +index 000000000000..0bd22b474536 +--- /dev/null ++++ b/llvm/test/MC/LoongArch/Misc/subsection.s +@@ -0,0 +1,38 @@ ++# RUN: not llvm-mc --filetype=obj --triple=loongarch64 --mattr=-relax %s -o /dev/null 2>&1 | FileCheck %s --check-prefixes=ERR,NORELAX --implicit-check-not=error: ++## TODO: not llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax %s -o /dev/null 2>&1 | FileCheck %s --check-prefixes=ERR,RELAX --implicit-check-not=error: ++ ++a: ++ nop ++b: ++ la.pcrel $t0, a ++c: ++ nop ++d: ++ ++.data ++## Positive subsection numbers ++## With relaxation, report an error as c-b is not an assemble-time constant. ++# RELAX: :[[#@LINE+1]]:14: error: cannot evaluate subsection number ++.subsection c-b ++# RELAX: :[[#@LINE+1]]:14: error: cannot evaluate subsection number ++.subsection d-b ++# RELAX: :[[#@LINE+1]]:14: error: cannot evaluate subsection number ++.subsection c-a ++ ++.subsection b-a ++.subsection d-c ++ ++## Negative subsection numbers ++# NORELAX: :[[#@LINE+2]]:14: error: subsection number -8 is not within [0,2147483647] ++# RELAX: :[[#@LINE+1]]:14: error: cannot evaluate subsection number ++.subsection b-c ++# NORELAX: :[[#@LINE+2]]:14: error: subsection number -12 is not within [0,2147483647] ++# RELAX: :[[#@LINE+1]]:14: error: cannot evaluate subsection number ++.subsection b-d ++# NORELAX: :[[#@LINE+2]]:14: error: subsection number -12 is not within [0,2147483647] ++# RELAX: :[[#@LINE+1]]:14: error: cannot evaluate subsection number ++.subsection a-c ++# ERR: :[[#@LINE+1]]:14: error: subsection number -4 is not within [0,2147483647] ++.subsection a-b ++# ERR: :[[#@LINE+1]]:14: error: subsection number -4 is not within [0,2147483647] ++.subsection c-d +diff --git a/llvm/test/MC/LoongArch/Relocations/relax-addsub.s b/llvm/test/MC/LoongArch/Relocations/relax-addsub.s +new file mode 100644 +index 000000000000..532eb4e0561a +--- /dev/null ++++ b/llvm/test/MC/LoongArch/Relocations/relax-addsub.s +@@ -0,0 +1,68 @@ ++# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=-relax %s \ ++# RUN: | llvm-readobj -r -x .data - | FileCheck %s --check-prefix=NORELAX ++# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax %s \ ++# RUN: | llvm-readobj -r -x .data - | FileCheck %s --check-prefix=RELAX ++ ++# NORELAX: Relocations [ ++# NORELAX-NEXT: Section ({{.*}}) .rela.text { ++# NORELAX-NEXT: 0x10 R_LARCH_PCALA_HI20 .text 0x0 ++# NORELAX-NEXT: 0x14 R_LARCH_PCALA_LO12 .text 0x0 ++# NORELAX-NEXT: } ++# NORELAX-NEXT: ] ++ ++# NORELAX: Hex dump of section '.data': ++# NORELAX-NEXT: 0x00000000 04040004 00000004 00000000 0000000c ++# NORELAX-NEXT: 0x00000010 0c000c00 00000c00 00000000 00000808 ++# NORELAX-NEXT: 0x00000020 00080000 00080000 00000000 00 ++ ++# RELAX: Relocations [ ++# RELAX-NEXT: Section ({{.*}}) .rela.text { ++# RELAX-NEXT: 0x10 R_LARCH_PCALA_HI20 .L1 0x0 ++# RELAX-NEXT: 0x14 R_LARCH_PCALA_LO12 .L1 0x0 ++# RELAX-NEXT: } ++# RELAX-NEXT: Section ({{.*}}) .rela.data { ++# RELAX-NEXT: 0xF R_LARCH_ADD8 .L3 0x0 ++# RELAX-NEXT: 0xF R_LARCH_SUB8 .L2 0x0 ++# RELAX-NEXT: 0x10 R_LARCH_ADD16 .L3 0x0 ++# RELAX-NEXT: 0x10 R_LARCH_SUB16 .L2 0x0 ++# RELAX-NEXT: 0x12 R_LARCH_ADD32 .L3 0x0 ++# RELAX-NEXT: 0x12 R_LARCH_SUB32 .L2 0x0 ++# RELAX-NEXT: 0x16 R_LARCH_ADD64 .L3 0x0 ++# RELAX-NEXT: 0x16 R_LARCH_SUB64 .L2 0x0 ++# 
RELAX-NEXT: } ++# RELAX-NEXT: ] ++ ++# RELAX: Hex dump of section '.data': ++# RELAX-NEXT: 0x00000000 04040004 00000004 00000000 00000000 ++# RELAX-NEXT: 0x00000010 00000000 00000000 00000000 00000808 ++# RELAX-NEXT: 0x00000020 00080000 00080000 00000000 00 ++ ++.text ++.L1: ++ nop ++.L2: ++ .align 4 ++.L3: ++ la.pcrel $t0, .L1 ++.L4: ++ ret ++ ++.data ++## Not emit relocs ++.byte .L2 - .L1 ++.short .L2 - .L1 ++.word .L2 - .L1 ++.dword .L2 - .L1 ++## With relaxation, emit relocs because of the .align making the diff variable. ++## TODO Handle alignment directive. Why they emit relocs now? They returns ++## without folding symbols offset in AttemptToFoldSymbolOffsetDifference(). ++.byte .L3 - .L2 ++.short .L3 - .L2 ++.word .L3 - .L2 ++.dword .L3 - .L2 ++## TODO ++## With relaxation, emit relocs because la.pcrel is a linker-relaxable inst. ++.byte .L4 - .L3 ++.short .L4 - .L3 ++.word .L4 - .L3 ++.dword .L4 - .L3 +-- +2.20.1 + diff --git a/0002-LoongArch-fix-symbol-lookup-error.patch b/0002-LoongArch-fix-symbol-lookup-error.patch deleted file mode 100644 index 8a4913d4f3546cdeb2a4556d1efc5ff152a098ce..0000000000000000000000000000000000000000 --- a/0002-LoongArch-fix-symbol-lookup-error.patch +++ /dev/null @@ -1,211 +0,0 @@ -diff --git a/llvm/include/llvm/TargetParser/LoongArchTargetParser.def b/llvm/include/llvm/TargetParser/LoongArchTargetParser.def -index b20d12495..5245d750d 100644 ---- a/llvm/include/llvm/TargetParser/LoongArchTargetParser.def -+++ b/llvm/include/llvm/TargetParser/LoongArchTargetParser.def -@@ -2,6 +2,8 @@ - #define LOONGARCH_FEATURE(NAME, KIND) - #endif - -+LOONGARCH_FEATURE("invalid", FK_INVALID) -+LOONGARCH_FEATURE("none", FK_NONE) - LOONGARCH_FEATURE("+64bit", FK_64BIT) - LOONGARCH_FEATURE("+f", FK_FP32) - LOONGARCH_FEATURE("+d", FK_FP64) -@@ -17,6 +19,7 @@ LOONGARCH_FEATURE("+ual", FK_UAL) - #define LOONGARCH_ARCH(NAME, KIND, FEATURES) - #endif - -+LOONGARCH_ARCH("invalid", AK_INVALID, FK_INVALID) - LOONGARCH_ARCH("loongarch64", AK_LOONGARCH64, FK_64BIT | FK_FP32 | FK_FP64 | FK_UAL) - LOONGARCH_ARCH("la464", AK_LA464, FK_64BIT | FK_FP32 | FK_FP64 | FK_LSX | FK_LASX | FK_UAL) - -diff --git a/llvm/include/llvm/TargetParser/LoongArchTargetParser.h b/llvm/include/llvm/TargetParser/LoongArchTargetParser.h -index 028844187..ff325a76d 100644 ---- a/llvm/include/llvm/TargetParser/LoongArchTargetParser.h -+++ b/llvm/include/llvm/TargetParser/LoongArchTargetParser.h -@@ -23,6 +23,9 @@ class StringRef; - namespace LoongArch { - - enum FeatureKind : uint32_t { -+ FK_INVALID = 0, -+ FK_NONE = 1, -+ - // 64-bit ISA is available. - FK_64BIT = 1 << 1, - -@@ -64,14 +67,11 @@ struct ArchInfo { - uint32_t Features; - }; - --bool isValidArchName(StringRef Arch); -+ArchKind parseArch(StringRef Arch); - bool getArchFeatures(StringRef Arch, std::vector &Features); --bool isValidCPUName(StringRef TuneCPU); --void fillValidCPUList(SmallVectorImpl &Values); --StringRef getDefaultArch(bool Is64Bit); - - } // namespace LoongArch - - } // namespace llvm - --#endif // LLVM_TARGETPARSER_LOONGARCHTARGETPARSER_H -+#endif // LLVM_SUPPORT_LOONGARCHTARGETPARSER_H -diff --git a/llvm/include/llvm/TargetParser/Triple.h b/llvm/include/llvm/TargetParser/Triple.h -index 5ddb1d314..59513fa2f 100644 ---- a/llvm/include/llvm/TargetParser/Triple.h -+++ b/llvm/include/llvm/TargetParser/Triple.h -@@ -846,14 +846,10 @@ public: - : PointerWidth == 64; - } - -- /// Tests whether the target is 32-bit LoongArch. 
-- bool isLoongArch32() const { return getArch() == Triple::loongarch32; } -- -- /// Tests whether the target is 64-bit LoongArch. -- bool isLoongArch64() const { return getArch() == Triple::loongarch64; } -- - /// Tests whether the target is LoongArch (32- and 64-bit). -- bool isLoongArch() const { return isLoongArch32() || isLoongArch64(); } -+ bool isLoongArch() const { -+ return getArch() == Triple::loongarch32 || getArch() == Triple::loongarch64; -+ } - - /// Tests whether the target is MIPS 32-bit (little and big endian). - bool isMIPS32() const { -diff --git a/llvm/lib/Target/LoongArch/LoongArch.td b/llvm/lib/Target/LoongArch/LoongArch.td -index 3e9e8b251..7215cd16f 100644 ---- a/llvm/lib/Target/LoongArch/LoongArch.td -+++ b/llvm/lib/Target/LoongArch/LoongArch.td -@@ -135,11 +135,6 @@ include "LoongArchInstrInfo.td" - def : ProcessorModel<"generic-la32", NoSchedModel, [Feature32Bit]>; - def : ProcessorModel<"generic-la64", NoSchedModel, [Feature64Bit, FeatureUAL]>; - --// Generic 64-bit processor with double-precision floating-point support. --def : ProcessorModel<"loongarch64", NoSchedModel, [Feature64Bit, -- FeatureUAL, -- FeatureBasicD]>; -- - // Support generic for compatibility with other targets. The triple will be used - // to change to the appropriate la32/la64 version. - def : ProcessorModel<"generic", NoSchedModel, []>; -diff --git a/llvm/lib/TargetParser/LoongArchTargetParser.cpp b/llvm/lib/TargetParser/LoongArchTargetParser.cpp -index 772d24c5c..faa8c314f 100644 ---- a/llvm/lib/TargetParser/LoongArchTargetParser.cpp -+++ b/llvm/lib/TargetParser/LoongArchTargetParser.cpp -@@ -1,4 +1,4 @@ --//===-- LoongArchTargetParser - Parser for LoongArch features --*- C++ -*-====// -+//==-- LoongArch64TargetParser - Parser for LoongArch64 features --*- C++ -*-=// - // - // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. - // See https://llvm.org/LICENSE.txt for license information. -@@ -27,11 +27,12 @@ const ArchInfo AllArchs[] = { - #include "llvm/TargetParser/LoongArchTargetParser.def" - }; - --bool LoongArch::isValidArchName(StringRef Arch) { -+LoongArch::ArchKind LoongArch::parseArch(StringRef Arch) { - for (const auto A : AllArchs) - if (A.Name == Arch) -- return true; -- return false; -+ return A.Kind; -+ -+ return LoongArch::ArchKind::AK_INVALID; - } - - bool LoongArch::getArchFeatures(StringRef Arch, -@@ -39,22 +40,10 @@ bool LoongArch::getArchFeatures(StringRef Arch, - for (const auto A : AllArchs) { - if (A.Name == Arch) { - for (const auto F : AllFeatures) -- if ((A.Features & F.Kind) == F.Kind) -+ if ((A.Features & F.Kind) == F.Kind && F.Kind != FK_INVALID) - Features.push_back(F.Name); - return true; - } - } - return false; - } -- --bool LoongArch::isValidCPUName(StringRef Name) { return isValidArchName(Name); } -- --void LoongArch::fillValidCPUList(SmallVectorImpl &Values) { -- for (const auto A : AllArchs) -- Values.emplace_back(A.Name); --} -- --StringRef LoongArch::getDefaultArch(bool Is64Bit) { -- // TODO: use a real 32-bit arch name. -- return Is64Bit ? 
"loongarch64" : ""; --} -diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp -index 367a2bef2..599eeeabc 100644 ---- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp -+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp -@@ -492,7 +492,7 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize, - bool IsMIPS64 = TargetTriple.isMIPS64(); - bool IsArmOrThumb = TargetTriple.isARM() || TargetTriple.isThumb(); - bool IsAArch64 = TargetTriple.getArch() == Triple::aarch64; -- bool IsLoongArch64 = TargetTriple.isLoongArch64(); -+ bool IsLoongArch64 = TargetTriple.getArch() == Triple::loongarch64; - bool IsRISCV64 = TargetTriple.getArch() == Triple::riscv64; - bool IsWindows = TargetTriple.isOSWindows(); - bool IsFuchsia = TargetTriple.isOSFuchsia(); -diff --git a/llvm/test/CodeGen/LoongArch/cpus-invalid.ll b/llvm/test/CodeGen/LoongArch/cpus-invalid.ll -deleted file mode 100644 -index b5435fb90..000000000 ---- a/llvm/test/CodeGen/LoongArch/cpus-invalid.ll -+++ /dev/null -@@ -1,7 +0,0 @@ --; RUN: llc < %s --mtriple=loongarch64 --mattr=+64bit --mcpu=invalidcpu 2>&1 | FileCheck %s -- --; CHECK: {{.*}} is not a recognized processor for this target -- --define void @f() { -- ret void --} -diff --git a/llvm/test/CodeGen/LoongArch/cpus.ll b/llvm/test/CodeGen/LoongArch/cpus.ll -deleted file mode 100644 -index 35945ae4d..000000000 ---- a/llvm/test/CodeGen/LoongArch/cpus.ll -+++ /dev/null -@@ -1,20 +0,0 @@ --;; This tests that llc accepts all valid LoongArch CPUs. --;; Note the 'generic' names have been tested in cpu-name-generic.ll. -- --; RUN: llc < %s --mtriple=loongarch64 --mcpu=loongarch64 2>&1 | FileCheck %s --; RUN: llc < %s --mtriple=loongarch64 --mcpu=la464 2>&1 | FileCheck %s --; RUN: llc < %s --mtriple=loongarch64 2>&1 | FileCheck %s -- --; CHECK-NOT: {{.*}} is not a recognized processor for this target -- --define void @f() { -- ret void --} -- --define void @tune_cpu_loongarch64() "tune-cpu"="loongarch64" { -- ret void --} -- --define void @tune_cpu_la464() "tune-cpu"="la464" { -- ret void --} -diff --git a/llvm/unittests/TargetParser/TripleTest.cpp b/llvm/unittests/TargetParser/TripleTest.cpp -index b19699fc0..77de43a16 100644 ---- a/llvm/unittests/TargetParser/TripleTest.cpp -+++ b/llvm/unittests/TargetParser/TripleTest.cpp -@@ -1225,14 +1225,12 @@ TEST(TripleTest, BitWidthPredicates) { - EXPECT_TRUE(T.isArch32Bit()); - EXPECT_FALSE(T.isArch64Bit()); - EXPECT_TRUE(T.isLoongArch()); -- EXPECT_TRUE(T.isLoongArch32()); - - T.setArch(Triple::loongarch64); - EXPECT_FALSE(T.isArch16Bit()); - EXPECT_FALSE(T.isArch32Bit()); - EXPECT_TRUE(T.isArch64Bit()); - EXPECT_TRUE(T.isLoongArch()); -- EXPECT_TRUE(T.isLoongArch64()); - - T.setArch(Triple::dxil); - EXPECT_FALSE(T.isArch16Bit()); diff --git a/0003-LoongArch-Emit-R_LARCH_RELAX-when-expanding-some-Loa.patch b/0003-LoongArch-Emit-R_LARCH_RELAX-when-expanding-some-Loa.patch new file mode 100644 index 0000000000000000000000000000000000000000..525918e1c42a0d282974f412291412bb0df2e1ba --- /dev/null +++ b/0003-LoongArch-Emit-R_LARCH_RELAX-when-expanding-some-Loa.patch @@ -0,0 +1,364 @@ +From f2495d7efb79fdc82af6147f7201d9cf3c91beba Mon Sep 17 00:00:00 2001 +From: Jinyang He +Date: Wed, 27 Dec 2023 08:51:48 +0800 +Subject: [PATCH 03/66] [LoongArch] Emit R_LARCH_RELAX when expanding some + LoadAddress (#72961) + +Emit relax relocs when expand non-large la.pcrel and non-large la.got on +llvm-mc stage, which like what does on GAS. 
+1, la.pcrel -> PCALA_HI20 + RELAX + PCALA_LO12 + RELAX +2, la.got -> GOT_PC_HI20 + RELAX + GOT_PC_LO12 + RELAX + +(cherry picked from commit b3ef8dce9811b2725639b0d4fac3f85c7e112817) +Change-Id: I222daf60b36ee70e23c76b753e1d2a3b8148f44b +--- + .../AsmParser/LoongArchAsmParser.cpp | 12 +-- + .../MCTargetDesc/LoongArchMCCodeEmitter.cpp | 13 +++ + .../MCTargetDesc/LoongArchMCExpr.cpp | 7 +- + .../LoongArch/MCTargetDesc/LoongArchMCExpr.h | 8 +- + llvm/test/MC/LoongArch/Macros/macros-la.s | 84 ++++++++++++++++--- + llvm/test/MC/LoongArch/Misc/subsection.s | 2 +- + .../MC/LoongArch/Relocations/relax-addsub.s | 16 +++- + 7 files changed, 115 insertions(+), 27 deletions(-) + +diff --git a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp +index 94d530306536..a132e645c864 100644 +--- a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp ++++ b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp +@@ -86,7 +86,7 @@ class LoongArchAsmParser : public MCTargetAsmParser { + // "emitLoadAddress*" functions. + void emitLAInstSeq(MCRegister DestReg, MCRegister TmpReg, + const MCExpr *Symbol, SmallVectorImpl &Insts, +- SMLoc IDLoc, MCStreamer &Out); ++ SMLoc IDLoc, MCStreamer &Out, bool RelaxHint = false); + + // Helper to emit pseudo instruction "la.abs $rd, sym". + void emitLoadAddressAbs(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out); +@@ -749,12 +749,14 @@ bool LoongArchAsmParser::ParseInstruction(ParseInstructionInfo &Info, + void LoongArchAsmParser::emitLAInstSeq(MCRegister DestReg, MCRegister TmpReg, + const MCExpr *Symbol, + SmallVectorImpl &Insts, +- SMLoc IDLoc, MCStreamer &Out) { ++ SMLoc IDLoc, MCStreamer &Out, ++ bool RelaxHint) { + MCContext &Ctx = getContext(); + for (LoongArchAsmParser::Inst &Inst : Insts) { + unsigned Opc = Inst.Opc; + LoongArchMCExpr::VariantKind VK = Inst.VK; +- const LoongArchMCExpr *LE = LoongArchMCExpr::create(Symbol, VK, Ctx); ++ const LoongArchMCExpr *LE = ++ LoongArchMCExpr::create(Symbol, VK, Ctx, RelaxHint); + switch (Opc) { + default: + llvm_unreachable("unexpected opcode"); +@@ -855,7 +857,7 @@ void LoongArchAsmParser::emitLoadAddressPcrel(MCInst &Inst, SMLoc IDLoc, + Insts.push_back( + LoongArchAsmParser::Inst(ADDI, LoongArchMCExpr::VK_LoongArch_PCALA_LO12)); + +- emitLAInstSeq(DestReg, DestReg, Symbol, Insts, IDLoc, Out); ++ emitLAInstSeq(DestReg, DestReg, Symbol, Insts, IDLoc, Out, true); + } + + void LoongArchAsmParser::emitLoadAddressPcrelLarge(MCInst &Inst, SMLoc IDLoc, +@@ -901,7 +903,7 @@ void LoongArchAsmParser::emitLoadAddressGot(MCInst &Inst, SMLoc IDLoc, + Insts.push_back( + LoongArchAsmParser::Inst(LD, LoongArchMCExpr::VK_LoongArch_GOT_PC_LO12)); + +- emitLAInstSeq(DestReg, DestReg, Symbol, Insts, IDLoc, Out); ++ emitLAInstSeq(DestReg, DestReg, Symbol, Insts, IDLoc, Out, true); + } + + void LoongArchAsmParser::emitLoadAddressGotLarge(MCInst &Inst, SMLoc IDLoc, +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp +index 03fb9e008ae9..08c0820cb862 100644 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp +@@ -19,6 +19,7 @@ + #include "llvm/MC/MCInstBuilder.h" + #include "llvm/MC/MCInstrInfo.h" + #include "llvm/MC/MCRegisterInfo.h" ++#include "llvm/MC/MCSubtargetInfo.h" + #include "llvm/Support/Casting.h" + #include "llvm/Support/EndianStream.h" + +@@ -120,12 +121,15 @@ 
LoongArchMCCodeEmitter::getExprOpValue(const MCInst &MI, const MCOperand &MO, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + assert(MO.isExpr() && "getExprOpValue expects only expressions"); ++ bool RelaxCandidate = false; ++ bool EnableRelax = STI.hasFeature(LoongArch::FeatureRelax); + const MCExpr *Expr = MO.getExpr(); + MCExpr::ExprKind Kind = Expr->getKind(); + LoongArch::Fixups FixupKind = LoongArch::fixup_loongarch_invalid; + if (Kind == MCExpr::Target) { + const LoongArchMCExpr *LAExpr = cast(Expr); + ++ RelaxCandidate = LAExpr->getRelaxHint(); + switch (LAExpr->getKind()) { + case LoongArchMCExpr::VK_LoongArch_None: + case LoongArchMCExpr::VK_LoongArch_Invalid: +@@ -269,6 +273,15 @@ LoongArchMCCodeEmitter::getExprOpValue(const MCInst &MI, const MCOperand &MO, + + Fixups.push_back( + MCFixup::create(0, Expr, MCFixupKind(FixupKind), MI.getLoc())); ++ ++ // Emit an R_LARCH_RELAX if linker relaxation is enabled and LAExpr has relax ++ // hint. ++ if (EnableRelax && RelaxCandidate) { ++ const MCConstantExpr *Dummy = MCConstantExpr::create(0, Ctx); ++ Fixups.push_back(MCFixup::create( ++ 0, Dummy, MCFixupKind(LoongArch::fixup_loongarch_relax), MI.getLoc())); ++ } ++ + return 0; + } + +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp +index 993111552a31..82c992b1cc8c 100644 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp +@@ -25,9 +25,10 @@ using namespace llvm; + + #define DEBUG_TYPE "loongarch-mcexpr" + +-const LoongArchMCExpr * +-LoongArchMCExpr::create(const MCExpr *Expr, VariantKind Kind, MCContext &Ctx) { +- return new (Ctx) LoongArchMCExpr(Expr, Kind); ++const LoongArchMCExpr *LoongArchMCExpr::create(const MCExpr *Expr, ++ VariantKind Kind, MCContext &Ctx, ++ bool Hint) { ++ return new (Ctx) LoongArchMCExpr(Expr, Kind, Hint); + } + + void LoongArchMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const { +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h +index 0945cf82db86..93251f824103 100644 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h +@@ -67,16 +67,18 @@ public: + private: + const MCExpr *Expr; + const VariantKind Kind; ++ const bool RelaxHint; + +- explicit LoongArchMCExpr(const MCExpr *Expr, VariantKind Kind) +- : Expr(Expr), Kind(Kind) {} ++ explicit LoongArchMCExpr(const MCExpr *Expr, VariantKind Kind, bool Hint) ++ : Expr(Expr), Kind(Kind), RelaxHint(Hint) {} + + public: + static const LoongArchMCExpr *create(const MCExpr *Expr, VariantKind Kind, +- MCContext &Ctx); ++ MCContext &Ctx, bool Hint = false); + + VariantKind getKind() const { return Kind; } + const MCExpr *getSubExpr() const { return Expr; } ++ bool getRelaxHint() const { return RelaxHint; } + + void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override; + bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout, +diff --git a/llvm/test/MC/LoongArch/Macros/macros-la.s b/llvm/test/MC/LoongArch/Macros/macros-la.s +index 924e4326b8e5..1a1d12d7d7df 100644 +--- a/llvm/test/MC/LoongArch/Macros/macros-la.s ++++ b/llvm/test/MC/LoongArch/Macros/macros-la.s +@@ -1,66 +1,128 @@ + # RUN: llvm-mc --triple=loongarch64 %s | FileCheck %s ++# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=-relax %s -o %t ++# RUN: llvm-readobj -r %t | FileCheck 
%s --check-prefix=RELOC ++# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax %s -o %t.relax ++# RUN: llvm-readobj -r %t.relax | FileCheck %s --check-prefixes=RELOC,RELAX ++ ++# RELOC: Relocations [ ++# RELOC-NEXT: Section ({{.*}}) .rela.text { + + la.abs $a0, sym_abs + # CHECK: lu12i.w $a0, %abs_hi20(sym_abs) + # CHECK-NEXT: ori $a0, $a0, %abs_lo12(sym_abs) + # CHECK-NEXT: lu32i.d $a0, %abs64_lo20(sym_abs) + # CHECK-NEXT: lu52i.d $a0, $a0, %abs64_hi12(sym_abs) ++# CHECK-EMPTY: ++# RELOC-NEXT: R_LARCH_ABS_HI20 sym_abs 0x0 ++# RELOC-NEXT: R_LARCH_ABS_LO12 sym_abs 0x0 ++# RELOC-NEXT: R_LARCH_ABS64_LO20 sym_abs 0x0 ++# RELOC-NEXT: R_LARCH_ABS64_HI12 sym_abs 0x0 + + la.pcrel $a0, sym_pcrel +-# CHECK: pcalau12i $a0, %pc_hi20(sym_pcrel) ++# CHECK-NEXT: pcalau12i $a0, %pc_hi20(sym_pcrel) + # CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(sym_pcrel) ++# CHECK-EMPTY: ++# RELOC-NEXT: R_LARCH_PCALA_HI20 sym_pcrel 0x0 ++# RELAX-NEXT: R_LARCH_RELAX - 0x0 ++# RELOC-NEXT: R_LARCH_PCALA_LO12 sym_pcrel 0x0 ++# RELAX-NEXT: R_LARCH_RELAX - 0x0 + + la.pcrel $a0, $a1, sym_pcrel_large +-# CHECK: pcalau12i $a0, %pc_hi20(sym_pcrel_large) ++# CHECK-NEXT: pcalau12i $a0, %pc_hi20(sym_pcrel_large) + # CHECK-NEXT: addi.d $a1, $zero, %pc_lo12(sym_pcrel_large) + # CHECK-NEXT: lu32i.d $a1, %pc64_lo20(sym_pcrel_large) + # CHECK-NEXT: lu52i.d $a1, $a1, %pc64_hi12(sym_pcrel_large) + # CHECK-NEXT: add.d $a0, $a0, $a1 ++# CHECK-EMPTY: ++# RELOC-NEXT: R_LARCH_PCALA_HI20 sym_pcrel_large 0x0 ++# RELOC-NEXT: R_LARCH_PCALA_LO12 sym_pcrel_large 0x0 ++# RELOC-NEXT: R_LARCH_PCALA64_LO20 sym_pcrel_large 0x0 ++# RELOC-NEXT: R_LARCH_PCALA64_HI12 sym_pcrel_large 0x0 + + la.got $a0, sym_got +-# CHECK: pcalau12i $a0, %got_pc_hi20(sym_got) ++# CHECK-NEXT: pcalau12i $a0, %got_pc_hi20(sym_got) + # CHECK-NEXT: ld.d $a0, $a0, %got_pc_lo12(sym_got) ++# CHECK-EMPTY: ++# RELOC-NEXT: R_LARCH_GOT_PC_HI20 sym_got 0x0 ++# RELAX-NEXT: R_LARCH_RELAX - 0x0 ++# RELOC-NEXT: R_LARCH_GOT_PC_LO12 sym_got 0x0 ++# RELAX-NEXT: R_LARCH_RELAX - 0x0 + + la.got $a0, $a1, sym_got_large +-# CHECK: pcalau12i $a0, %got_pc_hi20(sym_got_large) ++# CHECK-NEXT: pcalau12i $a0, %got_pc_hi20(sym_got_large) + # CHECK-NEXT: addi.d $a1, $zero, %got_pc_lo12(sym_got_large) + # CHECK-NEXT: lu32i.d $a1, %got64_pc_lo20(sym_got_large) + # CHECK-NEXT: lu52i.d $a1, $a1, %got64_pc_hi12(sym_got_large) + # CHECK-NEXT: ldx.d $a0, $a0, $a1 ++# CHECK-EMPTY: ++# RELOC-NEXT: R_LARCH_GOT_PC_HI20 sym_got_large 0x0 ++# RELOC-NEXT: R_LARCH_GOT_PC_LO12 sym_got_large 0x0 ++# RELOC-NEXT: R_LARCH_GOT64_PC_LO20 sym_got_large 0x0 ++# RELOC-NEXT: R_LARCH_GOT64_PC_HI12 sym_got_large 0x0 + + la.tls.le $a0, sym_le +-# CHECK: lu12i.w $a0, %le_hi20(sym_le) ++# CHECK-NEXT: lu12i.w $a0, %le_hi20(sym_le) + # CHECK-NEXT: ori $a0, $a0, %le_lo12(sym_le) ++# CHECK-EMPTY: ++# RELOC-NEXT: R_LARCH_TLS_LE_HI20 sym_le 0x0 ++# RELOC-NEXT: R_LARCH_TLS_LE_LO12 sym_le 0x0 + + la.tls.ie $a0, sym_ie +-# CHECK: pcalau12i $a0, %ie_pc_hi20(sym_ie) ++# CHECK-NEXT: pcalau12i $a0, %ie_pc_hi20(sym_ie) + # CHECK-NEXT: ld.d $a0, $a0, %ie_pc_lo12(sym_ie) ++# CHECK-EMPTY: ++# RELOC-NEXT: R_LARCH_TLS_IE_PC_HI20 sym_ie 0x0 ++# RELOC-NEXT: R_LARCH_TLS_IE_PC_LO12 sym_ie 0x0 + + la.tls.ie $a0, $a1, sym_ie_large +-# CHECK: pcalau12i $a0, %ie_pc_hi20(sym_ie_large) ++# CHECK-NEXT: pcalau12i $a0, %ie_pc_hi20(sym_ie_large) + # CHECK-NEXT: addi.d $a1, $zero, %ie_pc_lo12(sym_ie_large) + # CHECK-NEXT: lu32i.d $a1, %ie64_pc_lo20(sym_ie_large) + # CHECK-NEXT: lu52i.d $a1, $a1, %ie64_pc_hi12(sym_ie_large) + # CHECK-NEXT: ldx.d $a0, $a0, $a1 ++# 
CHECK-EMPTY: ++# RELOC-NEXT: R_LARCH_TLS_IE_PC_HI20 sym_ie_large 0x0 ++# RELOC-NEXT: R_LARCH_TLS_IE_PC_LO12 sym_ie_large 0x0 ++# RELOC-NEXT: R_LARCH_TLS_IE64_PC_LO20 sym_ie_large 0x0 ++# RELOC-NEXT: R_LARCH_TLS_IE64_PC_HI12 sym_ie_large 0x0 + + la.tls.ld $a0, sym_ld +-# CHECK: pcalau12i $a0, %ld_pc_hi20(sym_ld) ++# CHECK-NEXT: pcalau12i $a0, %ld_pc_hi20(sym_ld) + # CHECK-NEXT: addi.d $a0, $a0, %got_pc_lo12(sym_ld) ++# CHECK-EMPTY: ++# RELOC-NEXT: R_LARCH_TLS_LD_PC_HI20 sym_ld 0x0 ++# RELOC-NEXT: R_LARCH_GOT_PC_LO12 sym_ld 0x0 + + la.tls.ld $a0, $a1, sym_ld_large +-# CHECK: pcalau12i $a0, %ld_pc_hi20(sym_ld_large) ++# CHECK-NEXT: pcalau12i $a0, %ld_pc_hi20(sym_ld_large) + # CHECK-NEXT: addi.d $a1, $zero, %got_pc_lo12(sym_ld_large) + # CHECK-NEXT: lu32i.d $a1, %got64_pc_lo20(sym_ld_large) + # CHECK-NEXT: lu52i.d $a1, $a1, %got64_pc_hi12(sym_ld_large) + # CHECK-NEXT: add.d $a0, $a0, $a1 ++# CHECK-EMPTY: ++# RELOC-NEXT: R_LARCH_TLS_LD_PC_HI20 sym_ld_large 0x0 ++# RELOC-NEXT: R_LARCH_GOT_PC_LO12 sym_ld_large 0x0 ++# RELOC-NEXT: R_LARCH_GOT64_PC_LO20 sym_ld_large 0x0 ++# RELOC-NEXT: R_LARCH_GOT64_PC_HI12 sym_ld_large 0x0 + + la.tls.gd $a0, sym_gd +-# CHECK: pcalau12i $a0, %gd_pc_hi20(sym_gd) ++# CHECK-NEXT: pcalau12i $a0, %gd_pc_hi20(sym_gd) + # CHECK-NEXT: addi.d $a0, $a0, %got_pc_lo12(sym_gd) ++# CHECK-EMPTY: ++# RELOC-NEXT: R_LARCH_TLS_GD_PC_HI20 sym_gd 0x0 ++# RELOC-NEXT: R_LARCH_GOT_PC_LO12 sym_gd 0x0 + + la.tls.gd $a0, $a1, sym_gd_large +-# CHECK: pcalau12i $a0, %gd_pc_hi20(sym_gd_large) ++# CHECK-NEXT: pcalau12i $a0, %gd_pc_hi20(sym_gd_large) + # CHECK-NEXT: addi.d $a1, $zero, %got_pc_lo12(sym_gd_large) + # CHECK-NEXT: lu32i.d $a1, %got64_pc_lo20(sym_gd_large) + # CHECK-NEXT: lu52i.d $a1, $a1, %got64_pc_hi12(sym_gd_large) + # CHECK-NEXT: add.d $a0, $a0, $a1 ++# CHECK-EMPTY: ++# RELOC-NEXT: R_LARCH_TLS_GD_PC_HI20 sym_gd_large 0x0 ++# RELOC-NEXT: R_LARCH_GOT_PC_LO12 sym_gd_large 0x0 ++# RELOC-NEXT: R_LARCH_GOT64_PC_LO20 sym_gd_large 0x0 ++# RELOC-NEXT: R_LARCH_GOT64_PC_HI12 sym_gd_large 0x0 ++ ++# RELOC-NEXT: } ++# RELOC-NEXT: ] +diff --git a/llvm/test/MC/LoongArch/Misc/subsection.s b/llvm/test/MC/LoongArch/Misc/subsection.s +index 0bd22b474536..566a2408d691 100644 +--- a/llvm/test/MC/LoongArch/Misc/subsection.s ++++ b/llvm/test/MC/LoongArch/Misc/subsection.s +@@ -1,5 +1,5 @@ + # RUN: not llvm-mc --filetype=obj --triple=loongarch64 --mattr=-relax %s -o /dev/null 2>&1 | FileCheck %s --check-prefixes=ERR,NORELAX --implicit-check-not=error: +-## TODO: not llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax %s -o /dev/null 2>&1 | FileCheck %s --check-prefixes=ERR,RELAX --implicit-check-not=error: ++# RUN: not llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax %s -o /dev/null 2>&1 | FileCheck %s --check-prefixes=ERR,RELAX --implicit-check-not=error: + + a: + nop +diff --git a/llvm/test/MC/LoongArch/Relocations/relax-addsub.s b/llvm/test/MC/LoongArch/Relocations/relax-addsub.s +index 532eb4e0561a..c4454f5bb98d 100644 +--- a/llvm/test/MC/LoongArch/Relocations/relax-addsub.s ++++ b/llvm/test/MC/LoongArch/Relocations/relax-addsub.s +@@ -18,7 +18,9 @@ + # RELAX: Relocations [ + # RELAX-NEXT: Section ({{.*}}) .rela.text { + # RELAX-NEXT: 0x10 R_LARCH_PCALA_HI20 .L1 0x0 ++# RELAX-NEXT: 0x10 R_LARCH_RELAX - 0x0 + # RELAX-NEXT: 0x14 R_LARCH_PCALA_LO12 .L1 0x0 ++# RELAX-NEXT: 0x14 R_LARCH_RELAX - 0x0 + # RELAX-NEXT: } + # RELAX-NEXT: Section ({{.*}}) .rela.data { + # RELAX-NEXT: 0xF R_LARCH_ADD8 .L3 0x0 +@@ -29,13 +31,21 @@ + # RELAX-NEXT: 0x12 R_LARCH_SUB32 .L2 0x0 + # RELAX-NEXT: 0x16 
R_LARCH_ADD64 .L3 0x0 + # RELAX-NEXT: 0x16 R_LARCH_SUB64 .L2 0x0 ++# RELAX-NEXT: 0x1E R_LARCH_ADD8 .L4 0x0 ++# RELAX-NEXT: 0x1E R_LARCH_SUB8 .L3 0x0 ++# RELAX-NEXT: 0x1F R_LARCH_ADD16 .L4 0x0 ++# RELAX-NEXT: 0x1F R_LARCH_SUB16 .L3 0x0 ++# RELAX-NEXT: 0x21 R_LARCH_ADD32 .L4 0x0 ++# RELAX-NEXT: 0x21 R_LARCH_SUB32 .L3 0x0 ++# RELAX-NEXT: 0x25 R_LARCH_ADD64 .L4 0x0 ++# RELAX-NEXT: 0x25 R_LARCH_SUB64 .L3 0x0 + # RELAX-NEXT: } + # RELAX-NEXT: ] + + # RELAX: Hex dump of section '.data': + # RELAX-NEXT: 0x00000000 04040004 00000004 00000000 00000000 +-# RELAX-NEXT: 0x00000010 00000000 00000000 00000000 00000808 +-# RELAX-NEXT: 0x00000020 00080000 00080000 00000000 00 ++# RELAX-NEXT: 0x00000010 00000000 00000000 00000000 00000000 ++# RELAX-NEXT: 0x00000020 00000000 00000000 00000000 00 + + .text + .L1: +@@ -60,8 +70,6 @@ + .short .L3 - .L2 + .word .L3 - .L2 + .dword .L3 - .L2 +-## TODO +-## With relaxation, emit relocs because la.pcrel is a linker-relaxable inst. + .byte .L4 - .L3 + .short .L4 - .L3 + .word .L4 - .L3 +-- +2.20.1 + diff --git a/0004-MC-LoongArch-Add-AlignFragment-size-if-layout-is-ava.patch b/0004-MC-LoongArch-Add-AlignFragment-size-if-layout-is-ava.patch new file mode 100644 index 0000000000000000000000000000000000000000..aad528f090b8fd134511236696d5b146d870bcd1 --- /dev/null +++ b/0004-MC-LoongArch-Add-AlignFragment-size-if-layout-is-ava.patch @@ -0,0 +1,123 @@ +From be6e5c566f49bee5efe3d710bdd321e15d8d95ea Mon Sep 17 00:00:00 2001 +From: Jinyang He +Date: Thu, 14 Mar 2024 12:10:50 +0800 +Subject: [PATCH 04/66] [MC][LoongArch] Add AlignFragment size if layout is + available and not need insert nops (#76552) + +Due to delayed decision for ADD/SUB relocations, RISCV and LoongArch may +go slow fragment walk path with available layout. When RISCV (or +LoongArch in the future) don't need insert nops, that means relax is +disabled. With available layout and not needing insert nops, the size of +AlignFragment should be a constant. So we can add it to Displacement for +folding A-B. 
+ +(cherry picked from commit 0731567a31e4ade97c27801045156a88c4589704) +Change-Id: I554d6766bd7f688204e956e4a6431574b4c511c9 +--- + llvm/lib/MC/MCExpr.cpp | 6 +++++ + llvm/test/MC/LoongArch/Misc/cfi-advance.s | 27 +++++++++++++++++++ + .../MC/LoongArch/Relocations/relax-addsub.s | 17 +++--------- + 3 files changed, 37 insertions(+), 13 deletions(-) + create mode 100644 llvm/test/MC/LoongArch/Misc/cfi-advance.s + +diff --git a/llvm/lib/MC/MCExpr.cpp b/llvm/lib/MC/MCExpr.cpp +index 5a6596f93824..a561fed11179 100644 +--- a/llvm/lib/MC/MCExpr.cpp ++++ b/llvm/lib/MC/MCExpr.cpp +@@ -707,8 +707,14 @@ static void AttemptToFoldSymbolOffsetDifference( + } + + int64_t Num; ++ unsigned Count; + if (DF) { + Displacement += DF->getContents().size(); ++ } else if (auto *AF = dyn_cast(FI); ++ AF && Layout && ++ !Asm->getBackend().shouldInsertExtraNopBytesForCodeAlign( ++ *AF, Count)) { ++ Displacement += Asm->computeFragmentSize(*Layout, *AF); + } else if (auto *FF = dyn_cast(FI); + FF && FF->getNumValues().evaluateAsAbsolute(Num)) { + Displacement += Num * FF->getValueSize(); +diff --git a/llvm/test/MC/LoongArch/Misc/cfi-advance.s b/llvm/test/MC/LoongArch/Misc/cfi-advance.s +new file mode 100644 +index 000000000000..662c43e6bcea +--- /dev/null ++++ b/llvm/test/MC/LoongArch/Misc/cfi-advance.s +@@ -0,0 +1,27 @@ ++# RUN: llvm-mc --filetype=obj --triple=loongarch64 -mattr=-relax %s -o %t.o ++# RUN: llvm-readobj -r %t.o | FileCheck --check-prefix=RELOC %s ++# RUN: llvm-dwarfdump --debug-frame %t.o | FileCheck --check-prefix=DWARFDUMP %s ++ ++# RELOC: Relocations [ ++# RELOC-NEXT: .rela.eh_frame { ++# RELOC-NEXT: 0x1C R_LARCH_32_PCREL .text 0x0 ++# RELOC-NEXT: } ++# RELOC-NEXT: ] ++# DWARFDUMP: DW_CFA_advance_loc: 4 ++# DWARFDUMP-NEXT: DW_CFA_def_cfa_offset: +8 ++# DWARFDUMP-NEXT: DW_CFA_advance_loc: 8 ++# DWARFDUMP-NEXT: DW_CFA_def_cfa_offset: +8 ++ ++ .text ++ .globl test ++ .p2align 2 ++ .type test,@function ++test: ++ .cfi_startproc ++ nop ++ .cfi_def_cfa_offset 8 ++ .p2align 3 ++ nop ++ .cfi_def_cfa_offset 8 ++ nop ++ .cfi_endproc +diff --git a/llvm/test/MC/LoongArch/Relocations/relax-addsub.s b/llvm/test/MC/LoongArch/Relocations/relax-addsub.s +index c4454f5bb98d..14922657ae89 100644 +--- a/llvm/test/MC/LoongArch/Relocations/relax-addsub.s ++++ b/llvm/test/MC/LoongArch/Relocations/relax-addsub.s +@@ -23,14 +23,6 @@ + # RELAX-NEXT: 0x14 R_LARCH_RELAX - 0x0 + # RELAX-NEXT: } + # RELAX-NEXT: Section ({{.*}}) .rela.data { +-# RELAX-NEXT: 0xF R_LARCH_ADD8 .L3 0x0 +-# RELAX-NEXT: 0xF R_LARCH_SUB8 .L2 0x0 +-# RELAX-NEXT: 0x10 R_LARCH_ADD16 .L3 0x0 +-# RELAX-NEXT: 0x10 R_LARCH_SUB16 .L2 0x0 +-# RELAX-NEXT: 0x12 R_LARCH_ADD32 .L3 0x0 +-# RELAX-NEXT: 0x12 R_LARCH_SUB32 .L2 0x0 +-# RELAX-NEXT: 0x16 R_LARCH_ADD64 .L3 0x0 +-# RELAX-NEXT: 0x16 R_LARCH_SUB64 .L2 0x0 + # RELAX-NEXT: 0x1E R_LARCH_ADD8 .L4 0x0 + # RELAX-NEXT: 0x1E R_LARCH_SUB8 .L3 0x0 + # RELAX-NEXT: 0x1F R_LARCH_ADD16 .L4 0x0 +@@ -43,8 +35,8 @@ + # RELAX-NEXT: ] + + # RELAX: Hex dump of section '.data': +-# RELAX-NEXT: 0x00000000 04040004 00000004 00000000 00000000 +-# RELAX-NEXT: 0x00000010 00000000 00000000 00000000 00000000 ++# RELAX-NEXT: 0x00000000 04040004 00000004 00000000 0000000c ++# RELAX-NEXT: 0x00000010 0c000c00 00000c00 00000000 00000000 + # RELAX-NEXT: 0x00000020 00000000 00000000 00000000 00 + + .text +@@ -63,13 +55,12 @@ + .short .L2 - .L1 + .word .L2 - .L1 + .dword .L2 - .L1 +-## With relaxation, emit relocs because of the .align making the diff variable. +-## TODO Handle alignment directive. Why they emit relocs now? 
They returns +-## without folding symbols offset in AttemptToFoldSymbolOffsetDifference(). ++## TODO Handle alignment directive. + .byte .L3 - .L2 + .short .L3 - .L2 + .word .L3 - .L2 + .dword .L3 - .L2 ++## With relaxation, emit relocs because the la.pcrel makes the diff variable. + .byte .L4 - .L3 + .short .L4 - .L3 + .word .L4 - .L3 +-- +2.20.1 + diff --git a/0005-LoongArch-RISCV-Support-R_LARCH_-ADD-SUB-_ULEB128-R_.patch b/0005-LoongArch-RISCV-Support-R_LARCH_-ADD-SUB-_ULEB128-R_.patch new file mode 100644 index 0000000000000000000000000000000000000000..2ab3f4e8cddcda4eb9fd845104865505440798d6 --- /dev/null +++ b/0005-LoongArch-RISCV-Support-R_LARCH_-ADD-SUB-_ULEB128-R_.patch @@ -0,0 +1,633 @@ +From 8d7b71890179d32474b3a1a1c627481bd5a2327d Mon Sep 17 00:00:00 2001 +From: zhanglimin +Date: Fri, 15 Mar 2024 14:39:48 +0800 +Subject: [PATCH 05/66] [LoongArch][RISCV] Support + R_LARCH_{ADD,SUB}_ULEB128/R_RISCV_{SET,SUB}_ULEB128 for .uleb128 directives + +This patch is originally from three upstream commits: +1, R_LARCH_{ADD,SUB}_ULEB128 are originally landed from b57159cb(#76433). +2, R_RISCV_{SET,SUB}_ULEB128 are originally supported from 1df5ea29. Among it, we change +the default behaviour of `-riscv-uleb128-reloc` to not produce uleb128 reloc, in order +to avoid any other side-effects due to the updated implementation of `MCAssembler::relaxLEB()` +function. And at the same time, we ensure that this patch can't introduce new default traits +(such as the generation for uleb128 reloc) on RISCV in this version. +3, Fix invalid-sleb.s in original commit d7398a35. + +Change-Id: Ie687b7d8483c76cf647141162641db1a9d819a04 +--- + .../llvm/BinaryFormat/ELFRelocs/RISCV.def | 2 + + llvm/include/llvm/MC/MCAsmBackend.h | 8 +++ + llvm/include/llvm/MC/MCFixup.h | 1 + + llvm/include/llvm/MC/MCFragment.h | 9 ++- + llvm/lib/MC/MCAsmBackend.cpp | 1 + + llvm/lib/MC/MCAssembler.cpp | 39 ++++++++-- + .../MCTargetDesc/LoongArchAsmBackend.cpp | 69 ++++++++++++++---- + .../MCTargetDesc/LoongArchAsmBackend.h | 3 + + .../RISCV/MCTargetDesc/RISCVAsmBackend.cpp | 27 +++++++ + .../RISCV/MCTargetDesc/RISCVAsmBackend.h | 2 + + llvm/test/MC/ELF/RISCV/gen-dwarf.s | 5 +- + llvm/test/MC/LoongArch/Relocations/leb128.s | 72 +++++++++++++++++++ + .../MC/LoongArch/Relocations/relax-addsub.s | 57 +++++++++++---- + llvm/test/MC/X86/invalid-sleb.s | 5 -- + 14 files changed, 252 insertions(+), 48 deletions(-) + create mode 100644 llvm/test/MC/LoongArch/Relocations/leb128.s + delete mode 100644 llvm/test/MC/X86/invalid-sleb.s + +diff --git a/llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def b/llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def +index 9a126df01531..c7fd6490041c 100644 +--- a/llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def ++++ b/llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def +@@ -55,3 +55,5 @@ ELF_RELOC(R_RISCV_SET32, 56) + ELF_RELOC(R_RISCV_32_PCREL, 57) + ELF_RELOC(R_RISCV_IRELATIVE, 58) + ELF_RELOC(R_RISCV_PLT32, 59) ++ELF_RELOC(R_RISCV_SET_ULEB128, 60) ++ELF_RELOC(R_RISCV_SUB_ULEB128, 61) +diff --git a/llvm/include/llvm/MC/MCAsmBackend.h b/llvm/include/llvm/MC/MCAsmBackend.h +index 5e08fb41679b..968a767b17f8 100644 +--- a/llvm/include/llvm/MC/MCAsmBackend.h ++++ b/llvm/include/llvm/MC/MCAsmBackend.h +@@ -21,6 +21,7 @@ class MCAlignFragment; + class MCDwarfCallFrameFragment; + class MCDwarfLineAddrFragment; + class MCFragment; ++class MCLEBFragment; + class MCRelaxableFragment; + class MCSymbol; + class MCAsmLayout; +@@ -194,6 +195,13 @@ public: + return false; + } + ++ // Defined by linker relaxation 
targets to possibly emit LEB128 relocations ++ // and set Value at the relocated location. ++ virtual std::pair ++ relaxLEB128(MCLEBFragment &LF, MCAsmLayout &Layout, int64_t &Value) const { ++ return std::make_pair(false, false); ++ } ++ + /// @} + + /// Returns the minimum size of a nop in bytes on this target. The assembler +diff --git a/llvm/include/llvm/MC/MCFixup.h b/llvm/include/llvm/MC/MCFixup.h +index 069ca058310f..7f48a90cb1ec 100644 +--- a/llvm/include/llvm/MC/MCFixup.h ++++ b/llvm/include/llvm/MC/MCFixup.h +@@ -25,6 +25,7 @@ enum MCFixupKind { + FK_Data_4, ///< A four-byte fixup. + FK_Data_8, ///< A eight-byte fixup. + FK_Data_6b, ///< A six-bits fixup. ++ FK_Data_leb128, ///< A leb128 fixup. + FK_PCRel_1, ///< A one-byte pc relative fixup. + FK_PCRel_2, ///< A two-byte pc relative fixup. + FK_PCRel_4, ///< A four-byte pc relative fixup. +diff --git a/llvm/include/llvm/MC/MCFragment.h b/llvm/include/llvm/MC/MCFragment.h +index 7be4792a4521..e965732010fe 100644 +--- a/llvm/include/llvm/MC/MCFragment.h ++++ b/llvm/include/llvm/MC/MCFragment.h +@@ -428,7 +428,7 @@ public: + } + }; + +-class MCLEBFragment : public MCFragment { ++class MCLEBFragment final : public MCEncodedFragmentWithFixups<10, 1> { + /// True if this is a sleb128, false if uleb128. + bool IsSigned; + +@@ -439,17 +439,16 @@ class MCLEBFragment : public MCFragment { + + public: + MCLEBFragment(const MCExpr &Value_, bool IsSigned_, MCSection *Sec = nullptr) +- : MCFragment(FT_LEB, false, Sec), IsSigned(IsSigned_), Value(&Value_) { ++ : MCEncodedFragmentWithFixups<10, 1>(FT_LEB, false, Sec), ++ IsSigned(IsSigned_), Value(&Value_) { + Contents.push_back(0); + } + + const MCExpr &getValue() const { return *Value; } ++ void setValue(const MCExpr *Expr) { Value = Expr; } + + bool isSigned() const { return IsSigned; } + +- SmallString<8> &getContents() { return Contents; } +- const SmallString<8> &getContents() const { return Contents; } +- + /// @} + + static bool classof(const MCFragment *F) { +diff --git a/llvm/lib/MC/MCAsmBackend.cpp b/llvm/lib/MC/MCAsmBackend.cpp +index 64bbc63719c7..2eef7d363fe7 100644 +--- a/llvm/lib/MC/MCAsmBackend.cpp ++++ b/llvm/lib/MC/MCAsmBackend.cpp +@@ -89,6 +89,7 @@ const MCFixupKindInfo &MCAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { + {"FK_Data_4", 0, 32, 0}, + {"FK_Data_8", 0, 64, 0}, + {"FK_Data_6b", 0, 6, 0}, ++ {"FK_Data_leb128", 0, 0, 0}, + {"FK_PCRel_1", 0, 8, MCFixupKindInfo::FKF_IsPCRel}, + {"FK_PCRel_2", 0, 16, MCFixupKindInfo::FKF_IsPCRel}, + {"FK_PCRel_4", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, +diff --git a/llvm/lib/MC/MCAssembler.cpp b/llvm/lib/MC/MCAssembler.cpp +index 55ed1a285cd7..86c798ec9e27 100644 +--- a/llvm/lib/MC/MCAssembler.cpp ++++ b/llvm/lib/MC/MCAssembler.cpp +@@ -918,6 +918,12 @@ void MCAssembler::layout(MCAsmLayout &Layout) { + Contents = DF.getContents(); + break; + } ++ case MCFragment::FT_LEB: { ++ auto &LF = cast(Frag); ++ Fixups = LF.getFixups(); ++ Contents = LF.getContents(); ++ break; ++ } + case MCFragment::FT_PseudoProbe: { + MCPseudoProbeAddrFragment &PF = cast(Frag); + Fixups = PF.getFixups(); +@@ -1006,12 +1012,31 @@ bool MCAssembler::relaxInstruction(MCAsmLayout &Layout, + } + + bool MCAssembler::relaxLEB(MCAsmLayout &Layout, MCLEBFragment &LF) { +- uint64_t OldSize = LF.getContents().size(); ++ const unsigned OldSize = static_cast(LF.getContents().size()); ++ unsigned PadTo = OldSize; + int64_t Value; +- bool Abs = LF.getValue().evaluateKnownAbsolute(Value, Layout); +- if (!Abs) +- report_fatal_error("sleb128 and uleb128 expressions must 
be absolute"); +- SmallString<8> &Data = LF.getContents(); ++ SmallVectorImpl &Data = LF.getContents(); ++ LF.getFixups().clear(); ++ // Use evaluateKnownAbsolute for Mach-O as a hack: .subsections_via_symbols ++ // requires that .uleb128 A-B is foldable where A and B reside in different ++ // fragments. This is used by __gcc_except_table. ++ bool Abs = getSubsectionsViaSymbols() ++ ? LF.getValue().evaluateKnownAbsolute(Value, Layout) ++ : LF.getValue().evaluateAsAbsolute(Value, Layout); ++ if (!Abs) { ++ bool Relaxed, UseZeroPad; ++ std::tie(Relaxed, UseZeroPad) = getBackend().relaxLEB128(LF, Layout, Value); ++ if (!Relaxed) { ++ getContext().reportError(LF.getValue().getLoc(), ++ Twine(LF.isSigned() ? ".s" : ".u") + ++ "leb128 expression is not absolute"); ++ LF.setValue(MCConstantExpr::create(0, Context)); ++ } ++ uint8_t Tmp[10]; // maximum size: ceil(64/7) ++ PadTo = std::max(PadTo, encodeULEB128(uint64_t(Value), Tmp)); ++ if (UseZeroPad) ++ Value = 0; ++ } + Data.clear(); + raw_svector_ostream OSE(Data); + // The compiler can generate EH table assembly that is impossible to assemble +@@ -1019,9 +1044,9 @@ bool MCAssembler::relaxLEB(MCAsmLayout &Layout, MCLEBFragment &LF) { + // to a later alignment fragment. To accommodate such tables, relaxation can + // only increase an LEB fragment size here, not decrease it. See PR35809. + if (LF.isSigned()) +- encodeSLEB128(Value, OSE, OldSize); ++ encodeSLEB128(Value, OSE, PadTo); + else +- encodeULEB128(Value, OSE, OldSize); ++ encodeULEB128(Value, OSE, PadTo); + return OldSize != LF.getContents().size(); + } + +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp +index 1ed047a8e632..9227d4d6afed 100644 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp +@@ -92,6 +92,7 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value, + case FK_Data_2: + case FK_Data_4: + case FK_Data_8: ++ case FK_Data_leb128: + return Value; + case LoongArch::fixup_loongarch_b16: { + if (!isInt<18>(Value)) +@@ -129,6 +130,15 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value, + } + } + ++static void fixupLeb128(MCContext &Ctx, const MCFixup &Fixup, ++ MutableArrayRef Data, uint64_t Value) { ++ unsigned I; ++ for (I = 0; I != Data.size() && Value; ++I, Value >>= 7) ++ Data[I] |= uint8_t(Value & 0x7f); ++ if (Value) ++ Ctx.reportError(Fixup.getLoc(), "Invalid uleb128 value!"); ++} ++ + void LoongArchAsmBackend::applyFixup(const MCAssembler &Asm, + const MCFixup &Fixup, + const MCValue &Target, +@@ -144,6 +154,10 @@ void LoongArchAsmBackend::applyFixup(const MCAssembler &Asm, + MCFixupKindInfo Info = getFixupKindInfo(Kind); + MCContext &Ctx = Asm.getContext(); + ++ // Fixup leb128 separately. ++ if (Fixup.getTargetKind() == FK_Data_leb128) ++ return fixupLeb128(Ctx, Fixup, Data, Value); ++ + // Apply any target-specific value adjustments. 
+ Value = adjustFixupValue(Fixup, Value, Ctx); + +@@ -173,6 +187,7 @@ bool LoongArchAsmBackend::shouldForceRelocation(const MCAssembler &Asm, + case FK_Data_2: + case FK_Data_4: + case FK_Data_8: ++ case FK_Data_leb128: + return !Target.isAbsolute(); + } + } +@@ -202,9 +217,24 @@ getRelocPairForSize(unsigned Size) { + return std::make_pair( + MCFixupKind(FirstLiteralRelocationKind + ELF::R_LARCH_ADD64), + MCFixupKind(FirstLiteralRelocationKind + ELF::R_LARCH_SUB64)); ++ case 128: ++ return std::make_pair( ++ MCFixupKind(FirstLiteralRelocationKind + ELF::R_LARCH_ADD_ULEB128), ++ MCFixupKind(FirstLiteralRelocationKind + ELF::R_LARCH_SUB_ULEB128)); + } + } + ++std::pair LoongArchAsmBackend::relaxLEB128(MCLEBFragment &LF, ++ MCAsmLayout &Layout, ++ int64_t &Value) const { ++ const MCExpr &Expr = LF.getValue(); ++ if (LF.isSigned() || !Expr.evaluateKnownAbsolute(Value, Layout)) ++ return std::make_pair(false, false); ++ LF.getFixups().push_back( ++ MCFixup::create(0, &Expr, FK_Data_leb128, Expr.getLoc())); ++ return std::make_pair(true, true); ++} ++ + bool LoongArchAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count, + const MCSubtargetInfo *STI) const { + // We mostly follow binutils' convention here: align to 4-byte boundary with a +@@ -226,21 +256,27 @@ bool LoongArchAsmBackend::handleAddSubRelocations(const MCAsmLayout &Layout, + uint64_t &FixedValue) const { + std::pair FK; + uint64_t FixedValueA, FixedValueB; +- const MCSection &SecA = Target.getSymA()->getSymbol().getSection(); +- const MCSection &SecB = Target.getSymB()->getSymbol().getSection(); +- +- // We need record relocation if SecA != SecB. Usually SecB is same as the +- // section of Fixup, which will be record the relocation as PCRel. If SecB +- // is not same as the section of Fixup, it will report error. Just return +- // false and then this work can be finished by handleFixup. +- if (&SecA != &SecB) +- return false; +- +- // In SecA == SecB case. If the linker relaxation is enabled, we need record +- // the ADD, SUB relocations. Otherwise the FixedValue has already been +- // calculated out in evaluateFixup, return true and avoid record relocations. +- if (!STI.hasFeature(LoongArch::FeatureRelax)) +- return true; ++ const MCSymbol &SA = Target.getSymA()->getSymbol(); ++ const MCSymbol &SB = Target.getSymB()->getSymbol(); ++ ++ bool force = !SA.isInSection() || !SB.isInSection(); ++ if (!force) { ++ const MCSection &SecA = SA.getSection(); ++ const MCSection &SecB = SB.getSection(); ++ ++ // We need record relocation if SecA != SecB. Usually SecB is same as the ++ // section of Fixup, which will be record the relocation as PCRel. If SecB ++ // is not same as the section of Fixup, it will report error. Just return ++ // false and then this work can be finished by handleFixup. ++ if (&SecA != &SecB) ++ return false; ++ ++ // In SecA == SecB case. If the linker relaxation is enabled, we need record ++ // the ADD, SUB relocations. Otherwise the FixedValue has already been calc- ++ // ulated out in evaluateFixup, return true and avoid record relocations. 
++ if (!STI.hasFeature(LoongArch::FeatureRelax)) ++ return true; ++ } + + switch (Fixup.getKind()) { + case llvm::FK_Data_1: +@@ -255,6 +291,9 @@ bool LoongArchAsmBackend::handleAddSubRelocations(const MCAsmLayout &Layout, + case llvm::FK_Data_8: + FK = getRelocPairForSize(64); + break; ++ case llvm::FK_Data_leb128: ++ FK = getRelocPairForSize(128); ++ break; + default: + llvm_unreachable("unsupported fixup size"); + } +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h +index 20f25b5cf53b..49801e4fd81a 100644 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h +@@ -65,6 +65,9 @@ public: + void relaxInstruction(MCInst &Inst, + const MCSubtargetInfo &STI) const override {} + ++ std::pair relaxLEB128(MCLEBFragment &LF, MCAsmLayout &Layout, ++ int64_t &Value) const override; ++ + bool writeNopData(raw_ostream &OS, uint64_t Count, + const MCSubtargetInfo *STI) const override; + +diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp +index 1b890fbe041a..5c651aa93225 100644 +--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp ++++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp +@@ -19,6 +19,7 @@ + #include "llvm/MC/MCObjectWriter.h" + #include "llvm/MC/MCSymbol.h" + #include "llvm/MC/MCValue.h" ++#include "llvm/Support/CommandLine.h" + #include "llvm/Support/Endian.h" + #include "llvm/Support/EndianStream.h" + #include "llvm/Support/ErrorHandling.h" +@@ -27,6 +28,13 @@ + + using namespace llvm; + ++// Temporary workaround for old linkers that do not support ULEB128 relocations, ++// which are abused by DWARF v5 DW_LLE_offset_pair/DW_RLE_offset_pair ++// implemented in Clang/LLVM. ++static cl::opt ULEB128Reloc( ++ "riscv-uleb128-reloc", cl::init(false), cl::Hidden, ++ cl::desc("Emit R_RISCV_SET_ULEB128/E_RISCV_SUB_ULEB128 if appropriate")); ++ + std::optional RISCVAsmBackend::getFixupKind(StringRef Name) const { + if (STI.getTargetTriple().isOSBinFormatELF()) { + unsigned Type; +@@ -126,6 +134,7 @@ bool RISCVAsmBackend::shouldForceRelocation(const MCAssembler &Asm, + case FK_Data_2: + case FK_Data_4: + case FK_Data_8: ++ case FK_Data_leb128: + if (Target.isAbsolute()) + return false; + break; +@@ -330,6 +339,19 @@ bool RISCVAsmBackend::relaxDwarfCFA(MCDwarfCallFrameFragment &DF, + return true; + } + ++std::pair RISCVAsmBackend::relaxLEB128(MCLEBFragment &LF, ++ MCAsmLayout &Layout, ++ int64_t &Value) const { ++ if (LF.isSigned()) ++ return std::make_pair(false, false); ++ const MCExpr &Expr = LF.getValue(); ++ if (ULEB128Reloc) { ++ LF.getFixups().push_back( ++ MCFixup::create(0, &Expr, FK_Data_leb128, Expr.getLoc())); ++ } ++ return std::make_pair(Expr.evaluateKnownAbsolute(Value, Layout), false); ++} ++ + // Given a compressed control flow instruction this function returns + // the expanded instruction. 
+ unsigned RISCVAsmBackend::getRelaxedOpcode(unsigned Op) const { +@@ -416,6 +438,7 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value, + case FK_Data_4: + case FK_Data_8: + case FK_Data_6b: ++ case FK_Data_leb128: + return Value; + case RISCV::fixup_riscv_set_6b: + return Value & 0x03; +@@ -596,6 +619,10 @@ bool RISCVAsmBackend::handleAddSubRelocations(const MCAsmLayout &Layout, + TA = ELF::R_RISCV_ADD64; + TB = ELF::R_RISCV_SUB64; + break; ++ case llvm::FK_Data_leb128: ++ TA = ELF::R_RISCV_SET_ULEB128; ++ TB = ELF::R_RISCV_SUB_ULEB128; ++ break; + default: + llvm_unreachable("unsupported fixup size"); + } +diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h +index 0ea1f32e8296..edefb171bcdc 100644 +--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h ++++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h +@@ -99,6 +99,8 @@ public: + bool &WasRelaxed) const override; + bool relaxDwarfCFA(MCDwarfCallFrameFragment &DF, MCAsmLayout &Layout, + bool &WasRelaxed) const override; ++ std::pair relaxLEB128(MCLEBFragment &LF, MCAsmLayout &Layout, ++ int64_t &Value) const override; + + bool writeNopData(raw_ostream &OS, uint64_t Count, + const MCSubtargetInfo *STI) const override; +diff --git a/llvm/test/MC/ELF/RISCV/gen-dwarf.s b/llvm/test/MC/ELF/RISCV/gen-dwarf.s +index 2235559d5f35..2a7dc777e70c 100644 +--- a/llvm/test/MC/ELF/RISCV/gen-dwarf.s ++++ b/llvm/test/MC/ELF/RISCV/gen-dwarf.s +@@ -9,7 +9,7 @@ + ## emit special opcodes to make .debug_line smaller, but we don't do this for + ## consistency. + +-# RUN: llvm-mc -filetype=obj -triple=riscv64 -g -dwarf-version=5 -mattr=+relax < %s -o %t ++# RUN: llvm-mc -filetype=obj -triple=riscv64 -g -dwarf-version=5 -mattr=+relax -riscv-uleb128-reloc=1 < %s -o %t + # RUN: llvm-dwarfdump -eh-frame -debug-line -debug-rnglists -v %t | FileCheck %s + # RUN: llvm-readobj -r -x .eh_frame %t | FileCheck %s --check-prefix=RELOC + +@@ -48,9 +48,10 @@ + # RELOC-NEXT: 0x34 R_RISCV_32_PCREL 0x0 + # RELOC-NEXT: } + +-## TODO A section needs two relocations. 
+ # RELOC: Section ([[#]]) .rela.debug_rnglists { + # RELOC-NEXT: 0xD R_RISCV_64 .text.foo 0x0 ++# RELOC-NEXT: 0x15 R_RISCV_SET_ULEB128 0x0 ++# RELOC-NEXT: 0x15 R_RISCV_SUB_ULEB128 .text.foo 0x0 + # RELOC-NEXT: 0x17 R_RISCV_64 .text.bar 0x0 + # RELOC-NEXT: } + +diff --git a/llvm/test/MC/LoongArch/Relocations/leb128.s b/llvm/test/MC/LoongArch/Relocations/leb128.s +new file mode 100644 +index 000000000000..7a96ec551b76 +--- /dev/null ++++ b/llvm/test/MC/LoongArch/Relocations/leb128.s +@@ -0,0 +1,72 @@ ++# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=-relax %s -o %t ++# RUN: llvm-readobj -r -x .alloc_w %t | FileCheck --check-prefixes=CHECK,NORELAX %s ++# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax %s -o %t.relax ++# RUN: llvm-readobj -r -x .alloc_w %t.relax | FileCheck --check-prefixes=CHECK,RELAX %s ++ ++# RUN: not llvm-mc --filetype=obj --triple=loongarch64 --mattr=-relax --defsym ERR=1 %s -o /dev/null 2>&1 | \ ++# RUN: FileCheck %s --check-prefix=ERR ++# RUN: not llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax --defsym ERR=1 %s -o /dev/null 2>&1 | \ ++# RUN: FileCheck %s --check-prefix=ERR ++ ++# CHECK: Relocations [ ++# CHECK-NEXT: .rela.alloc_w { ++# RELAX-NEXT: 0x0 R_LARCH_ADD_ULEB128 w1 0x0 ++# RELAX-NEXT: 0x0 R_LARCH_SUB_ULEB128 w 0x0 ++# RELAX-NEXT: 0x1 R_LARCH_ADD_ULEB128 w2 0x0 ++# RELAX-NEXT: 0x1 R_LARCH_SUB_ULEB128 w1 0x0 ++# CHECK-NEXT: 0x2 R_LARCH_PCALA_HI20 foo 0x0 ++# RELAX-NEXT: 0x2 R_LARCH_RELAX - 0x0 ++# CHECK-NEXT: 0x6 R_LARCH_PCALA_LO12 foo 0x0 ++# RELAX-NEXT: 0x6 R_LARCH_RELAX - 0x0 ++# RELAX-NEXT: 0xA R_LARCH_ADD_ULEB128 w2 0x0 ++# RELAX-NEXT: 0xA R_LARCH_SUB_ULEB128 w1 0x0 ++# RELAX-NEXT: 0xB R_LARCH_ADD_ULEB128 w2 0x78 ++# RELAX-NEXT: 0xB R_LARCH_SUB_ULEB128 w1 0x0 ++# RELAX-NEXT: 0xD R_LARCH_ADD_ULEB128 w1 0x0 ++# RELAX-NEXT: 0xD R_LARCH_SUB_ULEB128 w2 0x0 ++# RELAX-NEXT: 0x17 R_LARCH_ADD_ULEB128 w3 0x6F ++# RELAX-NEXT: 0x17 R_LARCH_SUB_ULEB128 w2 0x0 ++# RELAX-NEXT: 0x18 R_LARCH_ADD_ULEB128 w3 0x71 ++# RELAX-NEXT: 0x18 R_LARCH_SUB_ULEB128 w2 0x0 ++# CHECK-NEXT: } ++# CHECK-NEXT: ] ++ ++# CHECK: Hex dump of section '.alloc_w': ++# NORELAX-NEXT: 0x00000000 02080c00 001a8c01 c0020880 01f8ffff ++# NORELAX-NEXT: 0x00000010 ffffffff ffff017f 8101 ++# RELAX-NEXT: 0x00000000 00000c00 001a8c01 c0020080 00808080 ++# RELAX-NEXT: 0x00000010 80808080 80800000 8000 ++ ++.section .alloc_w,"ax",@progbits; w: ++.uleb128 w1-w # w1 is later defined in the same section ++.uleb128 w2-w1 # w1 and w2 are separated by a linker relaxable instruction ++w1: ++ la.pcrel $t0, foo ++w2: ++.uleb128 w2-w1 # 0x08 ++.uleb128 w2-w1+120 # 0x0180 ++.uleb128 -(w2-w1) # 0x01fffffffffffffffff8 ++.uleb128 w3-w2+111 # 0x7f ++.uleb128 w3-w2+113 # 0x0181 ++w3: ++ ++.ifdef ERR ++# ERR: :[[#@LINE+1]]:16: error: .uleb128 expression is not absolute ++.uleb128 extern-w # extern is undefined ++# ERR: :[[#@LINE+1]]:11: error: .uleb128 expression is not absolute ++.uleb128 w-extern ++# ERR: :[[#@LINE+1]]:11: error: .uleb128 expression is not absolute ++.uleb128 x-w # x is later defined in another section ++ ++.section .alloc_x,"aw",@progbits; x: ++# ERR: :[[#@LINE+1]]:11: error: .uleb128 expression is not absolute ++.uleb128 y-x ++.section .alloc_y,"aw",@progbits; y: ++# ERR: :[[#@LINE+1]]:11: error: .uleb128 expression is not absolute ++.uleb128 x-y ++ ++# ERR: :[[#@LINE+1]]:10: error: .uleb128 expression is not absolute ++.uleb128 extern ++# ERR: :[[#@LINE+1]]:10: error: .uleb128 expression is not absolute ++.uleb128 y ++.endif +diff --git 
a/llvm/test/MC/LoongArch/Relocations/relax-addsub.s b/llvm/test/MC/LoongArch/Relocations/relax-addsub.s +index 14922657ae89..cd01332afd0b 100644 +--- a/llvm/test/MC/LoongArch/Relocations/relax-addsub.s ++++ b/llvm/test/MC/LoongArch/Relocations/relax-addsub.s +@@ -8,12 +8,23 @@ + # NORELAX-NEXT: 0x10 R_LARCH_PCALA_HI20 .text 0x0 + # NORELAX-NEXT: 0x14 R_LARCH_PCALA_LO12 .text 0x0 + # NORELAX-NEXT: } ++# NORELAX-NEXT: Section ({{.*}}) .rela.data { ++# NORELAX-NEXT: 0x30 R_LARCH_ADD8 foo 0x0 ++# NORELAX-NEXT: 0x30 R_LARCH_SUB8 .text 0x10 ++# NORELAX-NEXT: 0x31 R_LARCH_ADD16 foo 0x0 ++# NORELAX-NEXT: 0x31 R_LARCH_SUB16 .text 0x10 ++# NORELAX-NEXT: 0x33 R_LARCH_ADD32 foo 0x0 ++# NORELAX-NEXT: 0x33 R_LARCH_SUB32 .text 0x10 ++# NORELAX-NEXT: 0x37 R_LARCH_ADD64 foo 0x0 ++# NORELAX-NEXT: 0x37 R_LARCH_SUB64 .text 0x10 ++# NORELAX-NEXT: } + # NORELAX-NEXT: ] + + # NORELAX: Hex dump of section '.data': +-# NORELAX-NEXT: 0x00000000 04040004 00000004 00000000 0000000c +-# NORELAX-NEXT: 0x00000010 0c000c00 00000c00 00000000 00000808 +-# NORELAX-NEXT: 0x00000020 00080000 00080000 00000000 00 ++# NORELAX-NEXT: 0x00000000 04040004 00000004 00000000 00000004 ++# NORELAX-NEXT: 0x00000010 0c0c000c 0000000c 00000000 0000000c ++# NORELAX-NEXT: 0x00000020 08080008 00000008 00000000 00000008 ++# NORELAX-NEXT: 0x00000030 00000000 00000000 00000000 000000 + + # RELAX: Relocations [ + # RELAX-NEXT: Section ({{.*}}) .rela.text { +@@ -23,21 +34,32 @@ + # RELAX-NEXT: 0x14 R_LARCH_RELAX - 0x0 + # RELAX-NEXT: } + # RELAX-NEXT: Section ({{.*}}) .rela.data { +-# RELAX-NEXT: 0x1E R_LARCH_ADD8 .L4 0x0 +-# RELAX-NEXT: 0x1E R_LARCH_SUB8 .L3 0x0 +-# RELAX-NEXT: 0x1F R_LARCH_ADD16 .L4 0x0 +-# RELAX-NEXT: 0x1F R_LARCH_SUB16 .L3 0x0 +-# RELAX-NEXT: 0x21 R_LARCH_ADD32 .L4 0x0 +-# RELAX-NEXT: 0x21 R_LARCH_SUB32 .L3 0x0 +-# RELAX-NEXT: 0x25 R_LARCH_ADD64 .L4 0x0 +-# RELAX-NEXT: 0x25 R_LARCH_SUB64 .L3 0x0 ++# RELAX-NEXT: 0x20 R_LARCH_ADD8 .L4 0x0 ++# RELAX-NEXT: 0x20 R_LARCH_SUB8 .L3 0x0 ++# RELAX-NEXT: 0x21 R_LARCH_ADD16 .L4 0x0 ++# RELAX-NEXT: 0x21 R_LARCH_SUB16 .L3 0x0 ++# RELAX-NEXT: 0x23 R_LARCH_ADD32 .L4 0x0 ++# RELAX-NEXT: 0x23 R_LARCH_SUB32 .L3 0x0 ++# RELAX-NEXT: 0x27 R_LARCH_ADD64 .L4 0x0 ++# RELAX-NEXT: 0x27 R_LARCH_SUB64 .L3 0x0 ++# RELAX-NEXT: 0x2F R_LARCH_ADD_ULEB128 .L4 0x0 ++# RELAX-NEXT: 0x2F R_LARCH_SUB_ULEB128 .L3 0x0 ++# RELAX-NEXT: 0x30 R_LARCH_ADD8 foo 0x0 ++# RELAX-NEXT: 0x30 R_LARCH_SUB8 .L3 0x0 ++# RELAX-NEXT: 0x31 R_LARCH_ADD16 foo 0x0 ++# RELAX-NEXT: 0x31 R_LARCH_SUB16 .L3 0x0 ++# RELAX-NEXT: 0x33 R_LARCH_ADD32 foo 0x0 ++# RELAX-NEXT: 0x33 R_LARCH_SUB32 .L3 0x0 ++# RELAX-NEXT: 0x37 R_LARCH_ADD64 foo 0x0 ++# RELAX-NEXT: 0x37 R_LARCH_SUB64 .L3 0x0 + # RELAX-NEXT: } + # RELAX-NEXT: ] + + # RELAX: Hex dump of section '.data': +-# RELAX-NEXT: 0x00000000 04040004 00000004 00000000 0000000c +-# RELAX-NEXT: 0x00000010 0c000c00 00000c00 00000000 00000000 +-# RELAX-NEXT: 0x00000020 00000000 00000000 00000000 00 ++# RELAX-NEXT: 0x00000000 04040004 00000004 00000000 00000004 ++# RELAX-NEXT: 0x00000010 0c0c000c 0000000c 00000000 0000000c ++# RELAX-NEXT: 0x00000020 00000000 00000000 00000000 00000000 ++# RELAX-NEXT: 0x00000030 00000000 00000000 00000000 000000 + + .text + .L1: +@@ -55,13 +77,20 @@ + .short .L2 - .L1 + .word .L2 - .L1 + .dword .L2 - .L1 ++.uleb128 .L2 - .L1 + ## TODO Handle alignment directive. + .byte .L3 - .L2 + .short .L3 - .L2 + .word .L3 - .L2 + .dword .L3 - .L2 ++.uleb128 .L3 - .L2 + ## With relaxation, emit relocs because the la.pcrel makes the diff variable. 
+ .byte .L4 - .L3 + .short .L4 - .L3 + .word .L4 - .L3 + .dword .L4 - .L3 ++.uleb128 .L4 - .L3 ++.byte foo - .L3 ++.short foo - .L3 ++.word foo - .L3 ++.dword foo - .L3 +diff --git a/llvm/test/MC/X86/invalid-sleb.s b/llvm/test/MC/X86/invalid-sleb.s +deleted file mode 100644 +index 7d7df351ce4e..000000000000 +--- a/llvm/test/MC/X86/invalid-sleb.s ++++ /dev/null +@@ -1,5 +0,0 @@ +-// RUN: not --crash llvm-mc -filetype=obj -triple x86_64-pc-linux %s -o %t 2>&1 | FileCheck %s +- +-// CHECK: sleb128 and uleb128 expressions must be absolute +- +- .sleb128 undefined +-- +2.20.1 + diff --git a/0006-LoongArch-Add-relaxDwarfLineAddr-and-relaxDwarfCFA-t.patch b/0006-LoongArch-Add-relaxDwarfLineAddr-and-relaxDwarfCFA-t.patch new file mode 100644 index 0000000000000000000000000000000000000000..b7c1236f3679c93243c63dc0c74fe9dacac396e4 --- /dev/null +++ b/0006-LoongArch-Add-relaxDwarfLineAddr-and-relaxDwarfCFA-t.patch @@ -0,0 +1,376 @@ +From 286c92a8e78c4b67368c2f47a8e73036fdacbae2 Mon Sep 17 00:00:00 2001 +From: Jinyang He +Date: Tue, 16 Jan 2024 13:20:13 +0800 +Subject: [PATCH 06/66] [LoongArch] Add relaxDwarfLineAddr and relaxDwarfCFA to + handle the mutable label diff in dwarfinfo (#77728) + +When linker-relaxation is enabled, part of the label diff in dwarfinfo +cannot be computed before static link. Refer to RISCV, we add the +relaxDwarfLineAddr and relaxDwarfCFA to add relocations for these label +diffs. Calculate whether the label diff is mutable. For immutable label +diff, return false and do the other works by its parent function. + +(cherry picked from commit ed7f4edc19ada006789318a0929b57d1b5a761bd) +Change-Id: Iae5bad958c6d1a71dac1672f5f03991eaeea6d22 +--- + llvm/lib/Object/RelocationResolver.cpp | 12 +- + .../MCTargetDesc/LoongArchAsmBackend.cpp | 129 ++++++++++++++++++ + .../MCTargetDesc/LoongArchAsmBackend.h | 5 + + .../LoongArch/dwarf-loongarch-relocs.ll | 128 +++++++++++++++++ + llvm/test/DebugInfo/LoongArch/lit.local.cfg | 2 + + 5 files changed, 274 insertions(+), 2 deletions(-) + create mode 100644 llvm/test/DebugInfo/LoongArch/dwarf-loongarch-relocs.ll + create mode 100644 llvm/test/DebugInfo/LoongArch/lit.local.cfg + +diff --git a/llvm/lib/Object/RelocationResolver.cpp b/llvm/lib/Object/RelocationResolver.cpp +index 03ac59289528..0e5036d7dfcc 100644 +--- a/llvm/lib/Object/RelocationResolver.cpp ++++ b/llvm/lib/Object/RelocationResolver.cpp +@@ -539,6 +539,8 @@ static bool supportsLoongArch(uint64_t Type) { + case ELF::R_LARCH_32: + case ELF::R_LARCH_32_PCREL: + case ELF::R_LARCH_64: ++ case ELF::R_LARCH_ADD6: ++ case ELF::R_LARCH_SUB6: + case ELF::R_LARCH_ADD8: + case ELF::R_LARCH_SUB8: + case ELF::R_LARCH_ADD16: +@@ -564,6 +566,10 @@ static uint64_t resolveLoongArch(uint64_t Type, uint64_t Offset, uint64_t S, + return (S + Addend - Offset) & 0xFFFFFFFF; + case ELF::R_LARCH_64: + return S + Addend; ++ case ELF::R_LARCH_ADD6: ++ return (LocData & 0xC0) | ((LocData + S + Addend) & 0x3F); ++ case ELF::R_LARCH_SUB6: ++ return (LocData & 0xC0) | ((LocData - (S + Addend)) & 0x3F); + case ELF::R_LARCH_ADD8: + return (LocData + (S + Addend)) & 0xFF; + case ELF::R_LARCH_SUB8: +@@ -880,8 +886,10 @@ uint64_t resolveRelocation(RelocationResolver Resolver, const RelocationRef &R, + + if (GetRelSectionType() == ELF::SHT_RELA) { + Addend = getELFAddend(R); +- // RISCV relocations use both LocData and Addend. +- if (Obj->getArch() != Triple::riscv32 && ++ // LoongArch and RISCV relocations use both LocData and Addend. 
++ if (Obj->getArch() != Triple::loongarch32 && ++ Obj->getArch() != Triple::loongarch64 && ++ Obj->getArch() != Triple::riscv32 && + Obj->getArch() != Triple::riscv64) + LocData = 0; + } +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp +index 9227d4d6afed..8d82327b2e2b 100644 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp +@@ -12,6 +12,7 @@ + + #include "LoongArchAsmBackend.h" + #include "LoongArchFixupKinds.h" ++#include "llvm/MC/MCAsmInfo.h" + #include "llvm/MC/MCAsmLayout.h" + #include "llvm/MC/MCAssembler.h" + #include "llvm/MC/MCContext.h" +@@ -19,6 +20,7 @@ + #include "llvm/MC/MCValue.h" + #include "llvm/Support/Endian.h" + #include "llvm/Support/EndianStream.h" ++#include "llvm/Support/LEB128.h" + + #define DEBUG_TYPE "loongarch-asmbackend" + +@@ -235,6 +237,133 @@ std::pair LoongArchAsmBackend::relaxLEB128(MCLEBFragment &LF, + return std::make_pair(true, true); + } + ++bool LoongArchAsmBackend::relaxDwarfLineAddr(MCDwarfLineAddrFragment &DF, ++ MCAsmLayout &Layout, ++ bool &WasRelaxed) const { ++ MCContext &C = Layout.getAssembler().getContext(); ++ ++ int64_t LineDelta = DF.getLineDelta(); ++ const MCExpr &AddrDelta = DF.getAddrDelta(); ++ SmallVectorImpl &Data = DF.getContents(); ++ SmallVectorImpl &Fixups = DF.getFixups(); ++ size_t OldSize = Data.size(); ++ ++ int64_t Value; ++ if (AddrDelta.evaluateAsAbsolute(Value, Layout)) ++ return false; ++ bool IsAbsolute = AddrDelta.evaluateKnownAbsolute(Value, Layout); ++ assert(IsAbsolute && "CFA with invalid expression"); ++ (void)IsAbsolute; ++ ++ Data.clear(); ++ Fixups.clear(); ++ raw_svector_ostream OS(Data); ++ ++ // INT64_MAX is a signal that this is actually a DW_LNE_end_sequence. ++ if (LineDelta != INT64_MAX) { ++ OS << uint8_t(dwarf::DW_LNS_advance_line); ++ encodeSLEB128(LineDelta, OS); ++ } ++ ++ unsigned Offset; ++ std::pair FK; ++ ++ // According to the DWARF specification, the `DW_LNS_fixed_advance_pc` opcode ++ // takes a single unsigned half (unencoded) operand. The maximum encodable ++ // value is therefore 65535. Set a conservative upper bound for relaxation. ++ if (Value > 60000) { ++ unsigned PtrSize = C.getAsmInfo()->getCodePointerSize(); ++ ++ OS << uint8_t(dwarf::DW_LNS_extended_op); ++ encodeULEB128(PtrSize + 1, OS); ++ ++ OS << uint8_t(dwarf::DW_LNE_set_address); ++ Offset = OS.tell(); ++ assert((PtrSize == 4 || PtrSize == 8) && "Unexpected pointer size"); ++ FK = getRelocPairForSize(PtrSize == 4 ? 
32 : 64); ++ OS.write_zeros(PtrSize); ++ } else { ++ OS << uint8_t(dwarf::DW_LNS_fixed_advance_pc); ++ Offset = OS.tell(); ++ FK = getRelocPairForSize(16); ++ support::endian::write(OS, 0, support::little); ++ } ++ ++ const MCBinaryExpr &MBE = cast(AddrDelta); ++ Fixups.push_back(MCFixup::create(Offset, MBE.getLHS(), std::get<0>(FK))); ++ Fixups.push_back(MCFixup::create(Offset, MBE.getRHS(), std::get<1>(FK))); ++ ++ if (LineDelta == INT64_MAX) { ++ OS << uint8_t(dwarf::DW_LNS_extended_op); ++ OS << uint8_t(1); ++ OS << uint8_t(dwarf::DW_LNE_end_sequence); ++ } else { ++ OS << uint8_t(dwarf::DW_LNS_copy); ++ } ++ ++ WasRelaxed = OldSize != Data.size(); ++ return true; ++} ++ ++bool LoongArchAsmBackend::relaxDwarfCFA(MCDwarfCallFrameFragment &DF, ++ MCAsmLayout &Layout, ++ bool &WasRelaxed) const { ++ const MCExpr &AddrDelta = DF.getAddrDelta(); ++ SmallVectorImpl &Data = DF.getContents(); ++ SmallVectorImpl &Fixups = DF.getFixups(); ++ size_t OldSize = Data.size(); ++ ++ int64_t Value; ++ if (AddrDelta.evaluateAsAbsolute(Value, Layout)) ++ return false; ++ bool IsAbsolute = AddrDelta.evaluateKnownAbsolute(Value, Layout); ++ assert(IsAbsolute && "CFA with invalid expression"); ++ (void)IsAbsolute; ++ ++ Data.clear(); ++ Fixups.clear(); ++ raw_svector_ostream OS(Data); ++ ++ assert( ++ Layout.getAssembler().getContext().getAsmInfo()->getMinInstAlignment() == ++ 1 && ++ "expected 1-byte alignment"); ++ if (Value == 0) { ++ WasRelaxed = OldSize != Data.size(); ++ return true; ++ } ++ ++ auto AddFixups = [&Fixups, ++ &AddrDelta](unsigned Offset, ++ std::pair FK) { ++ const MCBinaryExpr &MBE = cast(AddrDelta); ++ Fixups.push_back(MCFixup::create(Offset, MBE.getLHS(), std::get<0>(FK))); ++ Fixups.push_back(MCFixup::create(Offset, MBE.getRHS(), std::get<1>(FK))); ++ }; ++ ++ if (isUIntN(6, Value)) { ++ OS << uint8_t(dwarf::DW_CFA_advance_loc); ++ AddFixups(0, getRelocPairForSize(6)); ++ } else if (isUInt<8>(Value)) { ++ OS << uint8_t(dwarf::DW_CFA_advance_loc1); ++ support::endian::write(OS, 0, support::little); ++ AddFixups(1, getRelocPairForSize(8)); ++ } else if (isUInt<16>(Value)) { ++ OS << uint8_t(dwarf::DW_CFA_advance_loc2); ++ support::endian::write(OS, 0, support::little); ++ AddFixups(1, getRelocPairForSize(16)); ++ } else if (isUInt<32>(Value)) { ++ OS << uint8_t(dwarf::DW_CFA_advance_loc4); ++ support::endian::write(OS, 0, support::little); ++ AddFixups(1, getRelocPairForSize(32)); ++ } else { ++ llvm_unreachable("unsupported CFA encoding"); ++ } ++ ++ WasRelaxed = OldSize != Data.size(); ++ return true; ++} ++ + bool LoongArchAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count, + const MCSubtargetInfo *STI) const { + // We mostly follow binutils' convention here: align to 4-byte boundary with a +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h +index 49801e4fd81a..657f5ca5e731 100644 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h +@@ -68,6 +68,11 @@ public: + std::pair relaxLEB128(MCLEBFragment &LF, MCAsmLayout &Layout, + int64_t &Value) const override; + ++ bool relaxDwarfLineAddr(MCDwarfLineAddrFragment &DF, MCAsmLayout &Layout, ++ bool &WasRelaxed) const override; ++ bool relaxDwarfCFA(MCDwarfCallFrameFragment &DF, MCAsmLayout &Layout, ++ bool &WasRelaxed) const override; ++ + bool writeNopData(raw_ostream &OS, uint64_t Count, + const MCSubtargetInfo *STI) const override; + +diff --git 
a/llvm/test/DebugInfo/LoongArch/dwarf-loongarch-relocs.ll b/llvm/test/DebugInfo/LoongArch/dwarf-loongarch-relocs.ll +new file mode 100644 +index 000000000000..e03b4c1d34de +--- /dev/null ++++ b/llvm/test/DebugInfo/LoongArch/dwarf-loongarch-relocs.ll +@@ -0,0 +1,128 @@ ++; RUN: llc --filetype=obj --mtriple=loongarch64 --mattr=-relax %s -o %t.o ++; RUN: llvm-readobj -r %t.o | FileCheck --check-prefixes=RELOCS-BOTH,RELOCS-NORL %s ++; RUN: llvm-objdump --source %t.o | FileCheck --check-prefix=SOURCE %s ++; RUN: llvm-dwarfdump --debug-info --debug-line %t.o | FileCheck --check-prefix=DWARF %s ++ ++; RUN: llc --filetype=obj --mtriple=loongarch64 --mattr=+relax %s -o %t.r.o ++; RUN: llvm-readobj -r %t.r.o | FileCheck --check-prefixes=RELOCS-BOTH,RELOCS-ENRL %s ++; RUN: llvm-objdump --source %t.r.o | FileCheck --check-prefix=SOURCE %s ++; RUN: llvm-dwarfdump --debug-info --debug-line %t.r.o | FileCheck --check-prefix=DWARF %s ++ ++; RELOCS-BOTH: Relocations [ ++; RELOCS-BOTH-NEXT: Section ({{.*}}) .rela.text { ++; RELOCS-BOTH-NEXT: 0x14 R_LARCH_PCALA_HI20 sym 0x0 ++; RELOCS-ENRL-NEXT: 0x14 R_LARCH_RELAX - 0x0 ++; RELOCS-BOTH-NEXT: 0x18 R_LARCH_PCALA_LO12 sym 0x0 ++; RELOCS-ENRL-NEXT: 0x18 R_LARCH_RELAX - 0x0 ++; RELOCS-BOTH-NEXT: } ++; RELOCS-BOTH: Section ({{.*}}) .rela.debug_frame { ++; RELOCS-NORL-NEXT: 0x1C R_LARCH_32 .debug_frame 0x0 ++; RELOCS-NORL-NEXT: 0x20 R_LARCH_64 .text 0x0 ++; RELOCS-ENRL-NEXT: 0x1C R_LARCH_32 0x0 ++; RELOCS-ENRL-NEXT: 0x20 R_LARCH_64 0x0 ++; RELOCS-ENRL-NEXT: 0x28 R_LARCH_ADD64 0x0 ++; RELOCS-ENRL-NEXT: 0x28 R_LARCH_SUB64 0x0 ++; RELOCS-ENRL-NEXT: 0x3F R_LARCH_ADD6 0x0 ++; RELOCS-ENRL-NEXT: 0x3F R_LARCH_SUB6 0x0 ++; RELOCS-BOTH-NEXT: } ++; RELOCS-BOTH: Section ({{.*}}) .rela.debug_line { ++; RELOCS-BOTH-NEXT: 0x22 R_LARCH_32 .debug_line_str 0x0 ++; RELOCS-BOTH-NEXT: 0x31 R_LARCH_32 .debug_line_str 0x2 ++; RELOCS-BOTH-NEXT: 0x46 R_LARCH_32 .debug_line_str 0x1B ++; RELOCS-NORL-NEXT: 0x4F R_LARCH_64 .text 0x0 ++; RELOCS-ENRL-NEXT: 0x4F R_LARCH_64 0x0 ++; RELOCS-ENRL-NEXT: 0x5F R_LARCH_ADD16 0x0 ++; RELOCS-ENRL-NEXT: 0x5F R_LARCH_SUB16 0x0 ++; RELOCS-BOTH-NEXT: } ++; RELOCS-BOTH-NEXT: ] ++ ++; SOURCE: 0000000000000000 : ++; SOURCE: ; { ++; SOURCE: ; asm volatile( ++; SOURCE: ; return 0; ++ ++; DWARF: DW_AT_producer ("clang") ++; DWARF: DW_AT_name ("dwarf-loongarch-relocs.c") ++; DWARF: DW_AT_comp_dir (".") ++; DWARF: DW_AT_name ("foo") ++; DWARF-NEXT: DW_AT_decl_file ("{{.*}}dwarf-loongarch-relocs.c") ++; DWARF-NEXT: DW_AT_decl_line (1) ++; DWARF-NEXT: DW_AT_type (0x00000032 "int") ++; DWARF: DW_AT_name ("int") ++; DWARF-NEXT: DW_AT_encoding (DW_ATE_signed) ++; DWARF-NEXT: DW_AT_byte_size (0x04) ++; DWARF: .debug_line contents: ++; DWARF-NEXT: debug_line[0x00000000] ++; DWARF-NEXT: Line table prologue: ++; DWARF-NEXT: total_length: {{.*}} ++; DWARF-NEXT: format: DWARF32 ++; DWARF-NEXT: version: 5 ++; DWARF-NEXT: address_size: 8 ++; DWARF-NEXT: seg_select_size: 0 ++; DWARF-NEXT: prologue_length: 0x0000003e ++; DWARF-NEXT: min_inst_length: 1 ++; DWARF-NEXT: max_ops_per_inst: 1 ++; DWARF-NEXT: default_is_stmt: 1 ++; DWARF-NEXT: line_base: -5 ++; DWARF-NEXT: line_range: 14 ++; DWARF-NEXT: opcode_base: 13 ++; DWARF-NEXT: standard_opcode_lengths[DW_LNS_copy] = 0 ++; DWARF-NEXT: standard_opcode_lengths[DW_LNS_advance_pc] = 1 ++; DWARF-NEXT: standard_opcode_lengths[DW_LNS_advance_line] = 1 ++; DWARF-NEXT: standard_opcode_lengths[DW_LNS_set_file] = 1 ++; DWARF-NEXT: standard_opcode_lengths[DW_LNS_set_column] = 1 ++; DWARF-NEXT: standard_opcode_lengths[DW_LNS_negate_stmt] = 0 ++; 
DWARF-NEXT: standard_opcode_lengths[DW_LNS_set_basic_block] = 0 ++; DWARF-NEXT: standard_opcode_lengths[DW_LNS_const_add_pc] = 0 ++; DWARF-NEXT: standard_opcode_lengths[DW_LNS_fixed_advance_pc] = 1 ++; DWARF-NEXT: standard_opcode_lengths[DW_LNS_set_prologue_end] = 0 ++; DWARF-NEXT: standard_opcode_lengths[DW_LNS_set_epilogue_begin] = 0 ++; DWARF-NEXT: standard_opcode_lengths[DW_LNS_set_isa] = 1 ++; DWARF-NEXT: include_directories[ 0] = "." ++; DWARF-NEXT: file_names[ 0]: ++; DWARF-NEXT: name: "dwarf-loongarch-relocs.c" ++; DWARF-NEXT: dir_index: 0 ++; DWARF-NEXT: md5_checksum: f44d6d71bc4da58b4abe338ca507c007 ++; DWARF-NEXT: source: "{{.*}}" ++; DWARF-EMPTY: ++; DWARF-NEXT: Address Line Column File ISA Discriminator OpIndex Flags ++; DWARF-NEXT: ------------------ ------ ------ ------ --- ------------- ------- ------------- ++; DWARF-NEXT: 0x0000000000000000 2 0 0 0 0 0 is_stmt ++; DWARF-NEXT: 0x0000000000000010 3 3 0 0 0 0 is_stmt prologue_end ++; DWARF-NEXT: 0x0000000000000020 10 3 0 0 0 0 is_stmt ++; DWARF-NEXT: 0x000000000000002c 10 3 0 0 0 0 epilogue_begin ++; DWARF-NEXT: 0x0000000000000034 10 3 0 0 0 0 end_sequence ++ ++; ModuleID = 'dwarf-loongarch-relocs.c' ++source_filename = "dwarf-loongarch-relocs.c" ++target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" ++target triple = "loongarch64" ++ ++; Function Attrs: noinline nounwind optnone ++define dso_local signext i32 @foo() #0 !dbg !8 { ++ call void asm sideeffect ".cfi_remember_state\0A\09.cfi_adjust_cfa_offset 16\0A\09nop\0A\09la.pcrel $$t0, sym\0A\09nop\0A\09.cfi_restore_state\0A\09", ""() #1, !dbg !12, !srcloc !13 ++ ret i32 0, !dbg !14 ++} ++ ++attributes #0 = { noinline nounwind optnone "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="loongarch64" "target-features"="+64bit,+d,+f,+ual" } ++attributes #1 = { nounwind } ++ ++!llvm.dbg.cu = !{!0} ++!llvm.module.flags = !{!2, !3, !4, !5, !6} ++!llvm.ident = !{!7} ++ ++!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, producer: "clang", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None) ++!1 = !DIFile(filename: "dwarf-loongarch-relocs.c", directory: ".", checksumkind: CSK_MD5, checksum: "f44d6d71bc4da58b4abe338ca507c007", source: "int foo()\0A{\0A asm volatile(\0A \22.cfi_remember_state\\n\\t\22\0A \22.cfi_adjust_cfa_offset 16\\n\\t\22\0A \22nop\\n\\t\22\0A \22la.pcrel $t0, sym\\n\\t\22\0A \22nop\\n\\t\22\0A \22.cfi_restore_state\\n\\t\22);\0A return 0;\0A}\0A") ++!2 = !{i32 7, !"Dwarf Version", i32 5} ++!3 = !{i32 2, !"Debug Info Version", i32 3} ++!4 = !{i32 1, !"wchar_size", i32 4} ++!5 = !{i32 7, !"direct-access-external-data", i32 0} ++!6 = !{i32 7, !"frame-pointer", i32 2} ++!7 = !{!"clang"} ++!8 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !9, scopeLine: 2, spFlags: DISPFlagDefinition, unit: !0) ++!9 = !DISubroutineType(types: !10) ++!10 = !{!11} ++!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) ++!12 = !DILocation(line: 3, column: 3, scope: !8) ++!13 = !{i64 34, i64 56, i64 92, i64 106, i64 134, i64 148, i64 177} ++!14 = !DILocation(line: 10, column: 3, scope: !8) +diff --git a/llvm/test/DebugInfo/LoongArch/lit.local.cfg b/llvm/test/DebugInfo/LoongArch/lit.local.cfg +new file mode 100644 +index 000000000000..77becb8eee90 +--- /dev/null ++++ b/llvm/test/DebugInfo/LoongArch/lit.local.cfg +@@ -0,0 +1,2 @@ ++if "LoongArch" not in config.root.targets: ++ config.unsupported = True +-- +2.20.1 + diff --git 
a/0007-LoongArch-Insert-nops-and-emit-align-reloc-when-hand.patch b/0007-LoongArch-Insert-nops-and-emit-align-reloc-when-hand.patch new file mode 100644 index 0000000000000000000000000000000000000000..74a5caad762fece523bd9def0f67dce075f2464b --- /dev/null +++ b/0007-LoongArch-Insert-nops-and-emit-align-reloc-when-hand.patch @@ -0,0 +1,362 @@ +From 87f6adc2acf635a0a4c294217fb54c55eee3a06c Mon Sep 17 00:00:00 2001 +From: Jinyang He +Date: Wed, 24 Jan 2024 09:17:49 +0800 +Subject: [PATCH 07/66] [LoongArch] Insert nops and emit align reloc when + handle alignment directive (#72962) + +Refer to RISCV, we will fix up the alignment if linker relaxation +changes code size and breaks alignment. Insert enough Nops and emit +R_LARCH_ALIGN relocation type so that linker could satisfy the alignment +by removing Nops. +It does so only in sections with the SHF_EXECINSTR flag. + +In LoongArch psABI v2.30, R_LARCH_ALIGN requires symbol index. The +lowest 8 bits of addend represent alignment and the other bits of addend +represent the maximum number of bytes to emit. + +(cherry picked from commit c51ab483e6c2d991a01179584705b83fbea1940d) +Change-Id: Iba30702c9dda378acfae0b1f1134926fa838a368 +--- + llvm/lib/MC/MCExpr.cpp | 2 +- + .../MCTargetDesc/LoongArchAsmBackend.cpp | 67 ++++++++++++++++ + .../MCTargetDesc/LoongArchAsmBackend.h | 15 ++++ + .../MCTargetDesc/LoongArchFixupKinds.h | 4 +- + .../Relocations/align-non-executable.s | 27 +++++++ + .../MC/LoongArch/Relocations/relax-addsub.s | 15 +++- + .../MC/LoongArch/Relocations/relax-align.s | 79 +++++++++++++++++++ + 7 files changed, 205 insertions(+), 4 deletions(-) + create mode 100644 llvm/test/MC/LoongArch/Relocations/align-non-executable.s + create mode 100644 llvm/test/MC/LoongArch/Relocations/relax-align.s + +diff --git a/llvm/lib/MC/MCExpr.cpp b/llvm/lib/MC/MCExpr.cpp +index a561fed11179..79808a58d81c 100644 +--- a/llvm/lib/MC/MCExpr.cpp ++++ b/llvm/lib/MC/MCExpr.cpp +@@ -711,7 +711,7 @@ static void AttemptToFoldSymbolOffsetDifference( + if (DF) { + Displacement += DF->getContents().size(); + } else if (auto *AF = dyn_cast(FI); +- AF && Layout && ++ AF && Layout && AF->hasEmitNops() && + !Asm->getBackend().shouldInsertExtraNopBytesForCodeAlign( + *AF, Count)) { + Displacement += Asm->computeFragmentSize(*Layout, *AF); +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp +index 8d82327b2e2b..8c482356402f 100644 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp +@@ -17,10 +17,13 @@ + #include "llvm/MC/MCAssembler.h" + #include "llvm/MC/MCContext.h" + #include "llvm/MC/MCELFObjectWriter.h" ++#include "llvm/MC/MCExpr.h" ++#include "llvm/MC/MCSection.h" + #include "llvm/MC/MCValue.h" + #include "llvm/Support/Endian.h" + #include "llvm/Support/EndianStream.h" + #include "llvm/Support/LEB128.h" ++#include "llvm/Support/MathExtras.h" + + #define DEBUG_TYPE "loongarch-asmbackend" + +@@ -177,6 +180,70 @@ void LoongArchAsmBackend::applyFixup(const MCAssembler &Asm, + } + } + ++// Linker relaxation may change code size. We have to insert Nops ++// for .align directive when linker relaxation enabled. So then Linker ++// could satisfy alignment by removing Nops. ++// The function returns the total Nops Size we need to insert. 
++bool LoongArchAsmBackend::shouldInsertExtraNopBytesForCodeAlign( ++ const MCAlignFragment &AF, unsigned &Size) { ++ // Calculate Nops Size only when linker relaxation enabled. ++ if (!AF.getSubtargetInfo()->hasFeature(LoongArch::FeatureRelax)) ++ return false; ++ ++ // Ignore alignment if MaxBytesToEmit is less than the minimum Nop size. ++ const unsigned MinNopLen = 4; ++ if (AF.getMaxBytesToEmit() < MinNopLen) ++ return false; ++ Size = AF.getAlignment().value() - MinNopLen; ++ return AF.getAlignment() > MinNopLen; ++} ++ ++// We need to insert R_LARCH_ALIGN relocation type to indicate the ++// position of Nops and the total bytes of the Nops have been inserted ++// when linker relaxation enabled. ++// The function inserts fixup_loongarch_align fixup which eventually will ++// transfer to R_LARCH_ALIGN relocation type. ++// The improved R_LARCH_ALIGN requires symbol index. The lowest 8 bits of ++// addend represent alignment and the other bits of addend represent the ++// maximum number of bytes to emit. The maximum number of bytes is zero ++// means ignore the emit limit. ++bool LoongArchAsmBackend::shouldInsertFixupForCodeAlign( ++ MCAssembler &Asm, const MCAsmLayout &Layout, MCAlignFragment &AF) { ++ // Insert the fixup only when linker relaxation enabled. ++ if (!AF.getSubtargetInfo()->hasFeature(LoongArch::FeatureRelax)) ++ return false; ++ ++ // Calculate total Nops we need to insert. If there are none to insert ++ // then simply return. ++ unsigned Count; ++ if (!shouldInsertExtraNopBytesForCodeAlign(AF, Count)) ++ return false; ++ ++ MCSection *Sec = AF.getParent(); ++ MCContext &Ctx = Asm.getContext(); ++ const MCExpr *Dummy = MCConstantExpr::create(0, Ctx); ++ // Create fixup_loongarch_align fixup. ++ MCFixup Fixup = ++ MCFixup::create(0, Dummy, MCFixupKind(LoongArch::fixup_loongarch_align)); ++ const MCSymbolRefExpr *MCSym = getSecToAlignSym()[Sec]; ++ if (MCSym == nullptr) { ++ // Create a symbol and make the value of symbol is zero. ++ MCSymbol *Sym = Ctx.createNamedTempSymbol("la-relax-align"); ++ Sym->setFragment(&*Sec->getBeginSymbol()->getFragment()); ++ Asm.registerSymbol(*Sym); ++ MCSym = MCSymbolRefExpr::create(Sym, Ctx); ++ getSecToAlignSym()[Sec] = MCSym; ++ } ++ ++ uint64_t FixedValue = 0; ++ unsigned Lo = Log2_64(Count) + 1; ++ unsigned Hi = AF.getMaxBytesToEmit() >= Count ? 
0 : AF.getMaxBytesToEmit(); ++ MCValue Value = MCValue::get(MCSym, nullptr, Hi << 8 | Lo); ++ Asm.getWriter().recordRelocation(Asm, Layout, &AF, Fixup, Value, FixedValue); ++ ++ return true; ++} ++ + bool LoongArchAsmBackend::shouldForceRelocation(const MCAssembler &Asm, + const MCFixup &Fixup, + const MCValue &Target) { +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h +index 657f5ca5e731..71bbd003888a 100644 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h +@@ -17,7 +17,9 @@ + #include "MCTargetDesc/LoongArchFixupKinds.h" + #include "MCTargetDesc/LoongArchMCTargetDesc.h" + #include "llvm/MC/MCAsmBackend.h" ++#include "llvm/MC/MCExpr.h" + #include "llvm/MC/MCFixupKindInfo.h" ++#include "llvm/MC/MCSection.h" + #include "llvm/MC/MCSubtargetInfo.h" + + namespace llvm { +@@ -27,6 +29,7 @@ class LoongArchAsmBackend : public MCAsmBackend { + uint8_t OSABI; + bool Is64Bit; + const MCTargetOptions &TargetOptions; ++ DenseMap SecToAlignSym; + + public: + LoongArchAsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI, bool Is64Bit, +@@ -45,6 +48,15 @@ public: + uint64_t Value, bool IsResolved, + const MCSubtargetInfo *STI) const override; + ++ // Return Size with extra Nop Bytes for alignment directive in code section. ++ bool shouldInsertExtraNopBytesForCodeAlign(const MCAlignFragment &AF, ++ unsigned &Size) override; ++ ++ // Insert target specific fixup type for alignment directive in code section. ++ bool shouldInsertFixupForCodeAlign(MCAssembler &Asm, ++ const MCAsmLayout &Layout, ++ MCAlignFragment &AF) override; ++ + bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target) override; + +@@ -79,6 +91,9 @@ public: + std::unique_ptr + createObjectTargetWriter() const override; + const MCTargetOptions &getTargetOptions() const { return TargetOptions; } ++ DenseMap &getSecToAlignSym() { ++ return SecToAlignSym; ++ } + }; + } // end namespace llvm + +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h +index 178fa6e5262b..78414408f21f 100644 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h +@@ -108,7 +108,9 @@ enum Fixups { + // 20-bit fixup corresponding to %gd_hi20(foo) for instruction lu12i.w. + fixup_loongarch_tls_gd_hi20, + // Generate an R_LARCH_RELAX which indicates the linker may relax here. +- fixup_loongarch_relax = FirstLiteralRelocationKind + ELF::R_LARCH_RELAX ++ fixup_loongarch_relax = FirstLiteralRelocationKind + ELF::R_LARCH_RELAX, ++ // Generate an R_LARCH_ALIGN which indicates the linker may fixup align here. ++ fixup_loongarch_align = FirstLiteralRelocationKind + ELF::R_LARCH_ALIGN, + }; + } // end namespace LoongArch + } // end namespace llvm +diff --git a/llvm/test/MC/LoongArch/Relocations/align-non-executable.s b/llvm/test/MC/LoongArch/Relocations/align-non-executable.s +new file mode 100644 +index 000000000000..47834acd9521 +--- /dev/null ++++ b/llvm/test/MC/LoongArch/Relocations/align-non-executable.s +@@ -0,0 +1,27 @@ ++## A label difference separated by an alignment directive, when the ++## referenced symbols are in a non-executable section with instructions, ++## should generate ADD/SUB relocations. 
++## https://github.com/llvm/llvm-project/pull/76552 ++ ++# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax %s \ ++# RUN: | llvm-readobj -r - | FileCheck --check-prefixes=CHECK,RELAX %s ++# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=-relax %s \ ++# RUN: | llvm-readobj -r - | FileCheck %s ++ ++.section ".dummy", "a" ++.L1: ++ la.pcrel $t0, sym ++.p2align 3 ++.L2: ++.dword .L2 - .L1 ++ ++# CHECK: Relocations [ ++# CHECK-NEXT: Section ({{.*}}) .rela.dummy { ++# CHECK-NEXT: 0x0 R_LARCH_PCALA_HI20 sym 0x0 ++# RELAX-NEXT: 0x0 R_LARCH_RELAX - 0x0 ++# CHECK-NEXT: 0x4 R_LARCH_PCALA_LO12 sym 0x0 ++# RELAX-NEXT: 0x4 R_LARCH_RELAX - 0x0 ++# RELAX-NEXT: 0x8 R_LARCH_ADD64 .L2 0x0 ++# RELAX-NEXT: 0x8 R_LARCH_SUB64 .L1 0x0 ++# CHECK-NEXT: } ++# CHECK-NEXT: ] +diff --git a/llvm/test/MC/LoongArch/Relocations/relax-addsub.s b/llvm/test/MC/LoongArch/Relocations/relax-addsub.s +index cd01332afd0b..18e0ede5e293 100644 +--- a/llvm/test/MC/LoongArch/Relocations/relax-addsub.s ++++ b/llvm/test/MC/LoongArch/Relocations/relax-addsub.s +@@ -28,12 +28,23 @@ + + # RELAX: Relocations [ + # RELAX-NEXT: Section ({{.*}}) .rela.text { ++# RELAX-NEXT: 0x4 R_LARCH_ALIGN {{.*}} 0x4 + # RELAX-NEXT: 0x10 R_LARCH_PCALA_HI20 .L1 0x0 + # RELAX-NEXT: 0x10 R_LARCH_RELAX - 0x0 + # RELAX-NEXT: 0x14 R_LARCH_PCALA_LO12 .L1 0x0 + # RELAX-NEXT: 0x14 R_LARCH_RELAX - 0x0 + # RELAX-NEXT: } + # RELAX-NEXT: Section ({{.*}}) .rela.data { ++# RELAX-NEXT: 0x10 R_LARCH_ADD8 .L3 0x0 ++# RELAX-NEXT: 0x10 R_LARCH_SUB8 .L2 0x0 ++# RELAX-NEXT: 0x11 R_LARCH_ADD16 .L3 0x0 ++# RELAX-NEXT: 0x11 R_LARCH_SUB16 .L2 0x0 ++# RELAX-NEXT: 0x13 R_LARCH_ADD32 .L3 0x0 ++# RELAX-NEXT: 0x13 R_LARCH_SUB32 .L2 0x0 ++# RELAX-NEXT: 0x17 R_LARCH_ADD64 .L3 0x0 ++# RELAX-NEXT: 0x17 R_LARCH_SUB64 .L2 0x0 ++# RELAX-NEXT: 0x1F R_LARCH_ADD_ULEB128 .L3 0x0 ++# RELAX-NEXT: 0x1F R_LARCH_SUB_ULEB128 .L2 0x0 + # RELAX-NEXT: 0x20 R_LARCH_ADD8 .L4 0x0 + # RELAX-NEXT: 0x20 R_LARCH_SUB8 .L3 0x0 + # RELAX-NEXT: 0x21 R_LARCH_ADD16 .L4 0x0 +@@ -57,7 +68,7 @@ + + # RELAX: Hex dump of section '.data': + # RELAX-NEXT: 0x00000000 04040004 00000004 00000000 00000004 +-# RELAX-NEXT: 0x00000010 0c0c000c 0000000c 00000000 0000000c ++# RELAX-NEXT: 0x00000010 00000000 00000000 00000000 00000000 + # RELAX-NEXT: 0x00000020 00000000 00000000 00000000 00000000 + # RELAX-NEXT: 0x00000030 00000000 00000000 00000000 000000 + +@@ -78,7 +89,7 @@ + .word .L2 - .L1 + .dword .L2 - .L1 + .uleb128 .L2 - .L1 +-## TODO Handle alignment directive. ++## With relaxation, emit relocs because the .align makes the diff variable. + .byte .L3 - .L2 + .short .L3 - .L2 + .word .L3 - .L2 +diff --git a/llvm/test/MC/LoongArch/Relocations/relax-align.s b/llvm/test/MC/LoongArch/Relocations/relax-align.s +new file mode 100644 +index 000000000000..294fd9fb916c +--- /dev/null ++++ b/llvm/test/MC/LoongArch/Relocations/relax-align.s +@@ -0,0 +1,79 @@ ++## The file testing Nop insertion with R_LARCH_ALIGN for relaxation. 
++ ++# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=-relax %s -o %t ++# RUN: llvm-objdump -d %t | FileCheck %s --check-prefix=INSTR ++# RUN: llvm-readobj -r %t | FileCheck %s --check-prefix=RELOC ++# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax %s -o %t.r ++# RUN: llvm-objdump -d %t.r | FileCheck %s --check-prefixes=INSTR,RELAX-INSTR ++# RUN: llvm-readobj -r %t.r | FileCheck %s --check-prefixes=RELOC,RELAX-RELOC ++ ++.text ++break 0 ++# INSTR: break 0 ++ ++## Not emit R_LARCH_ALIGN if alignment directive is less than or equal to ++## minimum code alignment(a.k.a 4). ++.p2align 2 ++.p2align 1 ++.p2align 0 ++ ++## Not emit instructions if max emit bytes less than min nop size. ++.p2align 4, , 2 ++ ++## Not emit R_LARCH_ALIGN if alignment directive with specific padding value. ++## The behavior is the same as GNU assembler. ++break 1 ++.p2align 4, 1 ++# INSTR-NEXT: break 1 ++# INSTR-COUNT-2: 01 01 01 01 ++ ++break 2 ++.p2align 4, 1, 12 ++# INSTR-NEXT: break 2 ++# INSTR-COUNT-3: 01 01 01 01 ++ ++break 3 ++.p2align 4 ++# INSTR-NEXT: break 3 ++# INSTR-COUNT-3: nop ++ ++break 4 ++.p2align 5 ++.p2align 4 ++# INSTR-NEXT: break 4 ++# INSTR-COUNT-3: nop ++# RELAX-INSTR-COUNT-7: nop ++ ++break 5 ++.p2align 4, , 11 ++# INSTR-NEXT: break 5 ++# RELAX-INSTR-COUNT-3: nop ++ ++break 6 ++## Not emit the third parameter. ++.p2align 4, , 12 ++# INSTR-NEXT: break 6 ++# INSTR-NEXT: nop ++# INSTR-NEXT: nop ++# RELAX-INSTR-NEXT: nop ++ ++ret ++# INSNR-NEXT: ret ++ ++## Test the symbol index is different from .text. ++.section .text2, "ax" ++.p2align 4 ++break 7 ++ ++# RELOC: Relocations [ ++# RELAX-RELOC-NEXT: Section ({{.*}}) .rela.text { ++# RELAX-RELOC-NEXT: 0x24 R_LARCH_ALIGN .Lla-relax-align0 0x4 ++# RELAX-RELOC-NEXT: 0x34 R_LARCH_ALIGN .Lla-relax-align0 0x5 ++# RELAX-RELOC-NEXT: 0x50 R_LARCH_ALIGN .Lla-relax-align0 0x4 ++# RELAX-RELOC-NEXT: 0x60 R_LARCH_ALIGN .Lla-relax-align0 0xB04 ++# RELAX-RELOC-NEXT: 0x70 R_LARCH_ALIGN .Lla-relax-align0 0x4 ++# RELAX-RELOC-NEXT: } ++# RELAX-RELOC-NEXT: Section ({{.*}}) .rela.text2 { ++# RELAX-RELOC-NEXT: 0x0 R_LARCH_ALIGN .Lla-relax-align1 0x4 ++# RELAX-RELOC-NEXT: } ++# RELOC-NEXT: ] +-- +2.20.1 + diff --git a/0008-test-Update-dwarf-loongarch-relocs.ll.patch b/0008-test-Update-dwarf-loongarch-relocs.ll.patch new file mode 100644 index 0000000000000000000000000000000000000000..764e79d737f67a10a21577e8e71f6a6f3cbb5c1c --- /dev/null +++ b/0008-test-Update-dwarf-loongarch-relocs.ll.patch @@ -0,0 +1,86 @@ +From f51ee6c3468eacc82d3b3f09fcca381178bdc9e7 Mon Sep 17 00:00:00 2001 +From: Weining Lu +Date: Wed, 24 Jan 2024 11:03:14 +0800 +Subject: [PATCH 08/66] [test] Update dwarf-loongarch-relocs.ll + +Address buildbot faiures: +http://45.33.8.238/macm1/77360/step_11.txt +http://45.33.8.238/linux/128902/step_12.txt + +(cherry picked from commit baba7e4175b6ca21e83b1cf8229f29dbba02e979) +(cherry picked from commit c9e73cdd9a17f15ede120ea57657553f9e105eab) +Change-Id: I00aa1414f556f0ba5ff6bf6a879a6fc1fcfa49e0 +--- + .../LoongArch/dwarf-loongarch-relocs.ll | 37 ++++++++++++------- + 1 file changed, 23 insertions(+), 14 deletions(-) + +diff --git a/llvm/test/DebugInfo/LoongArch/dwarf-loongarch-relocs.ll b/llvm/test/DebugInfo/LoongArch/dwarf-loongarch-relocs.ll +index e03b4c1d34de..07443a62b933 100644 +--- a/llvm/test/DebugInfo/LoongArch/dwarf-loongarch-relocs.ll ++++ b/llvm/test/DebugInfo/LoongArch/dwarf-loongarch-relocs.ll +@@ -1,19 +1,22 @@ + ; RUN: llc --filetype=obj --mtriple=loongarch64 --mattr=-relax %s -o %t.o + ; RUN: llvm-readobj -r %t.o | 
FileCheck --check-prefixes=RELOCS-BOTH,RELOCS-NORL %s +-; RUN: llvm-objdump --source %t.o | FileCheck --check-prefix=SOURCE %s +-; RUN: llvm-dwarfdump --debug-info --debug-line %t.o | FileCheck --check-prefix=DWARF %s ++; RUN: llvm-objdump --source %t.o | FileCheck --check-prefixes=SOURCE,SOURCE-NORL %s ++; RUN: llvm-dwarfdump --debug-info --debug-line %t.o | FileCheck --check-prefixes=DWARF,DWARF-NORL %s + + ; RUN: llc --filetype=obj --mtriple=loongarch64 --mattr=+relax %s -o %t.r.o + ; RUN: llvm-readobj -r %t.r.o | FileCheck --check-prefixes=RELOCS-BOTH,RELOCS-ENRL %s +-; RUN: llvm-objdump --source %t.r.o | FileCheck --check-prefix=SOURCE %s +-; RUN: llvm-dwarfdump --debug-info --debug-line %t.r.o | FileCheck --check-prefix=DWARF %s ++; RUN: llvm-objdump --source %t.r.o | FileCheck --check-prefixes=SOURCE,SOURCE-ENRL %s ++; RUN: llvm-dwarfdump --debug-info --debug-line %t.r.o | FileCheck --check-prefixes=DWARF,DWARF-ENRL %s + + ; RELOCS-BOTH: Relocations [ + ; RELOCS-BOTH-NEXT: Section ({{.*}}) .rela.text { +-; RELOCS-BOTH-NEXT: 0x14 R_LARCH_PCALA_HI20 sym 0x0 +-; RELOCS-ENRL-NEXT: 0x14 R_LARCH_RELAX - 0x0 +-; RELOCS-BOTH-NEXT: 0x18 R_LARCH_PCALA_LO12 sym 0x0 +-; RELOCS-ENRL-NEXT: 0x18 R_LARCH_RELAX - 0x0 ++; RELOCS-NORL-NEXT: 0x14 R_LARCH_PCALA_HI20 sym 0x0 ++; RELOCS-NORL-NEXT: 0x18 R_LARCH_PCALA_LO12 sym 0x0 ++; RELOCS-ENRL-NEXT: 0x0 R_LARCH_ALIGN .Lla-relax-align0 0x5 ++; RELOCS-ENRL-NEXT: 0x30 R_LARCH_PCALA_HI20 sym 0x0 ++; RELOCS-ENRL-NEXT: 0x30 R_LARCH_RELAX - 0x0 ++; RELOCS-ENRL-NEXT: 0x34 R_LARCH_PCALA_LO12 sym 0x0 ++; RELOCS-ENRL-NEXT: 0x34 R_LARCH_RELAX - 0x0 + ; RELOCS-BOTH-NEXT: } + ; RELOCS-BOTH: Section ({{.*}}) .rela.debug_frame { + ; RELOCS-NORL-NEXT: 0x1C R_LARCH_32 .debug_frame 0x0 +@@ -36,7 +39,8 @@ + ; RELOCS-BOTH-NEXT: } + ; RELOCS-BOTH-NEXT: ] + +-; SOURCE: 0000000000000000 : ++; SOURCE-NORL: 0000000000000000 : ++; SOURCE-ENRL: 000000000000001c : + ; SOURCE: ; { + ; SOURCE: ; asm volatile( + ; SOURCE: ; return 0; +@@ -87,11 +91,16 @@ + ; DWARF-EMPTY: + ; DWARF-NEXT: Address Line Column File ISA Discriminator OpIndex Flags + ; DWARF-NEXT: ------------------ ------ ------ ------ --- ------------- ------- ------------- +-; DWARF-NEXT: 0x0000000000000000 2 0 0 0 0 0 is_stmt +-; DWARF-NEXT: 0x0000000000000010 3 3 0 0 0 0 is_stmt prologue_end +-; DWARF-NEXT: 0x0000000000000020 10 3 0 0 0 0 is_stmt +-; DWARF-NEXT: 0x000000000000002c 10 3 0 0 0 0 epilogue_begin +-; DWARF-NEXT: 0x0000000000000034 10 3 0 0 0 0 end_sequence ++; DWARF-NORL-NEXT: 0x0000000000000000 2 0 0 0 0 0 is_stmt ++; DWARF-NORL-NEXT: 0x0000000000000010 3 3 0 0 0 0 is_stmt prologue_end ++; DWARF-NORL-NEXT: 0x0000000000000020 10 3 0 0 0 0 is_stmt ++; DWARF-NORL-NEXT: 0x000000000000002c 10 3 0 0 0 0 epilogue_begin ++; DWARF-NORL-NEXT: 0x0000000000000034 10 3 0 0 0 0 end_sequence ++; DWARF-ENRL-NEXT: 0x000000000000001c 2 0 0 0 0 0 is_stmt ++; DWARF-ENRL-NEXT: 0x000000000000002c 3 3 0 0 0 0 is_stmt prologue_end ++; DWARF-ENRL-NEXT: 0x000000000000003c 10 3 0 0 0 0 is_stmt ++; DWARF-ENRL-NEXT: 0x0000000000000048 10 3 0 0 0 0 epilogue_begin ++; DWARF-ENRL-NEXT: 0x0000000000000050 10 3 0 0 0 0 end_sequence + + ; ModuleID = 'dwarf-loongarch-relocs.c' + source_filename = "dwarf-loongarch-relocs.c" +-- +2.20.1 + diff --git a/0009-MC-test-Change-ELF-uleb-ehtable.s-Mach-O-to-use-priv.patch b/0009-MC-test-Change-ELF-uleb-ehtable.s-Mach-O-to-use-priv.patch new file mode 100644 index 0000000000000000000000000000000000000000..71b77f6bdb5aa0b2216c607de9145dcd5111e4d7 --- /dev/null +++ 
b/0009-MC-test-Change-ELF-uleb-ehtable.s-Mach-O-to-use-priv.patch @@ -0,0 +1,53 @@ +From 442b5109ccbabed1110c122c1ca92d4194ba632b Mon Sep 17 00:00:00 2001 +From: Fangrui Song +Date: Wed, 9 Aug 2023 21:42:18 -0700 +Subject: [PATCH 09/66] [MC][test] Change ELF/uleb-ehtable.s Mach-O to use + private symbols in .uleb128 for label differences + +On Mach-O, `.uleb128 A-B` where A and B are separated by a non-private symbol is invalid +(see D153167). + +(cherry picked from commit 0a89bda4a8b756a00985e0965f7686b5ceb43295) +Change-Id: I92ed11d6913b8c781e29be6e8c642cf0a371910d +--- + llvm/test/MC/ELF/uleb-ehtable.s | 13 +++++++++++-- + 1 file changed, 11 insertions(+), 2 deletions(-) + +diff --git a/llvm/test/MC/ELF/uleb-ehtable.s b/llvm/test/MC/ELF/uleb-ehtable.s +index ca3f9e97bffc..6407223f36e7 100644 +--- a/llvm/test/MC/ELF/uleb-ehtable.s ++++ b/llvm/test/MC/ELF/uleb-ehtable.s +@@ -1,7 +1,7 @@ + // RUN: llvm-mc -filetype=obj -triple i686-pc-linux-gnu %s -o - | llvm-readobj -S --sd - | FileCheck %s -check-prefix=CHECK -check-prefix=ELF + // RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -S --sd - | FileCheck %s -check-prefix=CHECK -check-prefix=ELF +-// RUN: llvm-mc -filetype=obj -triple i386-apple-darwin9 %s -o - | llvm-readobj -S --sd - | FileCheck %s -check-prefix=CHECK -check-prefix=MACHO +-// RUN: llvm-mc -filetype=obj -triple x86_64-apple-darwin9 %s -o - | llvm-readobj -S --sd - | FileCheck %s -check-prefix=CHECK -check-prefix=MACHO ++// RUN: llvm-mc -filetype=obj -triple i386-apple-darwin9 --defsym MACHO=1 %s -o - | llvm-readobj -S --sd - | FileCheck %s -check-prefix=CHECK -check-prefix=MACHO ++// RUN: llvm-mc -filetype=obj -triple x86_64-apple-darwin9 --defsym MACHO=1 %s -o - | llvm-readobj -S --sd - | FileCheck %s -check-prefix=CHECK -check-prefix=MACHO + + // Test that we can assemble a GCC-like EH table that has 16381-16383 bytes of + // non-padding data between .ttbaseref and .ttbase. The assembler must insert +@@ -13,11 +13,20 @@ + foo: + .byte 0xff // LPStart omitted + .byte 0x1 // TType encoding (uleb128) ++.ifdef MACHO ++ .uleb128 Lttbase-Lttbaseref ++Lttbaseref: ++.else + .uleb128 .ttbase-.ttbaseref + .ttbaseref: ++.endif + .fill 128*128-1, 1, 0xcd // call site and actions tables + .balign 4 ++.ifdef MACHO ++Lttbase: ++.else + .ttbase: ++.endif + .byte 1, 2, 3, 4 + + // ELF: Name: .data +-- +2.20.1 + diff --git a/0010-Mips-MC-AttemptToFoldSymbolOffsetDifference-revert-i.patch b/0010-Mips-MC-AttemptToFoldSymbolOffsetDifference-revert-i.patch new file mode 100644 index 0000000000000000000000000000000000000000..cb8901e15248d6cc4e59115c22557998120d1e35 --- /dev/null +++ b/0010-Mips-MC-AttemptToFoldSymbolOffsetDifference-revert-i.patch @@ -0,0 +1,135 @@ +From 3b777f98a3997f338919af7ff1ef8a6fd07f76a0 Mon Sep 17 00:00:00 2001 +From: Fangrui Song +Date: Wed, 16 Aug 2023 23:11:59 -0700 +Subject: [PATCH 10/66] [Mips][MC] AttemptToFoldSymbolOffsetDifference: revert + isMicroMips special case + +D52985/D57677 added a .gcc_except_table workaround, but the new behavior +doesn't match GNU assembler. +``` +void foo(); +int bar() { + foo(); + try { throw 1; } + catch (int) { return 1; } + return 0; +} + +clang --target=mipsel-linux-gnu -mmicromips -S a.cc +mipsel-linux-gnu-gcc -mmicromips -c a.s -o gnu.o + +.uleb128 ($cst_end0)-($cst_begin0) // bit 0 is not forced to 1 +.uleb128 ($func_begin0)-($func_begin0) // bit 0 is not forced to 1 +``` + +I have inspected `.gcc_except_table` output by `mipsel-linux-gnu-gcc -mmicromips -c a.cc`. 
+The `.uleb128` values are not forced to set the least significant bit. + +In addition, D57677's adjustment (even->odd) to CodeGen/Mips/micromips-b-range.ll is wrong. +PC-relative `.long func - .` values will differ from GNU assembler as well. + +The original intention of D52985 seems unclear to me. I think whatever +goal it wants to achieve should be moved to an upper layer. + +This isMicroMips special case has caused problems to fix MCAssembler::relaxLEB to use evaluateAsAbsolute instead of evaluateKnownAbsolute, +which is needed to proper support R_RISCV_SET_ULEB128/R_RISCV_SUB_ULEB128. + +Differential Revision: https://reviews.llvm.org/D157655 + +(cherry picked from commit 4c89277095ee7cda3d20e0f5f18b384212069778) +Change-Id: Iedd73e0c61856c30fde442309fc16d4327829f1a +--- + llvm/lib/MC/MCExpr.cpp | 5 ----- + llvm/test/CodeGen/Mips/micromips-b-range.ll | 8 ++++---- + llvm/test/CodeGen/Mips/micromips-gcc-except-table.ll | 2 +- + llvm/test/DebugInfo/Mips/eh_frame.ll | 4 ++-- + 4 files changed, 7 insertions(+), 12 deletions(-) + +diff --git a/llvm/lib/MC/MCExpr.cpp b/llvm/lib/MC/MCExpr.cpp +index 79808a58d81c..c9ff1865cf91 100644 +--- a/llvm/lib/MC/MCExpr.cpp ++++ b/llvm/lib/MC/MCExpr.cpp +@@ -611,11 +611,6 @@ static void AttemptToFoldSymbolOffsetDifference( + if (Asm->isThumbFunc(&SA)) + Addend |= 1; + +- // If symbol is labeled as micromips, we set low-bit to ensure +- // correct offset in .gcc_except_table +- if (Asm->getBackend().isMicroMips(&SA)) +- Addend |= 1; +- + // Clear the symbol expr pointers to indicate we have folded these + // operands. + A = B = nullptr; +diff --git a/llvm/test/CodeGen/Mips/micromips-b-range.ll b/llvm/test/CodeGen/Mips/micromips-b-range.ll +index 064afff3da0e..81d1c04208cc 100644 +--- a/llvm/test/CodeGen/Mips/micromips-b-range.ll ++++ b/llvm/test/CodeGen/Mips/micromips-b-range.ll +@@ -13,7 +13,7 @@ + ; CHECK-NEXT: 1e: fb fd 00 00 sw $ra, 0($sp) + ; CHECK-NEXT: 22: 41 a1 00 01 lui $1, 1 + ; CHECK-NEXT: 26: 40 60 00 02 bal 0x2e +-; CHECK-NEXT: 2a: 30 21 04 69 addiu $1, $1, 1129 ++; CHECK-NEXT: 2a: 30 21 04 68 addiu $1, $1, 1128 + ; CHECK-NEXT: 2e: 00 3f 09 50 addu $1, $ra, $1 + ; CHECK-NEXT: 32: ff fd 00 00 lw $ra, 0($sp) + ; CHECK-NEXT: 36: 00 01 0f 3c jr $1 +@@ -27,7 +27,7 @@ + ; CHECK-NEXT: 56: fb fd 00 00 sw $ra, 0($sp) + ; CHECK-NEXT: 5a: 41 a1 00 01 lui $1, 1 + ; CHECK-NEXT: 5e: 40 60 00 02 bal 0x66 +-; CHECK-NEXT: 62: 30 21 04 5d addiu $1, $1, 1117 ++; CHECK-NEXT: 62: 30 21 04 5c addiu $1, $1, 1116 + ; CHECK-NEXT: 66: 00 3f 09 50 addu $1, $ra, $1 + ; CHECK-NEXT: 6a: ff fd 00 00 lw $ra, 0($sp) + ; CHECK-NEXT: 6e: 00 01 0f 3c jr $1 +@@ -39,7 +39,7 @@ + ; CHECK-NEXT: 86: fb fd 00 00 sw $ra, 0($sp) + ; CHECK-NEXT: 8a: 41 a1 00 01 lui $1, 1 + ; CHECK-NEXT: 8e: 40 60 00 02 bal 0x96 +-; CHECK-NEXT: 92: 30 21 04 2d addiu $1, $1, 1069 ++; CHECK-NEXT: 92: 30 21 04 2c addiu $1, $1, 1068 + ; CHECK-NEXT: 96: 00 3f 09 50 addu $1, $ra, $1 + ; CHECK-NEXT: 9a: ff fd 00 00 lw $ra, 0($sp) + ; CHECK-NEXT: 9e: 00 01 0f 3c jr $1 +@@ -51,7 +51,7 @@ + ; CHECK-NEXT: 10476: fb fd 00 00 sw $ra, 0($sp) + ; CHECK-NEXT: 1047a: 41 a1 00 01 lui $1, 1 + ; CHECK-NEXT: 1047e: 40 60 00 02 bal 0x10486 +-; CHECK-NEXT: 10482: 30 21 04 01 addiu $1, $1, 1025 ++; CHECK-NEXT: 10482: 30 21 04 00 addiu $1, $1, 1024 + ; CHECK-NEXT: 10486: 00 3f 09 50 addu $1, $ra, $1 + ; CHECK-NEXT: 1048a: ff fd 00 00 lw $ra, 0($sp) + ; CHECK-NEXT: 1048e: 00 01 0f 3c jr $1 +diff --git a/llvm/test/CodeGen/Mips/micromips-gcc-except-table.ll b/llvm/test/CodeGen/Mips/micromips-gcc-except-table.ll +index 
2b63aff01574..20d64fc216b7 100644 +--- a/llvm/test/CodeGen/Mips/micromips-gcc-except-table.ll ++++ b/llvm/test/CodeGen/Mips/micromips-gcc-except-table.ll +@@ -1,7 +1,7 @@ + ; RUN: llc -mtriple=mips-linux-gnu -mcpu=mips32r2 -mattr=+micromips -O3 -filetype=obj < %s | llvm-objdump -s -j .gcc_except_table - | FileCheck %s + + ; CHECK: Contents of section .gcc_except_table: +-; CHECK-NEXT: 0000 ff9b1501 0c011100 00110e1f 011f1800 ++; CHECK-NEXT: 0000 ff9b1501 0c001000 00100e1e 011e1800 + ; CHECK-NEXT: 0010 00010000 00000000 + + @_ZTIi = external constant ptr +diff --git a/llvm/test/DebugInfo/Mips/eh_frame.ll b/llvm/test/DebugInfo/Mips/eh_frame.ll +index 506e5b87892b..60d4dc76777e 100644 +--- a/llvm/test/DebugInfo/Mips/eh_frame.ll ++++ b/llvm/test/DebugInfo/Mips/eh_frame.ll +@@ -26,9 +26,9 @@ + ; CHECK-READELF-PIC-NEXT: R_MIPS_PC32 + ; CHECK-READELF-NEXT: .gcc_except_table + +-; EXCEPT-TABLE-STATIC: 0000 ff9b1501 0c011500 00150e23 01231e00 ...........#.#.. ++; EXCEPT-TABLE-STATIC: 0000 ff9b1501 0c001400 00140e22 01221e00 ...........".".. + ; EXCEPT-TABLE-STATIC: 0010 00010000 00000000 +-; EXCEPT-TABLE-PIC: 0000 ff9b1501 0c012d00 002d133f 013f2a00 ......-..-.?.?*. ++; EXCEPT-TABLE-PIC: 0000 ff9b1501 0c002c00 002c123e 013e2a00 ......,..,.>.>*. + ; EXCEPT-TABLE-PIC: 0010 00010000 00000000 ........ + + @_ZTIi = external constant ptr +-- +2.20.1 + diff --git a/0011-Clang-LoongArch-Use-the-ClangBuiltin-class-to-automa.patch b/0011-Clang-LoongArch-Use-the-ClangBuiltin-class-to-automa.patch new file mode 100644 index 0000000000000000000000000000000000000000..775972be6a07e0f25138ba0c4ff1c24ba80a4886 --- /dev/null +++ b/0011-Clang-LoongArch-Use-the-ClangBuiltin-class-to-automa.patch @@ -0,0 +1,187 @@ +From e5c03f299c1761eec0ae325d995eab121f1dd3a3 Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Wed, 9 Aug 2023 16:01:37 +0800 +Subject: [PATCH 11/66] [Clang][LoongArch] Use the ClangBuiltin class to + automatically generate support for CBE and CFE + +Fixed the type modifier (L->W), removed redundant feature checking code +since the feature has already been checked in `EmitBuiltinExpr`. And +Cleaned up unused diagnostic information. 
+ +Reviewed By: SixWeining + +Differential Revision: https://reviews.llvm.org/D156866 + +(cherry picked from commit ea8d3b1f9f2d7385d97fcd34d14db0eb2cb2795c) + +Change-Id: I058f02e311dd67dc2ec63e404e4bb58e852da1b8 +--- + llvm/include/llvm/IR/IntrinsicsLoongArch.td | 141 ++++++++++---------- + llvm/lib/IR/Function.cpp | 1 + + 2 files changed, 72 insertions(+), 70 deletions(-) + +diff --git a/llvm/include/llvm/IR/IntrinsicsLoongArch.td b/llvm/include/llvm/IR/IntrinsicsLoongArch.td +index 5edce3c529e1..4219b2f55346 100644 +--- a/llvm/include/llvm/IR/IntrinsicsLoongArch.td ++++ b/llvm/include/llvm/IR/IntrinsicsLoongArch.td +@@ -51,74 +51,75 @@ defm int_loongarch_masked_cmpxchg : MaskedAtomicRMWFiveOpIntrinsics; + //===----------------------------------------------------------------------===// + // LoongArch BASE + +-def int_loongarch_break : Intrinsic<[], [llvm_i32_ty], [ImmArg>]>; +-def int_loongarch_cacop_d : Intrinsic<[], [llvm_i64_ty, llvm_i64_ty, llvm_i64_ty], +- [ImmArg>, ImmArg>]>; +-def int_loongarch_cacop_w : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], +- [ImmArg>, ImmArg>]>; +-def int_loongarch_dbar : Intrinsic<[], [llvm_i32_ty], [ImmArg>]>; +-def int_loongarch_ibar : Intrinsic<[], [llvm_i32_ty], [ImmArg>]>; +-def int_loongarch_movfcsr2gr : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], +- [ImmArg>]>; +-def int_loongarch_movgr2fcsr : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], +- [ImmArg>]>; +-def int_loongarch_syscall : Intrinsic<[], [llvm_i32_ty], [ImmArg>]>; +- +-def int_loongarch_crc_w_b_w : Intrinsic<[llvm_i32_ty], +- [llvm_i32_ty, llvm_i32_ty]>; +-def int_loongarch_crc_w_h_w : Intrinsic<[llvm_i32_ty], +- [llvm_i32_ty, llvm_i32_ty]>; +-def int_loongarch_crc_w_w_w : Intrinsic<[llvm_i32_ty], +- [llvm_i32_ty, llvm_i32_ty]>; +-def int_loongarch_crc_w_d_w : Intrinsic<[llvm_i32_ty], +- [llvm_i64_ty, llvm_i32_ty]>; +- +-def int_loongarch_crcc_w_b_w : Intrinsic<[llvm_i32_ty], +- [llvm_i32_ty, llvm_i32_ty]>; +-def int_loongarch_crcc_w_h_w : Intrinsic<[llvm_i32_ty], +- [llvm_i32_ty, llvm_i32_ty]>; +-def int_loongarch_crcc_w_w_w : Intrinsic<[llvm_i32_ty], +- [llvm_i32_ty, llvm_i32_ty]>; +-def int_loongarch_crcc_w_d_w : Intrinsic<[llvm_i32_ty], +- [llvm_i64_ty, llvm_i32_ty]>; +- +-def int_loongarch_csrrd_w : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], +- [ImmArg>]>; +-def int_loongarch_csrrd_d : Intrinsic<[llvm_i64_ty], [llvm_i32_ty], +- [ImmArg>]>; +-def int_loongarch_csrwr_w : Intrinsic<[llvm_i32_ty], +- [llvm_i32_ty, llvm_i32_ty], +- [ImmArg>]>; +-def int_loongarch_csrwr_d : Intrinsic<[llvm_i64_ty], +- [llvm_i64_ty, llvm_i32_ty], +- [ImmArg>]>; +-def int_loongarch_csrxchg_w : Intrinsic<[llvm_i32_ty], +- [llvm_i32_ty, llvm_i32_ty, +- llvm_i32_ty], +- [ImmArg>]>; +-def int_loongarch_csrxchg_d : Intrinsic<[llvm_i64_ty], +- [llvm_i64_ty, llvm_i64_ty, +- llvm_i32_ty], +- [ImmArg>]>; +- +-def int_loongarch_iocsrrd_b : Intrinsic<[llvm_i32_ty], [llvm_i32_ty]>; +-def int_loongarch_iocsrrd_h : Intrinsic<[llvm_i32_ty], [llvm_i32_ty]>; +-def int_loongarch_iocsrrd_w : Intrinsic<[llvm_i32_ty], [llvm_i32_ty]>; +-def int_loongarch_iocsrrd_d : Intrinsic<[llvm_i64_ty], [llvm_i32_ty]>; +- +-def int_loongarch_iocsrwr_b : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty]>; +-def int_loongarch_iocsrwr_h : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty]>; +-def int_loongarch_iocsrwr_w : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty]>; +-def int_loongarch_iocsrwr_d : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty]>; +- +-def int_loongarch_cpucfg : Intrinsic<[llvm_i32_ty], [llvm_i32_ty]>; +- +-def int_loongarch_asrtle_d : Intrinsic<[], 
[llvm_i64_ty, llvm_i64_ty]>; +-def int_loongarch_asrtgt_d : Intrinsic<[], [llvm_i64_ty, llvm_i64_ty]>; +- +-def int_loongarch_lddir_d : Intrinsic<[llvm_i64_ty], +- [llvm_i64_ty, llvm_i64_ty], +- [ImmArg>]>; +-def int_loongarch_ldpte_d : Intrinsic<[], [llvm_i64_ty, llvm_i64_ty], +- [ImmArg>]>; ++class BaseInt ret_types, list param_types, ++ list intr_properties = []> ++ : Intrinsic, ++ ClangBuiltin; ++ ++def int_loongarch_break : BaseInt<[], [llvm_i32_ty], [ImmArg>]>; ++def int_loongarch_cacop_d : BaseInt<[], [llvm_i64_ty, llvm_i64_ty, llvm_i64_ty], ++ [ImmArg>, ImmArg>]>; ++def int_loongarch_cacop_w : BaseInt<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], ++ [ImmArg>, ImmArg>]>; ++def int_loongarch_dbar : BaseInt<[], [llvm_i32_ty], [ImmArg>]>; ++ ++def int_loongarch_ibar : BaseInt<[], [llvm_i32_ty], [ImmArg>]>; ++def int_loongarch_movfcsr2gr : BaseInt<[llvm_i32_ty], [llvm_i32_ty], ++ [ImmArg>]>; ++def int_loongarch_movgr2fcsr : BaseInt<[], [llvm_i32_ty, llvm_i32_ty], ++ [ImmArg>]>; ++def int_loongarch_syscall : BaseInt<[], [llvm_i32_ty], [ImmArg>]>; ++ ++def int_loongarch_crc_w_b_w : BaseInt<[llvm_i32_ty], ++ [llvm_i32_ty, llvm_i32_ty]>; ++def int_loongarch_crc_w_h_w : BaseInt<[llvm_i32_ty], ++ [llvm_i32_ty, llvm_i32_ty]>; ++def int_loongarch_crc_w_w_w : BaseInt<[llvm_i32_ty], ++ [llvm_i32_ty, llvm_i32_ty]>; ++def int_loongarch_crc_w_d_w : BaseInt<[llvm_i32_ty], ++ [llvm_i64_ty, llvm_i32_ty]>; ++ ++def int_loongarch_crcc_w_b_w : BaseInt<[llvm_i32_ty], ++ [llvm_i32_ty, llvm_i32_ty]>; ++def int_loongarch_crcc_w_h_w : BaseInt<[llvm_i32_ty], ++ [llvm_i32_ty, llvm_i32_ty]>; ++def int_loongarch_crcc_w_w_w : BaseInt<[llvm_i32_ty], ++ [llvm_i32_ty, llvm_i32_ty]>; ++def int_loongarch_crcc_w_d_w : BaseInt<[llvm_i32_ty], ++ [llvm_i64_ty, llvm_i32_ty]>; ++ ++def int_loongarch_csrrd_w : BaseInt<[llvm_i32_ty], [llvm_i32_ty], ++ [ImmArg>]>; ++def int_loongarch_csrrd_d : BaseInt<[llvm_i64_ty], [llvm_i32_ty], ++ [ImmArg>]>; ++def int_loongarch_csrwr_w : BaseInt<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], ++ [ImmArg>]>; ++def int_loongarch_csrwr_d : BaseInt<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty], ++ [ImmArg>]>; ++def int_loongarch_csrxchg_w : BaseInt<[llvm_i32_ty], ++ [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], ++ [ImmArg>]>; ++def int_loongarch_csrxchg_d : BaseInt<[llvm_i64_ty], ++ [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty], ++ [ImmArg>]>; ++ ++def int_loongarch_iocsrrd_b : BaseInt<[llvm_i32_ty], [llvm_i32_ty]>; ++def int_loongarch_iocsrrd_h : BaseInt<[llvm_i32_ty], [llvm_i32_ty]>; ++def int_loongarch_iocsrrd_w : BaseInt<[llvm_i32_ty], [llvm_i32_ty]>; ++def int_loongarch_iocsrrd_d : BaseInt<[llvm_i64_ty], [llvm_i32_ty]>; ++ ++def int_loongarch_iocsrwr_b : BaseInt<[], [llvm_i32_ty, llvm_i32_ty]>; ++def int_loongarch_iocsrwr_h : BaseInt<[], [llvm_i32_ty, llvm_i32_ty]>; ++def int_loongarch_iocsrwr_w : BaseInt<[], [llvm_i32_ty, llvm_i32_ty]>; ++def int_loongarch_iocsrwr_d : BaseInt<[], [llvm_i64_ty, llvm_i32_ty]>; ++ ++def int_loongarch_cpucfg : BaseInt<[llvm_i32_ty], [llvm_i32_ty]>; ++ ++def int_loongarch_asrtle_d : BaseInt<[], [llvm_i64_ty, llvm_i64_ty]>; ++def int_loongarch_asrtgt_d : BaseInt<[], [llvm_i64_ty, llvm_i64_ty]>; ++ ++def int_loongarch_lddir_d : BaseInt<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], ++ [ImmArg>]>; ++def int_loongarch_ldpte_d : BaseInt<[], [llvm_i64_ty, llvm_i64_ty], ++ [ImmArg>]>; + } // TargetPrefix = "loongarch" +diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp +index 27219e89dc5f..435800d9e5f9 100644 +--- a/llvm/lib/IR/Function.cpp ++++ b/llvm/lib/IR/Function.cpp +@@ 
-37,6 +37,7 @@ + #include "llvm/IR/IntrinsicsBPF.h" + #include "llvm/IR/IntrinsicsDirectX.h" + #include "llvm/IR/IntrinsicsHexagon.h" ++#include "llvm/IR/IntrinsicsLoongArch.h" + #include "llvm/IR/IntrinsicsMips.h" + #include "llvm/IR/IntrinsicsNVPTX.h" + #include "llvm/IR/IntrinsicsPowerPC.h" +-- +2.20.1 + diff --git a/0012-LoongArch-Add-LSX-intrinsic-support.patch b/0012-LoongArch-Add-LSX-intrinsic-support.patch new file mode 100644 index 0000000000000000000000000000000000000000..146c1e669cd13fa7984b727524714a3d8ea96617 --- /dev/null +++ b/0012-LoongArch-Add-LSX-intrinsic-support.patch @@ -0,0 +1,2726 @@ +From b1101237c46337236b43f9dbae88b03bf0a526f8 Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Sat, 19 Aug 2023 15:58:38 +0800 +Subject: [PATCH 12/66] [LoongArch] Add LSX intrinsic support + +For handling intrinsics, our approach is not simply to match them +one-to-one with instructions. Instead, we lower some intrinsics +to common nodes and then perform matching. The advantage of this +approach is that it allows us to fully utilize the passes available +at the common layer for optimizing purposes. + +We perform error checks on the immediate operand of all intrinsics, +rather than waiting until the end to throw exceptions. + +Reviewed By: SixWeining + +Differential Revision: https://reviews.llvm.org/D155829 + +(cherry picked from commit 53141b2fcfa20616970833e6513537d211116c05) + +Change-Id: Icde9a81871e71ab7875056daa79568d566a07a07 +--- + llvm/include/llvm/IR/IntrinsicsLoongArch.td | 524 ++++++++++ + .../LoongArch/LoongArchISelDAGToDAG.cpp | 100 +- + .../Target/LoongArch/LoongArchISelDAGToDAG.h | 8 + + .../LoongArch/LoongArchISelLowering.cpp | 902 +++++++++++++++++- + .../Target/LoongArch/LoongArchISelLowering.h | 14 + + .../Target/LoongArch/LoongArchInstrInfo.cpp | 12 + + .../Target/LoongArch/LoongArchInstrInfo.td | 6 +- + .../Target/LoongArch/LoongArchLSXInstrInfo.td | 816 ++++++++++++++++ + 8 files changed, 2359 insertions(+), 23 deletions(-) + +diff --git a/llvm/include/llvm/IR/IntrinsicsLoongArch.td b/llvm/include/llvm/IR/IntrinsicsLoongArch.td +index 4219b2f55346..d39d8261ebe3 100644 +--- a/llvm/include/llvm/IR/IntrinsicsLoongArch.td ++++ b/llvm/include/llvm/IR/IntrinsicsLoongArch.td +@@ -123,3 +123,527 @@ def int_loongarch_lddir_d : BaseInt<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], + def int_loongarch_ldpte_d : BaseInt<[], [llvm_i64_ty, llvm_i64_ty], + [ImmArg>]>; + } // TargetPrefix = "loongarch" ++ ++/// Vector intrinsic ++ ++class VecInt ret_types, list param_types, ++ list intr_properties = []> ++ : Intrinsic, ++ ClangBuiltin; ++ ++//===----------------------------------------------------------------------===// ++// LSX ++ ++let TargetPrefix = "loongarch" in { ++ ++foreach inst = ["vadd_b", "vsub_b", ++ "vsadd_b", "vsadd_bu", "vssub_b", "vssub_bu", ++ "vavg_b", "vavg_bu", "vavgr_b", "vavgr_bu", ++ "vabsd_b", "vabsd_bu", "vadda_b", ++ "vmax_b", "vmax_bu", "vmin_b", "vmin_bu", ++ "vmul_b", "vmuh_b", "vmuh_bu", ++ "vdiv_b", "vdiv_bu", "vmod_b", "vmod_bu", "vsigncov_b", ++ "vand_v", "vor_v", "vxor_v", "vnor_v", "vandn_v", "vorn_v", ++ "vsll_b", "vsrl_b", "vsra_b", "vrotr_b", "vsrlr_b", "vsrar_b", ++ "vbitclr_b", "vbitset_b", "vbitrev_b", ++ "vseq_b", "vsle_b", "vsle_bu", "vslt_b", "vslt_bu", ++ "vpackev_b", "vpackod_b", "vpickev_b", "vpickod_b", ++ "vilvl_b", "vilvh_b"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v16i8_ty], ++ [llvm_v16i8_ty, llvm_v16i8_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["vadd_h", "vsub_h", ++ "vsadd_h", "vsadd_hu", "vssub_h", "vssub_hu", ++ 
"vavg_h", "vavg_hu", "vavgr_h", "vavgr_hu", ++ "vabsd_h", "vabsd_hu", "vadda_h", ++ "vmax_h", "vmax_hu", "vmin_h", "vmin_hu", ++ "vmul_h", "vmuh_h", "vmuh_hu", ++ "vdiv_h", "vdiv_hu", "vmod_h", "vmod_hu", "vsigncov_h", ++ "vsll_h", "vsrl_h", "vsra_h", "vrotr_h", "vsrlr_h", "vsrar_h", ++ "vbitclr_h", "vbitset_h", "vbitrev_h", ++ "vseq_h", "vsle_h", "vsle_hu", "vslt_h", "vslt_hu", ++ "vpackev_h", "vpackod_h", "vpickev_h", "vpickod_h", ++ "vilvl_h", "vilvh_h"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v8i16_ty], ++ [llvm_v8i16_ty, llvm_v8i16_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["vadd_w", "vsub_w", ++ "vsadd_w", "vsadd_wu", "vssub_w", "vssub_wu", ++ "vavg_w", "vavg_wu", "vavgr_w", "vavgr_wu", ++ "vabsd_w", "vabsd_wu", "vadda_w", ++ "vmax_w", "vmax_wu", "vmin_w", "vmin_wu", ++ "vmul_w", "vmuh_w", "vmuh_wu", ++ "vdiv_w", "vdiv_wu", "vmod_w", "vmod_wu", "vsigncov_w", ++ "vsll_w", "vsrl_w", "vsra_w", "vrotr_w", "vsrlr_w", "vsrar_w", ++ "vbitclr_w", "vbitset_w", "vbitrev_w", ++ "vseq_w", "vsle_w", "vsle_wu", "vslt_w", "vslt_wu", ++ "vpackev_w", "vpackod_w", "vpickev_w", "vpickod_w", ++ "vilvl_w", "vilvh_w"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v4i32_ty], ++ [llvm_v4i32_ty, llvm_v4i32_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["vadd_d", "vadd_q", "vsub_d", "vsub_q", ++ "vsadd_d", "vsadd_du", "vssub_d", "vssub_du", ++ "vhaddw_q_d", "vhaddw_qu_du", "vhsubw_q_d", "vhsubw_qu_du", ++ "vaddwev_q_d", "vaddwod_q_d", "vsubwev_q_d", "vsubwod_q_d", ++ "vaddwev_q_du", "vaddwod_q_du", "vsubwev_q_du", "vsubwod_q_du", ++ "vaddwev_q_du_d", "vaddwod_q_du_d", ++ "vavg_d", "vavg_du", "vavgr_d", "vavgr_du", ++ "vabsd_d", "vabsd_du", "vadda_d", ++ "vmax_d", "vmax_du", "vmin_d", "vmin_du", ++ "vmul_d", "vmuh_d", "vmuh_du", ++ "vmulwev_q_d", "vmulwod_q_d", "vmulwev_q_du", "vmulwod_q_du", ++ "vmulwev_q_du_d", "vmulwod_q_du_d", ++ "vdiv_d", "vdiv_du", "vmod_d", "vmod_du", "vsigncov_d", ++ "vsll_d", "vsrl_d", "vsra_d", "vrotr_d", "vsrlr_d", "vsrar_d", ++ "vbitclr_d", "vbitset_d", "vbitrev_d", ++ "vseq_d", "vsle_d", "vsle_du", "vslt_d", "vslt_du", ++ "vpackev_d", "vpackod_d", "vpickev_d", "vpickod_d", ++ "vilvl_d", "vilvh_d"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v2i64_ty], ++ [llvm_v2i64_ty, llvm_v2i64_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["vaddi_bu", "vsubi_bu", ++ "vmaxi_b", "vmaxi_bu", "vmini_b", "vmini_bu", ++ "vsat_b", "vsat_bu", ++ "vandi_b", "vori_b", "vxori_b", "vnori_b", ++ "vslli_b", "vsrli_b", "vsrai_b", "vrotri_b", ++ "vsrlri_b", "vsrari_b", ++ "vbitclri_b", "vbitseti_b", "vbitrevi_b", ++ "vseqi_b", "vslei_b", "vslei_bu", "vslti_b", "vslti_bu", ++ "vreplvei_b", "vbsll_v", "vbsrl_v", "vshuf4i_b"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v16i8_ty], ++ [llvm_v16i8_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++foreach inst = ["vaddi_hu", "vsubi_hu", ++ "vmaxi_h", "vmaxi_hu", "vmini_h", "vmini_hu", ++ "vsat_h", "vsat_hu", ++ "vslli_h", "vsrli_h", "vsrai_h", "vrotri_h", ++ "vsrlri_h", "vsrari_h", ++ "vbitclri_h", "vbitseti_h", "vbitrevi_h", ++ "vseqi_h", "vslei_h", "vslei_hu", "vslti_h", "vslti_hu", ++ "vreplvei_h", "vshuf4i_h"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v8i16_ty], ++ [llvm_v8i16_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++foreach inst = ["vaddi_wu", "vsubi_wu", ++ "vmaxi_w", "vmaxi_wu", "vmini_w", "vmini_wu", ++ "vsat_w", "vsat_wu", ++ "vslli_w", "vsrli_w", "vsrai_w", "vrotri_w", ++ "vsrlri_w", "vsrari_w", ++ "vbitclri_w", "vbitseti_w", "vbitrevi_w", ++ "vseqi_w", "vslei_w", "vslei_wu", "vslti_w", "vslti_wu", ++ "vreplvei_w", "vshuf4i_w"] in ++ 
def int_loongarch_lsx_#inst : VecInt<[llvm_v4i32_ty], ++ [llvm_v4i32_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++foreach inst = ["vaddi_du", "vsubi_du", ++ "vmaxi_d", "vmaxi_du", "vmini_d", "vmini_du", ++ "vsat_d", "vsat_du", ++ "vslli_d", "vsrli_d", "vsrai_d", "vrotri_d", ++ "vsrlri_d", "vsrari_d", ++ "vbitclri_d", "vbitseti_d", "vbitrevi_d", ++ "vseqi_d", "vslei_d", "vslei_du", "vslti_d", "vslti_du", ++ "vreplvei_d"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v2i64_ty], ++ [llvm_v2i64_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++ ++foreach inst = ["vhaddw_h_b", "vhaddw_hu_bu", "vhsubw_h_b", "vhsubw_hu_bu", ++ "vaddwev_h_b", "vaddwod_h_b", "vsubwev_h_b", "vsubwod_h_b", ++ "vaddwev_h_bu", "vaddwod_h_bu", "vsubwev_h_bu", "vsubwod_h_bu", ++ "vaddwev_h_bu_b", "vaddwod_h_bu_b", ++ "vmulwev_h_b", "vmulwod_h_b", "vmulwev_h_bu", "vmulwod_h_bu", ++ "vmulwev_h_bu_b", "vmulwod_h_bu_b"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v8i16_ty], ++ [llvm_v16i8_ty, llvm_v16i8_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["vhaddw_w_h", "vhaddw_wu_hu", "vhsubw_w_h", "vhsubw_wu_hu", ++ "vaddwev_w_h", "vaddwod_w_h", "vsubwev_w_h", "vsubwod_w_h", ++ "vaddwev_w_hu", "vaddwod_w_hu", "vsubwev_w_hu", "vsubwod_w_hu", ++ "vaddwev_w_hu_h", "vaddwod_w_hu_h", ++ "vmulwev_w_h", "vmulwod_w_h", "vmulwev_w_hu", "vmulwod_w_hu", ++ "vmulwev_w_hu_h", "vmulwod_w_hu_h"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v4i32_ty], ++ [llvm_v8i16_ty, llvm_v8i16_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["vhaddw_d_w", "vhaddw_du_wu", "vhsubw_d_w", "vhsubw_du_wu", ++ "vaddwev_d_w", "vaddwod_d_w", "vsubwev_d_w", "vsubwod_d_w", ++ "vaddwev_d_wu", "vaddwod_d_wu", "vsubwev_d_wu", "vsubwod_d_wu", ++ "vaddwev_d_wu_w", "vaddwod_d_wu_w", ++ "vmulwev_d_w", "vmulwod_d_w", "vmulwev_d_wu", "vmulwod_d_wu", ++ "vmulwev_d_wu_w", "vmulwod_d_wu_w"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v2i64_ty], ++ [llvm_v4i32_ty, llvm_v4i32_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["vsrln_b_h", "vsran_b_h", "vsrlrn_b_h", "vsrarn_b_h", ++ "vssrln_b_h", "vssran_b_h", "vssrln_bu_h", "vssran_bu_h", ++ "vssrlrn_b_h", "vssrarn_b_h", "vssrlrn_bu_h", "vssrarn_bu_h"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v16i8_ty], ++ [llvm_v8i16_ty, llvm_v8i16_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["vsrln_h_w", "vsran_h_w", "vsrlrn_h_w", "vsrarn_h_w", ++ "vssrln_h_w", "vssran_h_w", "vssrln_hu_w", "vssran_hu_w", ++ "vssrlrn_h_w", "vssrarn_h_w", "vssrlrn_hu_w", "vssrarn_hu_w"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v8i16_ty], ++ [llvm_v4i32_ty, llvm_v4i32_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["vsrln_w_d", "vsran_w_d", "vsrlrn_w_d", "vsrarn_w_d", ++ "vssrln_w_d", "vssran_w_d", "vssrln_wu_d", "vssran_wu_d", ++ "vssrlrn_w_d", "vssrarn_w_d", "vssrlrn_wu_d", "vssrarn_wu_d"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v4i32_ty], ++ [llvm_v2i64_ty, llvm_v2i64_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["vmadd_b", "vmsub_b", "vfrstp_b", "vbitsel_v", "vshuf_b"] in ++ def int_loongarch_lsx_#inst ++ : VecInt<[llvm_v16i8_ty], ++ [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], ++ [IntrNoMem]>; ++foreach inst = ["vmadd_h", "vmsub_h", "vfrstp_h", "vshuf_h"] in ++ def int_loongarch_lsx_#inst ++ : VecInt<[llvm_v8i16_ty], ++ [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty], ++ [IntrNoMem]>; ++foreach inst = ["vmadd_w", "vmsub_w", "vshuf_w"] in ++ def int_loongarch_lsx_#inst ++ : VecInt<[llvm_v4i32_ty], ++ [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], ++ [IntrNoMem]>; ++foreach inst = ["vmadd_d", "vmsub_d", "vshuf_d"] in ++ def int_loongarch_lsx_#inst ++ : 
VecInt<[llvm_v2i64_ty], ++ [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["vsrlni_b_h", "vsrani_b_h", "vsrlrni_b_h", "vsrarni_b_h", ++ "vssrlni_b_h", "vssrani_b_h", "vssrlni_bu_h", "vssrani_bu_h", ++ "vssrlrni_b_h", "vssrarni_b_h", "vssrlrni_bu_h", "vssrarni_bu_h", ++ "vfrstpi_b", "vbitseli_b", "vextrins_b"] in ++ def int_loongarch_lsx_#inst ++ : VecInt<[llvm_v16i8_ty], ++ [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++foreach inst = ["vsrlni_h_w", "vsrani_h_w", "vsrlrni_h_w", "vsrarni_h_w", ++ "vssrlni_h_w", "vssrani_h_w", "vssrlni_hu_w", "vssrani_hu_w", ++ "vssrlrni_h_w", "vssrarni_h_w", "vssrlrni_hu_w", "vssrarni_hu_w", ++ "vfrstpi_h", "vextrins_h"] in ++ def int_loongarch_lsx_#inst ++ : VecInt<[llvm_v8i16_ty], ++ [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++foreach inst = ["vsrlni_w_d", "vsrani_w_d", "vsrlrni_w_d", "vsrarni_w_d", ++ "vssrlni_w_d", "vssrani_w_d", "vssrlni_wu_d", "vssrani_wu_d", ++ "vssrlrni_w_d", "vssrarni_w_d", "vssrlrni_wu_d", "vssrarni_wu_d", ++ "vpermi_w", "vextrins_w"] in ++ def int_loongarch_lsx_#inst ++ : VecInt<[llvm_v4i32_ty], ++ [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++foreach inst = ["vsrlni_d_q", "vsrani_d_q", "vsrlrni_d_q", "vsrarni_d_q", ++ "vssrlni_d_q", "vssrani_d_q", "vssrlni_du_q", "vssrani_du_q", ++ "vssrlrni_d_q", "vssrarni_d_q", "vssrlrni_du_q", "vssrarni_du_q", ++ "vshuf4i_d", "vextrins_d"] in ++ def int_loongarch_lsx_#inst ++ : VecInt<[llvm_v2i64_ty], ++ [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++ ++foreach inst = ["vmaddwev_h_b", "vmaddwod_h_b", "vmaddwev_h_bu", ++ "vmaddwod_h_bu", "vmaddwev_h_bu_b", "vmaddwod_h_bu_b"] in ++ def int_loongarch_lsx_#inst ++ : VecInt<[llvm_v8i16_ty], ++ [llvm_v8i16_ty, llvm_v16i8_ty, llvm_v16i8_ty], ++ [IntrNoMem]>; ++foreach inst = ["vmaddwev_w_h", "vmaddwod_w_h", "vmaddwev_w_hu", ++ "vmaddwod_w_hu", "vmaddwev_w_hu_h", "vmaddwod_w_hu_h"] in ++ def int_loongarch_lsx_#inst ++ : VecInt<[llvm_v4i32_ty], ++ [llvm_v4i32_ty, llvm_v8i16_ty, llvm_v8i16_ty], ++ [IntrNoMem]>; ++foreach inst = ["vmaddwev_d_w", "vmaddwod_d_w", "vmaddwev_d_wu", ++ "vmaddwod_d_wu", "vmaddwev_d_wu_w", "vmaddwod_d_wu_w"] in ++ def int_loongarch_lsx_#inst ++ : VecInt<[llvm_v2i64_ty], ++ [llvm_v2i64_ty, llvm_v4i32_ty, llvm_v4i32_ty], ++ [IntrNoMem]>; ++foreach inst = ["vmaddwev_q_d", "vmaddwod_q_d", "vmaddwev_q_du", ++ "vmaddwod_q_du", "vmaddwev_q_du_d", "vmaddwod_q_du_d"] in ++ def int_loongarch_lsx_#inst ++ : VecInt<[llvm_v2i64_ty], ++ [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["vsllwil_h_b", "vsllwil_hu_bu"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v8i16_ty], ++ [llvm_v16i8_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++foreach inst = ["vsllwil_w_h", "vsllwil_wu_hu"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v4i32_ty], ++ [llvm_v8i16_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++foreach inst = ["vsllwil_d_w", "vsllwil_du_wu"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v2i64_ty], ++ [llvm_v4i32_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++ ++foreach inst = ["vneg_b", "vmskltz_b", "vmskgez_b", "vmsknz_b", ++ "vclo_b", "vclz_b", "vpcnt_b"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v16i8_ty], [llvm_v16i8_ty], ++ [IntrNoMem]>; ++foreach inst = ["vneg_h", "vmskltz_h", "vclo_h", "vclz_h", "vpcnt_h"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v8i16_ty], [llvm_v8i16_ty], ++ [IntrNoMem]>; ++foreach inst = ["vneg_w", "vmskltz_w", 
"vclo_w", "vclz_w", "vpcnt_w"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v4i32_ty], [llvm_v4i32_ty], ++ [IntrNoMem]>; ++foreach inst = ["vneg_d", "vexth_q_d", "vexth_qu_du", "vmskltz_d", ++ "vextl_q_d", "vextl_qu_du", "vclo_d", "vclz_d", "vpcnt_d"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v2i64_ty], [llvm_v2i64_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["vexth_h_b", "vexth_hu_bu"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v8i16_ty], [llvm_v16i8_ty], ++ [IntrNoMem]>; ++foreach inst = ["vexth_w_h", "vexth_wu_hu"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v4i32_ty], [llvm_v8i16_ty], ++ [IntrNoMem]>; ++foreach inst = ["vexth_d_w", "vexth_du_wu"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v2i64_ty], [llvm_v4i32_ty], ++ [IntrNoMem]>; ++ ++def int_loongarch_lsx_vldi : VecInt<[llvm_v2i64_ty], [llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++def int_loongarch_lsx_vrepli_b : VecInt<[llvm_v16i8_ty], [llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++def int_loongarch_lsx_vrepli_h : VecInt<[llvm_v8i16_ty], [llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++def int_loongarch_lsx_vrepli_w : VecInt<[llvm_v4i32_ty], [llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++def int_loongarch_lsx_vrepli_d : VecInt<[llvm_v2i64_ty], [llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++ ++def int_loongarch_lsx_vreplgr2vr_b : VecInt<[llvm_v16i8_ty], [llvm_i32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_vreplgr2vr_h : VecInt<[llvm_v8i16_ty], [llvm_i32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_vreplgr2vr_w : VecInt<[llvm_v4i32_ty], [llvm_i32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_vreplgr2vr_d : VecInt<[llvm_v2i64_ty], [llvm_i64_ty], ++ [IntrNoMem]>; ++ ++def int_loongarch_lsx_vinsgr2vr_b ++ : VecInt<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++def int_loongarch_lsx_vinsgr2vr_h ++ : VecInt<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++def int_loongarch_lsx_vinsgr2vr_w ++ : VecInt<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++def int_loongarch_lsx_vinsgr2vr_d ++ : VecInt<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i64_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++ ++def int_loongarch_lsx_vreplve_b ++ : VecInt<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vreplve_h ++ : VecInt<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vreplve_w ++ : VecInt<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vreplve_d ++ : VecInt<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++foreach inst = ["vpickve2gr_b", "vpickve2gr_bu" ] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_i32_ty], ++ [llvm_v16i8_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++foreach inst = ["vpickve2gr_h", "vpickve2gr_hu" ] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_i32_ty], ++ [llvm_v8i16_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++foreach inst = ["vpickve2gr_w", "vpickve2gr_wu" ] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_i32_ty], ++ [llvm_v4i32_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++foreach inst = ["vpickve2gr_d", "vpickve2gr_du" ] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_i64_ty], ++ [llvm_v2i64_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++ ++def int_loongarch_lsx_bz_b : VecInt<[llvm_i32_ty], [llvm_v16i8_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_bz_h : VecInt<[llvm_i32_ty], [llvm_v8i16_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_bz_w : VecInt<[llvm_i32_ty], [llvm_v4i32_ty], ++ 
[IntrNoMem]>; ++def int_loongarch_lsx_bz_d : VecInt<[llvm_i32_ty], [llvm_v2i64_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_bz_v : VecInt<[llvm_i32_ty], [llvm_v16i8_ty], ++ [IntrNoMem]>; ++ ++def int_loongarch_lsx_bnz_v : VecInt<[llvm_i32_ty], [llvm_v16i8_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_bnz_b : VecInt<[llvm_i32_ty], [llvm_v16i8_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_bnz_h : VecInt<[llvm_i32_ty], [llvm_v8i16_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_bnz_w : VecInt<[llvm_i32_ty], [llvm_v4i32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_bnz_d : VecInt<[llvm_i32_ty], [llvm_v2i64_ty], ++ [IntrNoMem]>; ++ ++// LSX Float ++ ++foreach inst = ["vfadd_s", "vfsub_s", "vfmul_s", "vfdiv_s", ++ "vfmax_s", "vfmin_s", "vfmaxa_s", "vfmina_s"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v4f32_ty], ++ [llvm_v4f32_ty, llvm_v4f32_ty], ++ [IntrNoMem]>; ++foreach inst = ["vfadd_d", "vfsub_d", "vfmul_d", "vfdiv_d", ++ "vfmax_d", "vfmin_d", "vfmaxa_d", "vfmina_d"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v2f64_ty], ++ [llvm_v2f64_ty, llvm_v2f64_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["vfmadd_s", "vfmsub_s", "vfnmadd_s", "vfnmsub_s"] in ++ def int_loongarch_lsx_#inst ++ : VecInt<[llvm_v4f32_ty], ++ [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], ++ [IntrNoMem]>; ++foreach inst = ["vfmadd_d", "vfmsub_d", "vfnmadd_d", "vfnmsub_d"] in ++ def int_loongarch_lsx_#inst ++ : VecInt<[llvm_v2f64_ty], ++ [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["vflogb_s", "vfsqrt_s", "vfrecip_s", "vfrsqrt_s", "vfrint_s", ++ "vfrintrne_s", "vfrintrz_s", "vfrintrp_s", "vfrintrm_s"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v4f32_ty], [llvm_v4f32_ty], ++ [IntrNoMem]>; ++foreach inst = ["vflogb_d", "vfsqrt_d", "vfrecip_d", "vfrsqrt_d", "vfrint_d", ++ "vfrintrne_d", "vfrintrz_d", "vfrintrp_d", "vfrintrm_d"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v2f64_ty], [llvm_v2f64_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["vfcvtl_s_h", "vfcvth_s_h"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v4f32_ty], [llvm_v8i16_ty], ++ [IntrNoMem]>; ++foreach inst = ["vfcvtl_d_s", "vfcvth_d_s"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v2f64_ty], [llvm_v4f32_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["vftintrne_w_s", "vftintrz_w_s", "vftintrp_w_s", "vftintrm_w_s", ++ "vftint_w_s", "vftintrz_wu_s", "vftint_wu_s", "vfclass_s"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v4i32_ty], [llvm_v4f32_ty], ++ [IntrNoMem]>; ++foreach inst = ["vftintrne_l_d", "vftintrz_l_d", "vftintrp_l_d", "vftintrm_l_d", ++ "vftint_l_d", "vftintrz_lu_d", "vftint_lu_d", "vfclass_d"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v2i64_ty], [llvm_v2f64_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["vftintrnel_l_s", "vftintrneh_l_s", "vftintrzl_l_s", ++ "vftintrzh_l_s", "vftintrpl_l_s", "vftintrph_l_s", ++ "vftintrml_l_s", "vftintrmh_l_s", "vftintl_l_s", ++ "vftinth_l_s"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v2i64_ty], [llvm_v4f32_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["vffint_s_w", "vffint_s_wu"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v4f32_ty], [llvm_v4i32_ty], ++ [IntrNoMem]>; ++foreach inst = ["vffint_d_l", "vffint_d_lu"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v2f64_ty], [llvm_v2i64_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["vffintl_d_w", "vffinth_d_w"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v2f64_ty], [llvm_v4i32_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["vffint_s_l"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v4f32_ty], ++ 
[llvm_v2i64_ty, llvm_v2i64_ty], ++ [IntrNoMem]>; ++foreach inst = ["vftintrne_w_d", "vftintrz_w_d", "vftintrp_w_d", "vftintrm_w_d", ++ "vftint_w_d"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v4i32_ty], ++ [llvm_v2f64_ty, llvm_v2f64_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["vfcvt_h_s"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v8i16_ty], ++ [llvm_v4f32_ty, llvm_v4f32_ty], ++ [IntrNoMem]>; ++foreach inst = ["vfcvt_s_d"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v4f32_ty], ++ [llvm_v2f64_ty, llvm_v2f64_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["vfcmp_caf_s", "vfcmp_cun_s", "vfcmp_ceq_s", "vfcmp_cueq_s", ++ "vfcmp_clt_s", "vfcmp_cult_s", "vfcmp_cle_s", "vfcmp_cule_s", ++ "vfcmp_cne_s", "vfcmp_cor_s", "vfcmp_cune_s", ++ "vfcmp_saf_s", "vfcmp_sun_s", "vfcmp_seq_s", "vfcmp_sueq_s", ++ "vfcmp_slt_s", "vfcmp_sult_s", "vfcmp_sle_s", "vfcmp_sule_s", ++ "vfcmp_sne_s", "vfcmp_sor_s", "vfcmp_sune_s"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v4i32_ty], ++ [llvm_v4f32_ty, llvm_v4f32_ty], ++ [IntrNoMem]>; ++foreach inst = ["vfcmp_caf_d", "vfcmp_cun_d", "vfcmp_ceq_d", "vfcmp_cueq_d", ++ "vfcmp_clt_d", "vfcmp_cult_d", "vfcmp_cle_d", "vfcmp_cule_d", ++ "vfcmp_cne_d", "vfcmp_cor_d", "vfcmp_cune_d", ++ "vfcmp_saf_d", "vfcmp_sun_d", "vfcmp_seq_d", "vfcmp_sueq_d", ++ "vfcmp_slt_d", "vfcmp_sult_d", "vfcmp_sle_d", "vfcmp_sule_d", ++ "vfcmp_sne_d", "vfcmp_sor_d", "vfcmp_sune_d"] in ++ def int_loongarch_lsx_#inst : VecInt<[llvm_v2i64_ty], ++ [llvm_v2f64_ty, llvm_v2f64_ty], ++ [IntrNoMem]>; ++ ++// LSX load/store ++def int_loongarch_lsx_vld ++ : VecInt<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i32_ty], ++ [IntrReadMem, IntrArgMemOnly, ImmArg>]>; ++def int_loongarch_lsx_vldx ++ : VecInt<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i64_ty], ++ [IntrReadMem, IntrArgMemOnly]>; ++def int_loongarch_lsx_vldrepl_b ++ : VecInt<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i32_ty], ++ [IntrReadMem, IntrArgMemOnly, ImmArg>]>; ++def int_loongarch_lsx_vldrepl_h ++ : VecInt<[llvm_v8i16_ty], [llvm_ptr_ty, llvm_i32_ty], ++ [IntrReadMem, IntrArgMemOnly, ImmArg>]>; ++def int_loongarch_lsx_vldrepl_w ++ : VecInt<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty], ++ [IntrReadMem, IntrArgMemOnly, ImmArg>]>; ++def int_loongarch_lsx_vldrepl_d ++ : VecInt<[llvm_v2i64_ty], [llvm_ptr_ty, llvm_i32_ty], ++ [IntrReadMem, IntrArgMemOnly, ImmArg>]>; ++ ++def int_loongarch_lsx_vst ++ : VecInt<[], [llvm_v16i8_ty, llvm_ptr_ty, llvm_i32_ty], ++ [IntrWriteMem, IntrArgMemOnly, ImmArg>]>; ++def int_loongarch_lsx_vstx ++ : VecInt<[], [llvm_v16i8_ty, llvm_ptr_ty, llvm_i64_ty], ++ [IntrWriteMem, IntrArgMemOnly]>; ++def int_loongarch_lsx_vstelm_b ++ : VecInt<[], [llvm_v16i8_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], ++ [IntrWriteMem, IntrArgMemOnly, ImmArg>, ImmArg>]>; ++def int_loongarch_lsx_vstelm_h ++ : VecInt<[], [llvm_v8i16_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], ++ [IntrWriteMem, IntrArgMemOnly, ImmArg>, ImmArg>]>; ++def int_loongarch_lsx_vstelm_w ++ : VecInt<[], [llvm_v4i32_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], ++ [IntrWriteMem, IntrArgMemOnly, ImmArg>, ImmArg>]>; ++def int_loongarch_lsx_vstelm_d ++ : VecInt<[], [llvm_v2i64_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], ++ [IntrWriteMem, IntrArgMemOnly, ImmArg>, ImmArg>]>; ++ ++} // TargetPrefix = "loongarch" +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp +index ae7167cb5ce7..f55184019988 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp +@@ -15,6 
+15,7 @@ + #include "MCTargetDesc/LoongArchMCTargetDesc.h" + #include "MCTargetDesc/LoongArchMatInt.h" + #include "llvm/Support/KnownBits.h" ++#include "llvm/Support/raw_ostream.h" + + using namespace llvm; + +@@ -75,7 +76,14 @@ void LoongArchDAGToDAGISel::Select(SDNode *Node) { + ReplaceNode(Node, CurDAG->getMachineNode(ADDIOp, DL, VT, TFI, Imm)); + return; + } +- // TODO: Add selection nodes needed later. ++ case ISD::BITCAST: { ++ if (VT.is128BitVector() || VT.is512BitVector()) { ++ ReplaceUses(SDValue(Node, 0), Node->getOperand(0)); ++ CurDAG->RemoveDeadNode(Node); ++ return; ++ } ++ break; ++ } + } + + // Select the default instruction. +@@ -262,6 +270,96 @@ bool LoongArchDAGToDAGISel::selectZExti32(SDValue N, SDValue &Val) { + return false; + } + ++bool LoongArchDAGToDAGISel::selectVSplat(SDNode *N, APInt &Imm, ++ unsigned MinSizeInBits) const { ++ if (!Subtarget->hasExtLSX()) ++ return false; ++ ++ BuildVectorSDNode *Node = dyn_cast(N); ++ ++ if (!Node) ++ return false; ++ ++ APInt SplatValue, SplatUndef; ++ unsigned SplatBitSize; ++ bool HasAnyUndefs; ++ ++ if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, ++ MinSizeInBits, /*IsBigEndian=*/false)) ++ return false; ++ ++ Imm = SplatValue; ++ ++ return true; ++} ++ ++template ++bool LoongArchDAGToDAGISel::selectVSplatImm(SDValue N, SDValue &SplatVal) { ++ APInt ImmValue; ++ EVT EltTy = N->getValueType(0).getVectorElementType(); ++ ++ if (N->getOpcode() == ISD::BITCAST) ++ N = N->getOperand(0); ++ ++ if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) && ++ ImmValue.getBitWidth() == EltTy.getSizeInBits()) { ++ if (IsSigned && ImmValue.isSignedIntN(ImmBitSize)) { ++ SplatVal = CurDAG->getTargetConstant(ImmValue.getSExtValue(), SDLoc(N), ++ Subtarget->getGRLenVT()); ++ return true; ++ } ++ if (!IsSigned && ImmValue.isIntN(ImmBitSize)) { ++ SplatVal = CurDAG->getTargetConstant(ImmValue.getZExtValue(), SDLoc(N), ++ Subtarget->getGRLenVT()); ++ return true; ++ } ++ } ++ ++ return false; ++} ++ ++bool LoongArchDAGToDAGISel::selectVSplatUimmInvPow2(SDValue N, ++ SDValue &SplatImm) const { ++ APInt ImmValue; ++ EVT EltTy = N->getValueType(0).getVectorElementType(); ++ ++ if (N->getOpcode() == ISD::BITCAST) ++ N = N->getOperand(0); ++ ++ if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) && ++ ImmValue.getBitWidth() == EltTy.getSizeInBits()) { ++ int32_t Log2 = (~ImmValue).exactLogBase2(); ++ ++ if (Log2 != -1) { ++ SplatImm = CurDAG->getTargetConstant(Log2, SDLoc(N), EltTy); ++ return true; ++ } ++ } ++ ++ return false; ++} ++ ++bool LoongArchDAGToDAGISel::selectVSplatUimmPow2(SDValue N, ++ SDValue &SplatImm) const { ++ APInt ImmValue; ++ EVT EltTy = N->getValueType(0).getVectorElementType(); ++ ++ if (N->getOpcode() == ISD::BITCAST) ++ N = N->getOperand(0); ++ ++ if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) && ++ ImmValue.getBitWidth() == EltTy.getSizeInBits()) { ++ int32_t Log2 = ImmValue.exactLogBase2(); ++ ++ if (Log2 != -1) { ++ SplatImm = CurDAG->getTargetConstant(Log2, SDLoc(N), EltTy); ++ return true; ++ } ++ } ++ ++ return false; ++} ++ + // This pass converts a legalized DAG into a LoongArch-specific DAG, ready + // for instruction scheduling. 
+ FunctionPass *llvm::createLoongArchISelDag(LoongArchTargetMachine &TM) { +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h +index 3099407aea3e..5e3d6ccc3755 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h ++++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h +@@ -56,6 +56,14 @@ public: + bool selectSExti32(SDValue N, SDValue &Val); + bool selectZExti32(SDValue N, SDValue &Val); + ++ bool selectVSplat(SDNode *N, APInt &Imm, unsigned MinSizeInBits) const; ++ ++ template ++ bool selectVSplatImm(SDValue N, SDValue &SplatVal); ++ ++ bool selectVSplatUimmInvPow2(SDValue N, SDValue &SplatImm) const; ++ bool selectVSplatUimmPow2(SDValue N, SDValue &SplatImm) const; ++ + // Include the pieces autogenerated from the target description. + #include "LoongArchGenDAGISel.inc" + }; +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +index db5961fc501a..c05133647929 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +@@ -62,6 +62,13 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + MVT::v4i64}) + addRegisterClass(VT, &LoongArch::LASX256RegClass); + ++ static const MVT::SimpleValueType LSXVTs[] = { ++ MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64}; ++ ++ if (Subtarget.hasExtLSX()) ++ for (MVT VT : LSXVTs) ++ addRegisterClass(VT, &LoongArch::LSX128RegClass); ++ + setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, GRLenVT, + MVT::i1, Promote); + +@@ -109,6 +116,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); + setOperationAction(ISD::READ_REGISTER, MVT::i32, Custom); + setOperationAction(ISD::WRITE_REGISTER, MVT::i32, Custom); ++ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom); + if (Subtarget.hasBasicF() && !Subtarget.hasBasicD()) + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); + if (Subtarget.hasBasicF()) +@@ -138,6 +146,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom); + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); + setOperationAction(ISD::INTRINSIC_VOID, MVT::i64, Custom); ++ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); + } + + static const ISD::CondCode FPCCToExpand[] = { +@@ -194,6 +203,10 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + setOperationAction(ISD::UINT_TO_FP, GRLenVT, Custom); + } + ++ if (Subtarget.hasExtLSX()) ++ setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, ++ {MVT::v2i64, MVT::v4i32, MVT::v8i16, MVT::v16i8}, Legal); ++ + // Compute derived properties from the register classes. 
+ computeRegisterProperties(Subtarget.getRegisterInfo()); + +@@ -215,6 +228,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + setTargetDAGCombine(ISD::AND); + setTargetDAGCombine(ISD::OR); + setTargetDAGCombine(ISD::SRL); ++ if (Subtarget.hasExtLSX()) ++ setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); + } + + bool LoongArchTargetLowering::isOffsetFoldingLegal( +@@ -652,9 +667,24 @@ LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op, + return Addr; + } + ++template ++static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp, ++ SelectionDAG &DAG, bool IsSigned = false) { ++ auto *CImm = cast(Op->getOperand(ImmOp)); ++ // Check the ImmArg. ++ if ((IsSigned && !isInt(CImm->getSExtValue())) || ++ (!IsSigned && !isUInt(CImm->getZExtValue()))) { ++ DAG.getContext()->emitError(Op->getOperationName(0) + ++ ": argument out of range."); ++ return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType()); ++ } ++ return SDValue(); ++} ++ + SDValue + LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, + SelectionDAG &DAG) const { ++ SDLoc DL(Op); + switch (Op.getConstantOperandVal(0)) { + default: + return SDValue(); // Don't custom lower most intrinsics. +@@ -662,6 +692,141 @@ LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + return DAG.getRegister(LoongArch::R2, PtrVT); + } ++ case Intrinsic::loongarch_lsx_vpickve2gr_d: ++ case Intrinsic::loongarch_lsx_vpickve2gr_du: ++ case Intrinsic::loongarch_lsx_vreplvei_d: ++ return checkIntrinsicImmArg<1>(Op, 2, DAG); ++ case Intrinsic::loongarch_lsx_vreplvei_w: ++ return checkIntrinsicImmArg<2>(Op, 2, DAG); ++ case Intrinsic::loongarch_lsx_vsat_b: ++ case Intrinsic::loongarch_lsx_vsat_bu: ++ case Intrinsic::loongarch_lsx_vrotri_b: ++ case Intrinsic::loongarch_lsx_vsllwil_h_b: ++ case Intrinsic::loongarch_lsx_vsllwil_hu_bu: ++ case Intrinsic::loongarch_lsx_vsrlri_b: ++ case Intrinsic::loongarch_lsx_vsrari_b: ++ case Intrinsic::loongarch_lsx_vreplvei_h: ++ return checkIntrinsicImmArg<3>(Op, 2, DAG); ++ case Intrinsic::loongarch_lsx_vsat_h: ++ case Intrinsic::loongarch_lsx_vsat_hu: ++ case Intrinsic::loongarch_lsx_vrotri_h: ++ case Intrinsic::loongarch_lsx_vsllwil_w_h: ++ case Intrinsic::loongarch_lsx_vsllwil_wu_hu: ++ case Intrinsic::loongarch_lsx_vsrlri_h: ++ case Intrinsic::loongarch_lsx_vsrari_h: ++ case Intrinsic::loongarch_lsx_vreplvei_b: ++ return checkIntrinsicImmArg<4>(Op, 2, DAG); ++ case Intrinsic::loongarch_lsx_vsrlni_b_h: ++ case Intrinsic::loongarch_lsx_vsrani_b_h: ++ case Intrinsic::loongarch_lsx_vsrlrni_b_h: ++ case Intrinsic::loongarch_lsx_vsrarni_b_h: ++ case Intrinsic::loongarch_lsx_vssrlni_b_h: ++ case Intrinsic::loongarch_lsx_vssrani_b_h: ++ case Intrinsic::loongarch_lsx_vssrlni_bu_h: ++ case Intrinsic::loongarch_lsx_vssrani_bu_h: ++ case Intrinsic::loongarch_lsx_vssrlrni_b_h: ++ case Intrinsic::loongarch_lsx_vssrarni_b_h: ++ case Intrinsic::loongarch_lsx_vssrlrni_bu_h: ++ case Intrinsic::loongarch_lsx_vssrarni_bu_h: ++ return checkIntrinsicImmArg<4>(Op, 3, DAG); ++ case Intrinsic::loongarch_lsx_vsat_w: ++ case Intrinsic::loongarch_lsx_vsat_wu: ++ case Intrinsic::loongarch_lsx_vrotri_w: ++ case Intrinsic::loongarch_lsx_vsllwil_d_w: ++ case Intrinsic::loongarch_lsx_vsllwil_du_wu: ++ case Intrinsic::loongarch_lsx_vsrlri_w: ++ case Intrinsic::loongarch_lsx_vsrari_w: ++ case Intrinsic::loongarch_lsx_vslei_bu: ++ case Intrinsic::loongarch_lsx_vslei_hu: ++ case Intrinsic::loongarch_lsx_vslei_wu: ++ case Intrinsic::loongarch_lsx_vslei_du: ++ 
case Intrinsic::loongarch_lsx_vslti_bu: ++ case Intrinsic::loongarch_lsx_vslti_hu: ++ case Intrinsic::loongarch_lsx_vslti_wu: ++ case Intrinsic::loongarch_lsx_vslti_du: ++ case Intrinsic::loongarch_lsx_vbsll_v: ++ case Intrinsic::loongarch_lsx_vbsrl_v: ++ return checkIntrinsicImmArg<5>(Op, 2, DAG); ++ case Intrinsic::loongarch_lsx_vseqi_b: ++ case Intrinsic::loongarch_lsx_vseqi_h: ++ case Intrinsic::loongarch_lsx_vseqi_w: ++ case Intrinsic::loongarch_lsx_vseqi_d: ++ case Intrinsic::loongarch_lsx_vslei_b: ++ case Intrinsic::loongarch_lsx_vslei_h: ++ case Intrinsic::loongarch_lsx_vslei_w: ++ case Intrinsic::loongarch_lsx_vslei_d: ++ case Intrinsic::loongarch_lsx_vslti_b: ++ case Intrinsic::loongarch_lsx_vslti_h: ++ case Intrinsic::loongarch_lsx_vslti_w: ++ case Intrinsic::loongarch_lsx_vslti_d: ++ return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true); ++ case Intrinsic::loongarch_lsx_vsrlni_h_w: ++ case Intrinsic::loongarch_lsx_vsrani_h_w: ++ case Intrinsic::loongarch_lsx_vsrlrni_h_w: ++ case Intrinsic::loongarch_lsx_vsrarni_h_w: ++ case Intrinsic::loongarch_lsx_vssrlni_h_w: ++ case Intrinsic::loongarch_lsx_vssrani_h_w: ++ case Intrinsic::loongarch_lsx_vssrlni_hu_w: ++ case Intrinsic::loongarch_lsx_vssrani_hu_w: ++ case Intrinsic::loongarch_lsx_vssrlrni_h_w: ++ case Intrinsic::loongarch_lsx_vssrarni_h_w: ++ case Intrinsic::loongarch_lsx_vssrlrni_hu_w: ++ case Intrinsic::loongarch_lsx_vssrarni_hu_w: ++ case Intrinsic::loongarch_lsx_vfrstpi_b: ++ case Intrinsic::loongarch_lsx_vfrstpi_h: ++ return checkIntrinsicImmArg<5>(Op, 3, DAG); ++ case Intrinsic::loongarch_lsx_vsat_d: ++ case Intrinsic::loongarch_lsx_vsat_du: ++ case Intrinsic::loongarch_lsx_vrotri_d: ++ case Intrinsic::loongarch_lsx_vsrlri_d: ++ case Intrinsic::loongarch_lsx_vsrari_d: ++ return checkIntrinsicImmArg<6>(Op, 2, DAG); ++ case Intrinsic::loongarch_lsx_vsrlni_w_d: ++ case Intrinsic::loongarch_lsx_vsrani_w_d: ++ case Intrinsic::loongarch_lsx_vsrlrni_w_d: ++ case Intrinsic::loongarch_lsx_vsrarni_w_d: ++ case Intrinsic::loongarch_lsx_vssrlni_w_d: ++ case Intrinsic::loongarch_lsx_vssrani_w_d: ++ case Intrinsic::loongarch_lsx_vssrlni_wu_d: ++ case Intrinsic::loongarch_lsx_vssrani_wu_d: ++ case Intrinsic::loongarch_lsx_vssrlrni_w_d: ++ case Intrinsic::loongarch_lsx_vssrarni_w_d: ++ case Intrinsic::loongarch_lsx_vssrlrni_wu_d: ++ case Intrinsic::loongarch_lsx_vssrarni_wu_d: ++ return checkIntrinsicImmArg<6>(Op, 3, DAG); ++ case Intrinsic::loongarch_lsx_vsrlni_d_q: ++ case Intrinsic::loongarch_lsx_vsrani_d_q: ++ case Intrinsic::loongarch_lsx_vsrlrni_d_q: ++ case Intrinsic::loongarch_lsx_vsrarni_d_q: ++ case Intrinsic::loongarch_lsx_vssrlni_d_q: ++ case Intrinsic::loongarch_lsx_vssrani_d_q: ++ case Intrinsic::loongarch_lsx_vssrlni_du_q: ++ case Intrinsic::loongarch_lsx_vssrani_du_q: ++ case Intrinsic::loongarch_lsx_vssrlrni_d_q: ++ case Intrinsic::loongarch_lsx_vssrarni_d_q: ++ case Intrinsic::loongarch_lsx_vssrlrni_du_q: ++ case Intrinsic::loongarch_lsx_vssrarni_du_q: ++ return checkIntrinsicImmArg<7>(Op, 3, DAG); ++ case Intrinsic::loongarch_lsx_vnori_b: ++ case Intrinsic::loongarch_lsx_vshuf4i_b: ++ case Intrinsic::loongarch_lsx_vshuf4i_h: ++ case Intrinsic::loongarch_lsx_vshuf4i_w: ++ return checkIntrinsicImmArg<8>(Op, 2, DAG); ++ case Intrinsic::loongarch_lsx_vshuf4i_d: ++ case Intrinsic::loongarch_lsx_vpermi_w: ++ case Intrinsic::loongarch_lsx_vbitseli_b: ++ case Intrinsic::loongarch_lsx_vextrins_b: ++ case Intrinsic::loongarch_lsx_vextrins_h: ++ case Intrinsic::loongarch_lsx_vextrins_w: ++ case 
Intrinsic::loongarch_lsx_vextrins_d: ++ return checkIntrinsicImmArg<8>(Op, 3, DAG); ++ case Intrinsic::loongarch_lsx_vrepli_b: ++ case Intrinsic::loongarch_lsx_vrepli_h: ++ case Intrinsic::loongarch_lsx_vrepli_w: ++ case Intrinsic::loongarch_lsx_vrepli_d: ++ return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true); ++ case Intrinsic::loongarch_lsx_vldi: ++ return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true); + } + } + +@@ -757,6 +922,29 @@ LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, + : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other}, + {Chain, DAG.getConstant(Imm, DL, GRLenVT)}); + } ++ case Intrinsic::loongarch_lsx_vld: ++ case Intrinsic::loongarch_lsx_vldrepl_b: ++ return !isInt<12>(cast(Op.getOperand(3))->getSExtValue()) ++ ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG) ++ : SDValue(); ++ case Intrinsic::loongarch_lsx_vldrepl_h: ++ return !isShiftedInt<11, 1>( ++ cast(Op.getOperand(3))->getSExtValue()) ++ ? emitIntrinsicWithChainErrorMessage( ++ Op, "argument out of range or not a multiple of 2", DAG) ++ : SDValue(); ++ case Intrinsic::loongarch_lsx_vldrepl_w: ++ return !isShiftedInt<10, 2>( ++ cast(Op.getOperand(3))->getSExtValue()) ++ ? emitIntrinsicWithChainErrorMessage( ++ Op, "argument out of range or not a multiple of 4", DAG) ++ : SDValue(); ++ case Intrinsic::loongarch_lsx_vldrepl_d: ++ return !isShiftedInt<9, 3>( ++ cast(Op.getOperand(3))->getSExtValue()) ++ ? emitIntrinsicWithChainErrorMessage( ++ Op, "argument out of range or not a multiple of 8", DAG) ++ : SDValue(); + } + } + +@@ -875,6 +1063,36 @@ SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op, + : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) + : Op; + } ++ case Intrinsic::loongarch_lsx_vst: ++ return !isInt<12>(cast(Op.getOperand(4))->getSExtValue()) ++ ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) ++ : SDValue(); ++ case Intrinsic::loongarch_lsx_vstelm_b: ++ return (!isInt<8>(cast(Op.getOperand(4))->getSExtValue()) || ++ !isUInt<4>(cast(Op.getOperand(5))->getZExtValue())) ++ ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) ++ : SDValue(); ++ case Intrinsic::loongarch_lsx_vstelm_h: ++ return (!isShiftedInt<8, 1>( ++ cast(Op.getOperand(4))->getSExtValue()) || ++ !isUInt<3>(cast(Op.getOperand(5))->getZExtValue())) ++ ? emitIntrinsicErrorMessage( ++ Op, "argument out of range or not a multiple of 2", DAG) ++ : SDValue(); ++ case Intrinsic::loongarch_lsx_vstelm_w: ++ return (!isShiftedInt<8, 2>( ++ cast(Op.getOperand(4))->getSExtValue()) || ++ !isUInt<2>(cast(Op.getOperand(5))->getZExtValue())) ++ ? emitIntrinsicErrorMessage( ++ Op, "argument out of range or not a multiple of 4", DAG) ++ : SDValue(); ++ case Intrinsic::loongarch_lsx_vstelm_d: ++ return (!isShiftedInt<8, 3>( ++ cast(Op.getOperand(4))->getSExtValue()) || ++ !isUInt<1>(cast(Op.getOperand(5))->getZExtValue())) ++ ? emitIntrinsicErrorMessage( ++ Op, "argument out of range or not a multiple of 8", DAG) ++ : SDValue(); + } + } + +@@ -1026,16 +1244,110 @@ static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, + return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes); + } + +-// Helper function that emits error message for intrinsics with chain and return +-// a UNDEF and the chain as the results. +-static void emitErrorAndReplaceIntrinsicWithChainResults( ++// Helper function that emits error message for intrinsics with/without chain ++// and return a UNDEF or and the chain as the results. 
++static void emitErrorAndReplaceIntrinsicResults(
+     SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
+-    StringRef ErrorMsg) {
++    StringRef ErrorMsg, bool WithChain = true) {
+   DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
+   Results.push_back(DAG.getUNDEF(N->getValueType(0)));
++  if (!WithChain)
++    return;
+   Results.push_back(N->getOperand(0));
+ }
+ 
++template <unsigned N>
++static void
++replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results,
++                         SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
++                         unsigned ResOp) {
++  const StringRef ErrorMsgOOR = "argument out of range";
++  unsigned Imm = cast<ConstantSDNode>(Node->getOperand(2))->getZExtValue();
++  if (!isUInt<N>(Imm)) {
++    emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR,
++                                        /*WithChain=*/false);
++    return;
++  }
++  SDLoc DL(Node);
++  SDValue Vec = Node->getOperand(1);
++
++  SDValue PickElt =
++      DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
++                  DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
++                  DAG.getValueType(Vec.getValueType().getVectorElementType()));
++  Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
++                                PickElt.getValue(0)));
++}
++
++static void replaceVecCondBranchResults(SDNode *N,
++                                        SmallVectorImpl<SDValue> &Results,
++                                        SelectionDAG &DAG,
++                                        const LoongArchSubtarget &Subtarget,
++                                        unsigned ResOp) {
++  SDLoc DL(N);
++  SDValue Vec = N->getOperand(1);
++
++  SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
++  Results.push_back(
++      DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
++}
++
++static void
++replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
++                                 SelectionDAG &DAG,
++                                 const LoongArchSubtarget &Subtarget) {
++  switch (N->getConstantOperandVal(0)) {
++  default:
++    llvm_unreachable("Unexpected Intrinsic.");
++  case Intrinsic::loongarch_lsx_vpickve2gr_b:
++    replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
++                                LoongArchISD::VPICK_SEXT_ELT);
++    break;
++  case Intrinsic::loongarch_lsx_vpickve2gr_h:
++    replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
++                                LoongArchISD::VPICK_SEXT_ELT);
++    break;
++  case Intrinsic::loongarch_lsx_vpickve2gr_w:
++    replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
++                                LoongArchISD::VPICK_SEXT_ELT);
++    break;
++  case Intrinsic::loongarch_lsx_vpickve2gr_bu:
++    replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
++                                LoongArchISD::VPICK_ZEXT_ELT);
++    break;
++  case Intrinsic::loongarch_lsx_vpickve2gr_hu:
++    replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
++                                LoongArchISD::VPICK_ZEXT_ELT);
++    break;
++  case Intrinsic::loongarch_lsx_vpickve2gr_wu:
++    replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
++                                LoongArchISD::VPICK_ZEXT_ELT);
++    break;
++  case Intrinsic::loongarch_lsx_bz_b:
++  case Intrinsic::loongarch_lsx_bz_h:
++  case Intrinsic::loongarch_lsx_bz_w:
++  case Intrinsic::loongarch_lsx_bz_d:
++    replaceVecCondBranchResults(N, Results, DAG, Subtarget,
++                                LoongArchISD::VALL_ZERO);
++    break;
++  case Intrinsic::loongarch_lsx_bz_v:
++    replaceVecCondBranchResults(N, Results, DAG, Subtarget,
++                                LoongArchISD::VANY_ZERO);
++    break;
++  case Intrinsic::loongarch_lsx_bnz_b:
++  case Intrinsic::loongarch_lsx_bnz_h:
++  case Intrinsic::loongarch_lsx_bnz_w:
++  case Intrinsic::loongarch_lsx_bnz_d:
++    replaceVecCondBranchResults(N, Results, DAG, Subtarget,
++                                LoongArchISD::VALL_NONZERO);
++    break;
++  case Intrinsic::loongarch_lsx_bnz_v:
++    replaceVecCondBranchResults(N, Results, DAG, Subtarget,
++                                LoongArchISD::VANY_NONZERO);
++    break;
++  }
++}
++
+ void
LoongArchTargetLowering::ReplaceNodeResults( + SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const { + SDLoc DL(N); +@@ -1168,14 +1480,12 @@ void LoongArchTargetLowering::ReplaceNodeResults( + llvm_unreachable("Unexpected Intrinsic."); + case Intrinsic::loongarch_movfcsr2gr: { + if (!Subtarget.hasBasicF()) { +- emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG, +- ErrorMsgReqF); ++ emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF); + return; + } + unsigned Imm = cast(Op2)->getZExtValue(); + if (!isUInt<2>(Imm)) { +- emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG, +- ErrorMsgOOR); ++ emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR); + return; + } + SDValue MOVFCSR2GRResults = DAG.getNode( +@@ -1211,7 +1521,7 @@ void LoongArchTargetLowering::ReplaceNodeResults( + {Chain, Op2, \ + DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \ + Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \ +- Results.push_back(NODE.getValue(1)); \ ++ Results.push_back(NODE.getValue(1)); \ + break; \ + } + CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W) +@@ -1220,8 +1530,7 @@ void LoongArchTargetLowering::ReplaceNodeResults( + #define CSR_CASE(ID) \ + case Intrinsic::loongarch_##ID: { \ + if (!Subtarget.is64Bit()) \ +- emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG, \ +- ErrorMsgReqLA64); \ ++ emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \ + break; \ + } + CSR_CASE(csrrd_d); +@@ -1232,8 +1541,7 @@ void LoongArchTargetLowering::ReplaceNodeResults( + case Intrinsic::loongarch_csrrd_w: { + unsigned Imm = cast(Op2)->getZExtValue(); + if (!isUInt<14>(Imm)) { +- emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG, +- ErrorMsgOOR); ++ emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR); + return; + } + SDValue CSRRDResults = +@@ -1247,8 +1555,7 @@ void LoongArchTargetLowering::ReplaceNodeResults( + case Intrinsic::loongarch_csrwr_w: { + unsigned Imm = cast(N->getOperand(3))->getZExtValue(); + if (!isUInt<14>(Imm)) { +- emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG, +- ErrorMsgOOR); ++ emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR); + return; + } + SDValue CSRWRResults = +@@ -1263,8 +1570,7 @@ void LoongArchTargetLowering::ReplaceNodeResults( + case Intrinsic::loongarch_csrxchg_w: { + unsigned Imm = cast(N->getOperand(4))->getZExtValue(); + if (!isUInt<14>(Imm)) { +- emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG, +- ErrorMsgOOR); ++ emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR); + return; + } + SDValue CSRXCHGResults = DAG.getNode( +@@ -1302,8 +1608,7 @@ void LoongArchTargetLowering::ReplaceNodeResults( + } + case Intrinsic::loongarch_lddir_d: { + if (!Subtarget.is64Bit()) { +- emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG, +- ErrorMsgReqLA64); ++ emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); + return; + } + break; +@@ -1322,6 +1627,10 @@ void LoongArchTargetLowering::ReplaceNodeResults( + Results.push_back(N->getOperand(0)); + break; + } ++ case ISD::INTRINSIC_WO_CHAIN: { ++ replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget); ++ break; ++ } + } + } + +@@ -1685,6 +1994,440 @@ static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG, + Src.getOperand(0)); + } + ++template ++static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp, ++ SelectionDAG &DAG, ++ const LoongArchSubtarget &Subtarget, ++ bool IsSigned = false) { ++ 
SDLoc DL(Node);
++  auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
++  // Check the ImmArg.
++  if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
++      (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
++    DAG.getContext()->emitError(Node->getOperationName(0) +
++                                ": argument out of range.");
++    return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
++  }
++  return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
++}
++
++template <unsigned N>
++static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
++                                   SelectionDAG &DAG, bool IsSigned = false) {
++  SDLoc DL(Node);
++  EVT ResTy = Node->getValueType(0);
++  auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
++
++  // Check the ImmArg.
++  if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
++      (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
++    DAG.getContext()->emitError(Node->getOperationName(0) +
++                                ": argument out of range.");
++    return DAG.getNode(ISD::UNDEF, DL, ResTy);
++  }
++  return DAG.getConstant(
++      APInt(ResTy.getScalarType().getSizeInBits(),
++            IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
++      DL, ResTy);
++}
++
++static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {
++  SDLoc DL(Node);
++  EVT ResTy = Node->getValueType(0);
++  SDValue Vec = Node->getOperand(2);
++  SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
++  return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
++}
++
++static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {
++  SDLoc DL(Node);
++  EVT ResTy = Node->getValueType(0);
++  SDValue One = DAG.getConstant(1, DL, ResTy);
++  SDValue Bit =
++      DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
++
++  return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
++                     DAG.getNOT(DL, Bit, ResTy));
++}
++
++template <unsigned N>
++static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {
++  SDLoc DL(Node);
++  EVT ResTy = Node->getValueType(0);
++  auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
++  // Check the unsigned ImmArg.
++  if (!isUInt<N>(CImm->getZExtValue())) {
++    DAG.getContext()->emitError(Node->getOperationName(0) +
++                                ": argument out of range.");
++    return DAG.getNode(ISD::UNDEF, DL, ResTy);
++  }
++
++  APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
++  SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
++
++  return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
++}
++
++template <unsigned N>
++static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {
++  SDLoc DL(Node);
++  EVT ResTy = Node->getValueType(0);
++  auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
++  // Check the unsigned ImmArg.
++  if (!isUInt<N>(CImm->getZExtValue())) {
++    DAG.getContext()->emitError(Node->getOperationName(0) +
++                                ": argument out of range.");
++    return DAG.getNode(ISD::UNDEF, DL, ResTy);
++  }
++
++  APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
++  SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
++  return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
++}
++
++template <unsigned N>
++static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
++  SDLoc DL(Node);
++  EVT ResTy = Node->getValueType(0);
++  auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
++  // Check the unsigned ImmArg.
++ if (!isUInt(CImm->getZExtValue())) { ++ DAG.getContext()->emitError(Node->getOperationName(0) + ++ ": argument out of range."); ++ return DAG.getNode(ISD::UNDEF, DL, ResTy); ++ } ++ ++ APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue(); ++ SDValue BitImm = DAG.getConstant(Imm, DL, ResTy); ++ return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm); ++} ++ ++static SDValue ++performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, ++ TargetLowering::DAGCombinerInfo &DCI, ++ const LoongArchSubtarget &Subtarget) { ++ SDLoc DL(N); ++ switch (N->getConstantOperandVal(0)) { ++ default: ++ break; ++ case Intrinsic::loongarch_lsx_vadd_b: ++ case Intrinsic::loongarch_lsx_vadd_h: ++ case Intrinsic::loongarch_lsx_vadd_w: ++ case Intrinsic::loongarch_lsx_vadd_d: ++ return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1), ++ N->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vaddi_bu: ++ case Intrinsic::loongarch_lsx_vaddi_hu: ++ case Intrinsic::loongarch_lsx_vaddi_wu: ++ case Intrinsic::loongarch_lsx_vaddi_du: ++ return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1), ++ lowerVectorSplatImm<5>(N, 2, DAG)); ++ case Intrinsic::loongarch_lsx_vsub_b: ++ case Intrinsic::loongarch_lsx_vsub_h: ++ case Intrinsic::loongarch_lsx_vsub_w: ++ case Intrinsic::loongarch_lsx_vsub_d: ++ return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1), ++ N->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vsubi_bu: ++ case Intrinsic::loongarch_lsx_vsubi_hu: ++ case Intrinsic::loongarch_lsx_vsubi_wu: ++ case Intrinsic::loongarch_lsx_vsubi_du: ++ return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1), ++ lowerVectorSplatImm<5>(N, 2, DAG)); ++ case Intrinsic::loongarch_lsx_vneg_b: ++ case Intrinsic::loongarch_lsx_vneg_h: ++ case Intrinsic::loongarch_lsx_vneg_w: ++ case Intrinsic::loongarch_lsx_vneg_d: ++ return DAG.getNode( ++ ISD::SUB, DL, N->getValueType(0), ++ DAG.getConstant( ++ APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0, ++ /*isSigned=*/true), ++ SDLoc(N), N->getValueType(0)), ++ N->getOperand(1)); ++ case Intrinsic::loongarch_lsx_vmax_b: ++ case Intrinsic::loongarch_lsx_vmax_h: ++ case Intrinsic::loongarch_lsx_vmax_w: ++ case Intrinsic::loongarch_lsx_vmax_d: ++ return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1), ++ N->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vmax_bu: ++ case Intrinsic::loongarch_lsx_vmax_hu: ++ case Intrinsic::loongarch_lsx_vmax_wu: ++ case Intrinsic::loongarch_lsx_vmax_du: ++ return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1), ++ N->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vmaxi_b: ++ case Intrinsic::loongarch_lsx_vmaxi_h: ++ case Intrinsic::loongarch_lsx_vmaxi_w: ++ case Intrinsic::loongarch_lsx_vmaxi_d: ++ return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1), ++ lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true)); ++ case Intrinsic::loongarch_lsx_vmaxi_bu: ++ case Intrinsic::loongarch_lsx_vmaxi_hu: ++ case Intrinsic::loongarch_lsx_vmaxi_wu: ++ case Intrinsic::loongarch_lsx_vmaxi_du: ++ return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1), ++ lowerVectorSplatImm<5>(N, 2, DAG)); ++ case Intrinsic::loongarch_lsx_vmin_b: ++ case Intrinsic::loongarch_lsx_vmin_h: ++ case Intrinsic::loongarch_lsx_vmin_w: ++ case Intrinsic::loongarch_lsx_vmin_d: ++ return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1), ++ N->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vmin_bu: ++ case 
Intrinsic::loongarch_lsx_vmin_hu: ++ case Intrinsic::loongarch_lsx_vmin_wu: ++ case Intrinsic::loongarch_lsx_vmin_du: ++ return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1), ++ N->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vmini_b: ++ case Intrinsic::loongarch_lsx_vmini_h: ++ case Intrinsic::loongarch_lsx_vmini_w: ++ case Intrinsic::loongarch_lsx_vmini_d: ++ return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1), ++ lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true)); ++ case Intrinsic::loongarch_lsx_vmini_bu: ++ case Intrinsic::loongarch_lsx_vmini_hu: ++ case Intrinsic::loongarch_lsx_vmini_wu: ++ case Intrinsic::loongarch_lsx_vmini_du: ++ return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1), ++ lowerVectorSplatImm<5>(N, 2, DAG)); ++ case Intrinsic::loongarch_lsx_vmul_b: ++ case Intrinsic::loongarch_lsx_vmul_h: ++ case Intrinsic::loongarch_lsx_vmul_w: ++ case Intrinsic::loongarch_lsx_vmul_d: ++ return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1), ++ N->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vmadd_b: ++ case Intrinsic::loongarch_lsx_vmadd_h: ++ case Intrinsic::loongarch_lsx_vmadd_w: ++ case Intrinsic::loongarch_lsx_vmadd_d: { ++ EVT ResTy = N->getValueType(0); ++ return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1), ++ DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2), ++ N->getOperand(3))); ++ } ++ case Intrinsic::loongarch_lsx_vmsub_b: ++ case Intrinsic::loongarch_lsx_vmsub_h: ++ case Intrinsic::loongarch_lsx_vmsub_w: ++ case Intrinsic::loongarch_lsx_vmsub_d: { ++ EVT ResTy = N->getValueType(0); ++ return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1), ++ DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2), ++ N->getOperand(3))); ++ } ++ case Intrinsic::loongarch_lsx_vdiv_b: ++ case Intrinsic::loongarch_lsx_vdiv_h: ++ case Intrinsic::loongarch_lsx_vdiv_w: ++ case Intrinsic::loongarch_lsx_vdiv_d: ++ return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1), ++ N->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vdiv_bu: ++ case Intrinsic::loongarch_lsx_vdiv_hu: ++ case Intrinsic::loongarch_lsx_vdiv_wu: ++ case Intrinsic::loongarch_lsx_vdiv_du: ++ return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1), ++ N->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vmod_b: ++ case Intrinsic::loongarch_lsx_vmod_h: ++ case Intrinsic::loongarch_lsx_vmod_w: ++ case Intrinsic::loongarch_lsx_vmod_d: ++ return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1), ++ N->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vmod_bu: ++ case Intrinsic::loongarch_lsx_vmod_hu: ++ case Intrinsic::loongarch_lsx_vmod_wu: ++ case Intrinsic::loongarch_lsx_vmod_du: ++ return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1), ++ N->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vand_v: ++ return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1), ++ N->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vor_v: ++ return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1), ++ N->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vxor_v: ++ return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1), ++ N->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vnor_v: { ++ SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1), ++ N->getOperand(2)); ++ return DAG.getNOT(DL, Res, Res->getValueType(0)); ++ } ++ case Intrinsic::loongarch_lsx_vandi_b: ++ return DAG.getNode(ISD::AND, DL, N->getValueType(0), 
N->getOperand(1), ++ lowerVectorSplatImm<8>(N, 2, DAG)); ++ case Intrinsic::loongarch_lsx_vori_b: ++ return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1), ++ lowerVectorSplatImm<8>(N, 2, DAG)); ++ case Intrinsic::loongarch_lsx_vxori_b: ++ return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1), ++ lowerVectorSplatImm<8>(N, 2, DAG)); ++ case Intrinsic::loongarch_lsx_vsll_b: ++ case Intrinsic::loongarch_lsx_vsll_h: ++ case Intrinsic::loongarch_lsx_vsll_w: ++ case Intrinsic::loongarch_lsx_vsll_d: ++ return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), ++ truncateVecElts(N, DAG)); ++ case Intrinsic::loongarch_lsx_vslli_b: ++ return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), ++ lowerVectorSplatImm<3>(N, 2, DAG)); ++ case Intrinsic::loongarch_lsx_vslli_h: ++ return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), ++ lowerVectorSplatImm<4>(N, 2, DAG)); ++ case Intrinsic::loongarch_lsx_vslli_w: ++ return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), ++ lowerVectorSplatImm<5>(N, 2, DAG)); ++ case Intrinsic::loongarch_lsx_vslli_d: ++ return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), ++ lowerVectorSplatImm<6>(N, 2, DAG)); ++ case Intrinsic::loongarch_lsx_vsrl_b: ++ case Intrinsic::loongarch_lsx_vsrl_h: ++ case Intrinsic::loongarch_lsx_vsrl_w: ++ case Intrinsic::loongarch_lsx_vsrl_d: ++ return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), ++ truncateVecElts(N, DAG)); ++ case Intrinsic::loongarch_lsx_vsrli_b: ++ return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), ++ lowerVectorSplatImm<3>(N, 2, DAG)); ++ case Intrinsic::loongarch_lsx_vsrli_h: ++ return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), ++ lowerVectorSplatImm<4>(N, 2, DAG)); ++ case Intrinsic::loongarch_lsx_vsrli_w: ++ return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), ++ lowerVectorSplatImm<5>(N, 2, DAG)); ++ case Intrinsic::loongarch_lsx_vsrli_d: ++ return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), ++ lowerVectorSplatImm<6>(N, 2, DAG)); ++ case Intrinsic::loongarch_lsx_vsra_b: ++ case Intrinsic::loongarch_lsx_vsra_h: ++ case Intrinsic::loongarch_lsx_vsra_w: ++ case Intrinsic::loongarch_lsx_vsra_d: ++ return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), ++ truncateVecElts(N, DAG)); ++ case Intrinsic::loongarch_lsx_vsrai_b: ++ return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), ++ lowerVectorSplatImm<3>(N, 2, DAG)); ++ case Intrinsic::loongarch_lsx_vsrai_h: ++ return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), ++ lowerVectorSplatImm<4>(N, 2, DAG)); ++ case Intrinsic::loongarch_lsx_vsrai_w: ++ return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), ++ lowerVectorSplatImm<5>(N, 2, DAG)); ++ case Intrinsic::loongarch_lsx_vsrai_d: ++ return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), ++ lowerVectorSplatImm<6>(N, 2, DAG)); ++ case Intrinsic::loongarch_lsx_vpcnt_b: ++ case Intrinsic::loongarch_lsx_vpcnt_h: ++ case Intrinsic::loongarch_lsx_vpcnt_w: ++ case Intrinsic::loongarch_lsx_vpcnt_d: ++ return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1)); ++ case Intrinsic::loongarch_lsx_vbitclr_b: ++ case Intrinsic::loongarch_lsx_vbitclr_h: ++ case Intrinsic::loongarch_lsx_vbitclr_w: ++ case Intrinsic::loongarch_lsx_vbitclr_d: ++ return lowerVectorBitClear(N, DAG); ++ case Intrinsic::loongarch_lsx_vbitclri_b: ++ return 
lowerVectorBitClearImm<3>(N, DAG); ++ case Intrinsic::loongarch_lsx_vbitclri_h: ++ return lowerVectorBitClearImm<4>(N, DAG); ++ case Intrinsic::loongarch_lsx_vbitclri_w: ++ return lowerVectorBitClearImm<5>(N, DAG); ++ case Intrinsic::loongarch_lsx_vbitclri_d: ++ return lowerVectorBitClearImm<6>(N, DAG); ++ case Intrinsic::loongarch_lsx_vbitset_b: ++ case Intrinsic::loongarch_lsx_vbitset_h: ++ case Intrinsic::loongarch_lsx_vbitset_w: ++ case Intrinsic::loongarch_lsx_vbitset_d: { ++ EVT VecTy = N->getValueType(0); ++ SDValue One = DAG.getConstant(1, DL, VecTy); ++ return DAG.getNode( ++ ISD::OR, DL, VecTy, N->getOperand(1), ++ DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG))); ++ } ++ case Intrinsic::loongarch_lsx_vbitseti_b: ++ return lowerVectorBitSetImm<3>(N, DAG); ++ case Intrinsic::loongarch_lsx_vbitseti_h: ++ return lowerVectorBitSetImm<4>(N, DAG); ++ case Intrinsic::loongarch_lsx_vbitseti_w: ++ return lowerVectorBitSetImm<5>(N, DAG); ++ case Intrinsic::loongarch_lsx_vbitseti_d: ++ return lowerVectorBitSetImm<6>(N, DAG); ++ case Intrinsic::loongarch_lsx_vbitrev_b: ++ case Intrinsic::loongarch_lsx_vbitrev_h: ++ case Intrinsic::loongarch_lsx_vbitrev_w: ++ case Intrinsic::loongarch_lsx_vbitrev_d: { ++ EVT VecTy = N->getValueType(0); ++ SDValue One = DAG.getConstant(1, DL, VecTy); ++ return DAG.getNode( ++ ISD::XOR, DL, VecTy, N->getOperand(1), ++ DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG))); ++ } ++ case Intrinsic::loongarch_lsx_vbitrevi_b: ++ return lowerVectorBitRevImm<3>(N, DAG); ++ case Intrinsic::loongarch_lsx_vbitrevi_h: ++ return lowerVectorBitRevImm<4>(N, DAG); ++ case Intrinsic::loongarch_lsx_vbitrevi_w: ++ return lowerVectorBitRevImm<5>(N, DAG); ++ case Intrinsic::loongarch_lsx_vbitrevi_d: ++ return lowerVectorBitRevImm<6>(N, DAG); ++ case Intrinsic::loongarch_lsx_vfadd_s: ++ case Intrinsic::loongarch_lsx_vfadd_d: ++ return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1), ++ N->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vfsub_s: ++ case Intrinsic::loongarch_lsx_vfsub_d: ++ return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1), ++ N->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vfmul_s: ++ case Intrinsic::loongarch_lsx_vfmul_d: ++ return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1), ++ N->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vfdiv_s: ++ case Intrinsic::loongarch_lsx_vfdiv_d: ++ return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1), ++ N->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vfmadd_s: ++ case Intrinsic::loongarch_lsx_vfmadd_d: ++ return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1), ++ N->getOperand(2), N->getOperand(3)); ++ case Intrinsic::loongarch_lsx_vinsgr2vr_b: ++ return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0), ++ N->getOperand(1), N->getOperand(2), ++ legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget)); ++ case Intrinsic::loongarch_lsx_vinsgr2vr_h: ++ return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0), ++ N->getOperand(1), N->getOperand(2), ++ legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget)); ++ case Intrinsic::loongarch_lsx_vinsgr2vr_w: ++ return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0), ++ N->getOperand(1), N->getOperand(2), ++ legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget)); ++ case Intrinsic::loongarch_lsx_vinsgr2vr_d: ++ return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0), ++ N->getOperand(1), N->getOperand(2), ++ 
legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget)); ++ case Intrinsic::loongarch_lsx_vreplgr2vr_b: ++ case Intrinsic::loongarch_lsx_vreplgr2vr_h: ++ case Intrinsic::loongarch_lsx_vreplgr2vr_w: ++ case Intrinsic::loongarch_lsx_vreplgr2vr_d: { ++ EVT ResTy = N->getValueType(0); ++ SmallVector Ops(ResTy.getVectorNumElements(), N->getOperand(1)); ++ return DAG.getBuildVector(ResTy, DL, Ops); ++ } ++ case Intrinsic::loongarch_lsx_vreplve_b: ++ case Intrinsic::loongarch_lsx_vreplve_h: ++ case Intrinsic::loongarch_lsx_vreplve_w: ++ case Intrinsic::loongarch_lsx_vreplve_d: ++ return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0), ++ N->getOperand(1), ++ DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(), ++ N->getOperand(2))); ++ } ++ return SDValue(); ++} ++ + SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; +@@ -1699,6 +2442,8 @@ SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N, + return performSRLCombine(N, DAG, DCI, Subtarget); + case LoongArchISD::BITREV_W: + return performBITREV_WCombine(N, DAG, DCI, Subtarget); ++ case ISD::INTRINSIC_WO_CHAIN: ++ return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget); + } + return SDValue(); + } +@@ -1752,6 +2497,101 @@ static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI, + return SinkMBB; + } + ++static MachineBasicBlock * ++emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB, ++ const LoongArchSubtarget &Subtarget) { ++ unsigned CondOpc; ++ switch (MI.getOpcode()) { ++ default: ++ llvm_unreachable("Unexpected opcode"); ++ case LoongArch::PseudoVBZ: ++ CondOpc = LoongArch::VSETEQZ_V; ++ break; ++ case LoongArch::PseudoVBZ_B: ++ CondOpc = LoongArch::VSETANYEQZ_B; ++ break; ++ case LoongArch::PseudoVBZ_H: ++ CondOpc = LoongArch::VSETANYEQZ_H; ++ break; ++ case LoongArch::PseudoVBZ_W: ++ CondOpc = LoongArch::VSETANYEQZ_W; ++ break; ++ case LoongArch::PseudoVBZ_D: ++ CondOpc = LoongArch::VSETANYEQZ_D; ++ break; ++ case LoongArch::PseudoVBNZ: ++ CondOpc = LoongArch::VSETNEZ_V; ++ break; ++ case LoongArch::PseudoVBNZ_B: ++ CondOpc = LoongArch::VSETALLNEZ_B; ++ break; ++ case LoongArch::PseudoVBNZ_H: ++ CondOpc = LoongArch::VSETALLNEZ_H; ++ break; ++ case LoongArch::PseudoVBNZ_W: ++ CondOpc = LoongArch::VSETALLNEZ_W; ++ break; ++ case LoongArch::PseudoVBNZ_D: ++ CondOpc = LoongArch::VSETALLNEZ_D; ++ break; ++ } ++ ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ const BasicBlock *LLVM_BB = BB->getBasicBlock(); ++ DebugLoc DL = MI.getDebugLoc(); ++ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); ++ MachineFunction::iterator It = ++BB->getIterator(); ++ ++ MachineFunction *F = BB->getParent(); ++ MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB); ++ ++ F->insert(It, FalseBB); ++ F->insert(It, TrueBB); ++ F->insert(It, SinkBB); ++ ++ // Transfer the remainder of MBB and its successor edges to Sink. ++ SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end()); ++ SinkBB->transferSuccessorsAndUpdatePHIs(BB); ++ ++ // Insert the real instruction to BB. ++ Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass); ++ BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg()); ++ ++ // Insert branch. 
++ BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB); ++ BB->addSuccessor(FalseBB); ++ BB->addSuccessor(TrueBB); ++ ++ // FalseBB. ++ Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass); ++ BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1) ++ .addReg(LoongArch::R0) ++ .addImm(0); ++ BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB); ++ FalseBB->addSuccessor(SinkBB); ++ ++ // TrueBB. ++ Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass); ++ BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2) ++ .addReg(LoongArch::R0) ++ .addImm(1); ++ TrueBB->addSuccessor(SinkBB); ++ ++ // SinkBB: merge the results. ++ BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI), ++ MI.getOperand(0).getReg()) ++ .addReg(RD1) ++ .addMBB(FalseBB) ++ .addReg(RD2) ++ .addMBB(TrueBB); ++ ++ // The pseudo instruction is gone now. ++ MI.eraseFromParent(); ++ return SinkBB; ++} ++ + MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter( + MachineInstr &MI, MachineBasicBlock *BB) const { + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); +@@ -1786,6 +2626,17 @@ MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter( + MI.eraseFromParent(); + return BB; + } ++ case LoongArch::PseudoVBZ: ++ case LoongArch::PseudoVBZ_B: ++ case LoongArch::PseudoVBZ_H: ++ case LoongArch::PseudoVBZ_W: ++ case LoongArch::PseudoVBZ_D: ++ case LoongArch::PseudoVBNZ: ++ case LoongArch::PseudoVBNZ_B: ++ case LoongArch::PseudoVBNZ_H: ++ case LoongArch::PseudoVBNZ_W: ++ case LoongArch::PseudoVBNZ_D: ++ return emitVecCondBranchPseudo(MI, BB, Subtarget); + } + } + +@@ -1858,6 +2709,13 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const { + NODE_NAME_CASE(MOVFCSR2GR) + NODE_NAME_CASE(CACOP_D) + NODE_NAME_CASE(CACOP_W) ++ NODE_NAME_CASE(VPICK_SEXT_ELT) ++ NODE_NAME_CASE(VPICK_ZEXT_ELT) ++ NODE_NAME_CASE(VREPLVE) ++ NODE_NAME_CASE(VALL_ZERO) ++ NODE_NAME_CASE(VANY_ZERO) ++ NODE_NAME_CASE(VALL_NONZERO) ++ NODE_NAME_CASE(VANY_NONZERO) + } + #undef NODE_NAME_CASE + return nullptr; +@@ -1884,6 +2742,10 @@ const MCPhysReg ArgFPR64s[] = { + LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64, + LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64}; + ++const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2, ++ LoongArch::VR3, LoongArch::VR4, LoongArch::VR5, ++ LoongArch::VR6, LoongArch::VR7}; ++ + // Pass a 2*GRLen argument that has been split into two GRLen values through + // registers or the stack as necessary. 
+ static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State, +@@ -2030,6 +2892,8 @@ static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI, + Reg = State.AllocateReg(ArgFPR32s); + else if (ValVT == MVT::f64 && !UseGPRForFloat) + Reg = State.AllocateReg(ArgFPR64s); ++ else if (ValVT.is128BitVector()) ++ Reg = State.AllocateReg(ArgVRs); + else + Reg = State.AllocateReg(ArgGPRs); + +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +index 500407493fe5..7765057ebffb 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +@@ -110,6 +110,20 @@ enum NodeType : unsigned { + + // Read CPU configuration information operation + CPUCFG, ++ ++ // Vector Shuffle ++ VREPLVE, ++ ++ // Extended vector element extraction ++ VPICK_SEXT_ELT, ++ VPICK_ZEXT_ELT, ++ ++ // Vector comparisons ++ VALL_ZERO, ++ VANY_ZERO, ++ VALL_NONZERO, ++ VANY_NONZERO, ++ + // Intrinsic operations end ============================================= + }; + } // end namespace LoongArchISD +diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp +index ef79b8a0dcd3..a5d66ebac96a 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp +@@ -47,6 +47,14 @@ void LoongArchInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + return; + } + ++ // VR->VR copies. ++ if (LoongArch::LSX128RegClass.contains(DstReg, SrcReg)) { ++ BuildMI(MBB, MBBI, DL, get(LoongArch::VORI_B), DstReg) ++ .addReg(SrcReg, getKillRegState(KillSrc)) ++ .addImm(0); ++ return; ++ } ++ + // GPR->CFR copy. + if (LoongArch::CFRRegClass.contains(DstReg) && + LoongArch::GPRRegClass.contains(SrcReg)) { +@@ -99,6 +107,8 @@ void LoongArchInstrInfo::storeRegToStackSlot( + Opcode = LoongArch::FST_S; + else if (LoongArch::FPR64RegClass.hasSubClassEq(RC)) + Opcode = LoongArch::FST_D; ++ else if (LoongArch::LSX128RegClass.hasSubClassEq(RC)) ++ Opcode = LoongArch::VST; + else if (LoongArch::CFRRegClass.hasSubClassEq(RC)) + Opcode = LoongArch::PseudoST_CFR; + else +@@ -133,6 +143,8 @@ void LoongArchInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, + Opcode = LoongArch::FLD_S; + else if (LoongArch::FPR64RegClass.hasSubClassEq(RC)) + Opcode = LoongArch::FLD_D; ++ else if (LoongArch::LSX128RegClass.hasSubClassEq(RC)) ++ Opcode = LoongArch::VLD; + else if (LoongArch::CFRRegClass.hasSubClassEq(RC)) + Opcode = LoongArch::PseudoLD_CFR; + else +diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +index ac391ef471b1..b2c4bb812ba5 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +@@ -182,7 +182,7 @@ def imm32 : Operand { + let ParserMatchClass = ImmAsmOperand<"", 32, "">; + } + +-def uimm1 : Operand { ++def uimm1 : Operand, ImmLeaf(Imm);}]>{ + let ParserMatchClass = UImmAsmOperand<1>; + } + +@@ -197,11 +197,11 @@ def uimm2_plus1 : Operand, + let DecoderMethod = "decodeUImmOperand<2, 1>"; + } + +-def uimm3 : Operand { ++def uimm3 : Operand, ImmLeaf(Imm);}]> { + let ParserMatchClass = UImmAsmOperand<3>; + } + +-def uimm4 : Operand { ++def uimm4 : Operand, ImmLeaf(Imm);}]> { + let ParserMatchClass = UImmAsmOperand<4>; + } + +diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +index a8ed285a37cf..13332be0bc38 100644 +--- 
a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +@@ -10,6 +10,146 @@ + // + //===----------------------------------------------------------------------===// + ++def SDT_LoongArchVreplve : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVec<0>, ++ SDTCisInt<1>, SDTCisVec<1>, ++ SDTCisSameAs<0, 1>, SDTCisInt<2>]>; ++def SDT_LoongArchVecCond : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<1>]>; ++ ++// Target nodes. ++def loongarch_vreplve : SDNode<"LoongArchISD::VREPLVE", SDT_LoongArchVreplve>; ++def loongarch_vall_nonzero : SDNode<"LoongArchISD::VALL_NONZERO", ++ SDT_LoongArchVecCond>; ++def loongarch_vany_nonzero : SDNode<"LoongArchISD::VANY_NONZERO", ++ SDT_LoongArchVecCond>; ++def loongarch_vall_zero : SDNode<"LoongArchISD::VALL_ZERO", ++ SDT_LoongArchVecCond>; ++def loongarch_vany_zero : SDNode<"LoongArchISD::VANY_ZERO", ++ SDT_LoongArchVecCond>; ++ ++def loongarch_vpick_sext_elt : SDNode<"LoongArchISD::VPICK_SEXT_ELT", ++ SDTypeProfile<1, 3, [SDTCisPtrTy<2>]>>; ++def loongarch_vpick_zext_elt : SDNode<"LoongArchISD::VPICK_ZEXT_ELT", ++ SDTypeProfile<1, 3, [SDTCisPtrTy<2>]>>; ++ ++class VecCond ++ : Pseudo<(outs GPR:$rd), (ins RC:$vj), ++ [(set GPR:$rd, (OpNode (TyNode RC:$vj)))]> { ++ let hasSideEffects = 0; ++ let mayLoad = 0; ++ let mayStore = 0; ++ let usesCustomInserter = 1; ++} ++ ++def vsplat_imm_eq_1 : PatFrags<(ops), [(build_vector), ++ (bitconvert (v4i32 (build_vector)))], [{ ++ APInt Imm; ++ EVT EltTy = N->getValueType(0).getVectorElementType(); ++ ++ if (N->getOpcode() == ISD::BITCAST) ++ N = N->getOperand(0).getNode(); ++ ++ return selectVSplat(N, Imm, EltTy.getSizeInBits()) && ++ Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 1; ++}]>; ++ ++def vsplati8_imm_eq_7 : PatFrags<(ops), [(build_vector)], [{ ++ APInt Imm; ++ EVT EltTy = N->getValueType(0).getVectorElementType(); ++ ++ if (N->getOpcode() == ISD::BITCAST) ++ N = N->getOperand(0).getNode(); ++ ++ return selectVSplat(N, Imm, EltTy.getSizeInBits()) && ++ Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 7; ++}]>; ++def vsplati16_imm_eq_15 : PatFrags<(ops), [(build_vector)], [{ ++ APInt Imm; ++ EVT EltTy = N->getValueType(0).getVectorElementType(); ++ ++ if (N->getOpcode() == ISD::BITCAST) ++ N = N->getOperand(0).getNode(); ++ ++ return selectVSplat(N, Imm, EltTy.getSizeInBits()) && ++ Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 15; ++}]>; ++def vsplati32_imm_eq_31 : PatFrags<(ops), [(build_vector)], [{ ++ APInt Imm; ++ EVT EltTy = N->getValueType(0).getVectorElementType(); ++ ++ if (N->getOpcode() == ISD::BITCAST) ++ N = N->getOperand(0).getNode(); ++ ++ return selectVSplat(N, Imm, EltTy.getSizeInBits()) && ++ Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 31; ++}]>; ++def vsplati64_imm_eq_63 : PatFrags<(ops), [(build_vector), ++ (bitconvert (v4i32 (build_vector)))], [{ ++ APInt Imm; ++ EVT EltTy = N->getValueType(0).getVectorElementType(); ++ ++ if (N->getOpcode() == ISD::BITCAST) ++ N = N->getOperand(0).getNode(); ++ ++ return selectVSplat(N, Imm, EltTy.getSizeInBits()) && ++ Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 63; ++}]>; ++ ++def vsplati8imm7 : PatFrag<(ops node:$reg), ++ (and node:$reg, vsplati8_imm_eq_7)>; ++def vsplati16imm15 : PatFrag<(ops node:$reg), ++ (and node:$reg, vsplati16_imm_eq_15)>; ++def vsplati32imm31 : PatFrag<(ops node:$reg), ++ (and node:$reg, vsplati32_imm_eq_31)>; ++def vsplati64imm63 : PatFrag<(ops node:$reg), ++ (and node:$reg, vsplati64_imm_eq_63)>; ++ ++foreach N = [3, 4, 5, 6, 8] in ++ def 
SplatPat_uimm#N : ComplexPattern", ++ [build_vector, bitconvert], [], 2>; ++ ++foreach N = [5] in ++ def SplatPat_simm#N : ComplexPattern", ++ [build_vector, bitconvert]>; ++ ++def vsplat_uimm_inv_pow2 : ComplexPattern; ++ ++def vsplat_uimm_pow2 : ComplexPattern; ++ ++def muladd : PatFrag<(ops node:$vd, node:$vj, node:$vk), ++ (add node:$vd, (mul node:$vj, node:$vk))>; ++ ++def mulsub : PatFrag<(ops node:$vd, node:$vj, node:$vk), ++ (sub node:$vd, (mul node:$vj, node:$vk))>; ++ ++def lsxsplati8 : PatFrag<(ops node:$e0), ++ (v16i8 (build_vector node:$e0, node:$e0, ++ node:$e0, node:$e0, ++ node:$e0, node:$e0, ++ node:$e0, node:$e0, ++ node:$e0, node:$e0, ++ node:$e0, node:$e0, ++ node:$e0, node:$e0, ++ node:$e0, node:$e0))>; ++def lsxsplati16 : PatFrag<(ops node:$e0), ++ (v8i16 (build_vector node:$e0, node:$e0, ++ node:$e0, node:$e0, ++ node:$e0, node:$e0, ++ node:$e0, node:$e0))>; ++def lsxsplati32 : PatFrag<(ops node:$e0), ++ (v4i32 (build_vector node:$e0, node:$e0, ++ node:$e0, node:$e0))>; ++ ++def lsxsplati64 : PatFrag<(ops node:$e0), ++ (v2i64 (build_vector node:$e0, node:$e0))>; ++ ++def to_valide_timm : SDNodeXForm(N); ++ return CurDAG->getTargetConstant(CN->getSExtValue(), SDLoc(N), Subtarget->getGRLenVT()); ++}]>; ++ + //===----------------------------------------------------------------------===// + // Instruction class templates + //===----------------------------------------------------------------------===// +@@ -1004,4 +1144,680 @@ def PseudoVREPLI_D : Pseudo<(outs LSX128:$vd), (ins simm10:$imm), [], + "vrepli.d", "$vd, $imm">; + } + ++def PseudoVBNZ_B : VecCond; ++def PseudoVBNZ_H : VecCond; ++def PseudoVBNZ_W : VecCond; ++def PseudoVBNZ_D : VecCond; ++def PseudoVBNZ : VecCond; ++ ++def PseudoVBZ_B : VecCond; ++def PseudoVBZ_H : VecCond; ++def PseudoVBZ_W : VecCond; ++def PseudoVBZ_D : VecCond; ++def PseudoVBZ : VecCond; ++ ++} // Predicates = [HasExtLSX] ++ ++multiclass PatVr { ++ def : Pat<(v16i8 (OpNode (v16i8 LSX128:$vj))), ++ (!cast(Inst#"_B") LSX128:$vj)>; ++ def : Pat<(v8i16 (OpNode (v8i16 LSX128:$vj))), ++ (!cast(Inst#"_H") LSX128:$vj)>; ++ def : Pat<(v4i32 (OpNode (v4i32 LSX128:$vj))), ++ (!cast(Inst#"_W") LSX128:$vj)>; ++ def : Pat<(v2i64 (OpNode (v2i64 LSX128:$vj))), ++ (!cast(Inst#"_D") LSX128:$vj)>; ++} ++ ++multiclass PatVrVr { ++ def : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)), ++ (!cast(Inst#"_B") LSX128:$vj, LSX128:$vk)>; ++ def : Pat<(OpNode (v8i16 LSX128:$vj), (v8i16 LSX128:$vk)), ++ (!cast(Inst#"_H") LSX128:$vj, LSX128:$vk)>; ++ def : Pat<(OpNode (v4i32 LSX128:$vj), (v4i32 LSX128:$vk)), ++ (!cast(Inst#"_W") LSX128:$vj, LSX128:$vk)>; ++ def : Pat<(OpNode (v2i64 LSX128:$vj), (v2i64 LSX128:$vk)), ++ (!cast(Inst#"_D") LSX128:$vj, LSX128:$vk)>; ++} ++ ++multiclass PatVrVrF { ++ def : Pat<(OpNode (v4f32 LSX128:$vj), (v4f32 LSX128:$vk)), ++ (!cast(Inst#"_S") LSX128:$vj, LSX128:$vk)>; ++ def : Pat<(OpNode (v2f64 LSX128:$vj), (v2f64 LSX128:$vk)), ++ (!cast(Inst#"_D") LSX128:$vj, LSX128:$vk)>; ++} ++ ++multiclass PatVrVrU { ++ def : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)), ++ (!cast(Inst#"_BU") LSX128:$vj, LSX128:$vk)>; ++ def : Pat<(OpNode (v8i16 LSX128:$vj), (v8i16 LSX128:$vk)), ++ (!cast(Inst#"_HU") LSX128:$vj, LSX128:$vk)>; ++ def : Pat<(OpNode (v4i32 LSX128:$vj), (v4i32 LSX128:$vk)), ++ (!cast(Inst#"_WU") LSX128:$vj, LSX128:$vk)>; ++ def : Pat<(OpNode (v2i64 LSX128:$vj), (v2i64 LSX128:$vk)), ++ (!cast(Inst#"_DU") LSX128:$vj, LSX128:$vk)>; ++} ++ ++multiclass PatVrSimm5 { ++ def : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 (SplatPat_simm5 
simm5:$imm))), ++ (!cast(Inst#"_B") LSX128:$vj, simm5:$imm)>; ++ def : Pat<(OpNode (v8i16 LSX128:$vj), (v8i16 (SplatPat_simm5 simm5:$imm))), ++ (!cast(Inst#"_H") LSX128:$vj, simm5:$imm)>; ++ def : Pat<(OpNode (v4i32 LSX128:$vj), (v4i32 (SplatPat_simm5 simm5:$imm))), ++ (!cast(Inst#"_W") LSX128:$vj, simm5:$imm)>; ++ def : Pat<(OpNode (v2i64 LSX128:$vj), (v2i64 (SplatPat_simm5 simm5:$imm))), ++ (!cast(Inst#"_D") LSX128:$vj, simm5:$imm)>; ++} ++ ++multiclass PatVrUimm5 { ++ def : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 (SplatPat_uimm5 uimm5:$imm))), ++ (!cast(Inst#"_BU") LSX128:$vj, uimm5:$imm)>; ++ def : Pat<(OpNode (v8i16 LSX128:$vj), (v8i16 (SplatPat_uimm5 uimm5:$imm))), ++ (!cast(Inst#"_HU") LSX128:$vj, uimm5:$imm)>; ++ def : Pat<(OpNode (v4i32 LSX128:$vj), (v4i32 (SplatPat_uimm5 uimm5:$imm))), ++ (!cast(Inst#"_WU") LSX128:$vj, uimm5:$imm)>; ++ def : Pat<(OpNode (v2i64 LSX128:$vj), (v2i64 (SplatPat_uimm5 uimm5:$imm))), ++ (!cast(Inst#"_DU") LSX128:$vj, uimm5:$imm)>; ++} ++ ++multiclass PatVrVrVr { ++ def : Pat<(OpNode (v16i8 LSX128:$vd), (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)), ++ (!cast(Inst#"_B") LSX128:$vd, LSX128:$vj, LSX128:$vk)>; ++ def : Pat<(OpNode (v8i16 LSX128:$vd), (v8i16 LSX128:$vj), (v8i16 LSX128:$vk)), ++ (!cast(Inst#"_H") LSX128:$vd, LSX128:$vj, LSX128:$vk)>; ++ def : Pat<(OpNode (v4i32 LSX128:$vd), (v4i32 LSX128:$vj), (v4i32 LSX128:$vk)), ++ (!cast(Inst#"_W") LSX128:$vd, LSX128:$vj, LSX128:$vk)>; ++ def : Pat<(OpNode (v2i64 LSX128:$vd), (v2i64 LSX128:$vj), (v2i64 LSX128:$vk)), ++ (!cast(Inst#"_D") LSX128:$vd, LSX128:$vj, LSX128:$vk)>; ++} ++ ++multiclass PatShiftVrVr { ++ def : Pat<(OpNode (v16i8 LSX128:$vj), (and vsplati8_imm_eq_7, ++ (v16i8 LSX128:$vk))), ++ (!cast(Inst#"_B") LSX128:$vj, LSX128:$vk)>; ++ def : Pat<(OpNode (v8i16 LSX128:$vj), (and vsplati16_imm_eq_15, ++ (v8i16 LSX128:$vk))), ++ (!cast(Inst#"_H") LSX128:$vj, LSX128:$vk)>; ++ def : Pat<(OpNode (v4i32 LSX128:$vj), (and vsplati32_imm_eq_31, ++ (v4i32 LSX128:$vk))), ++ (!cast(Inst#"_W") LSX128:$vj, LSX128:$vk)>; ++ def : Pat<(OpNode (v2i64 LSX128:$vj), (and vsplati64_imm_eq_63, ++ (v2i64 LSX128:$vk))), ++ (!cast(Inst#"_D") LSX128:$vj, LSX128:$vk)>; ++} ++ ++multiclass PatShiftVrUimm { ++ def : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 (SplatPat_uimm3 uimm3:$imm))), ++ (!cast(Inst#"_B") LSX128:$vj, uimm3:$imm)>; ++ def : Pat<(OpNode (v8i16 LSX128:$vj), (v8i16 (SplatPat_uimm4 uimm4:$imm))), ++ (!cast(Inst#"_H") LSX128:$vj, uimm4:$imm)>; ++ def : Pat<(OpNode (v4i32 LSX128:$vj), (v4i32 (SplatPat_uimm5 uimm5:$imm))), ++ (!cast(Inst#"_W") LSX128:$vj, uimm5:$imm)>; ++ def : Pat<(OpNode (v2i64 LSX128:$vj), (v2i64 (SplatPat_uimm6 uimm6:$imm))), ++ (!cast(Inst#"_D") LSX128:$vj, uimm6:$imm)>; ++} ++ ++class PatVrVrB ++ : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)), ++ (Inst LSX128:$vj, LSX128:$vk)>; ++ ++let Predicates = [HasExtLSX] in { ++ ++// VADD_{B/H/W/D} ++defm : PatVrVr; ++// VSUB_{B/H/W/D} ++defm : PatVrVr; ++ ++// VADDI_{B/H/W/D}U ++defm : PatVrUimm5; ++// VSUBI_{B/H/W/D}U ++defm : PatVrUimm5; ++ ++// VNEG_{B/H/W/D} ++def : Pat<(sub immAllZerosV, (v16i8 LSX128:$vj)), (VNEG_B LSX128:$vj)>; ++def : Pat<(sub immAllZerosV, (v8i16 LSX128:$vj)), (VNEG_H LSX128:$vj)>; ++def : Pat<(sub immAllZerosV, (v4i32 LSX128:$vj)), (VNEG_W LSX128:$vj)>; ++def : Pat<(sub immAllZerosV, (v2i64 LSX128:$vj)), (VNEG_D LSX128:$vj)>; ++ ++// VMAX[I]_{B/H/W/D}[U] ++defm : PatVrVr; ++defm : PatVrVrU; ++defm : PatVrSimm5; ++defm : PatVrUimm5; ++ ++// VMIN[I]_{B/H/W/D}[U] ++defm : PatVrVr; ++defm : PatVrVrU; ++defm : PatVrSimm5; ++defm : 
PatVrUimm5; ++ ++// VMUL_{B/H/W/D} ++defm : PatVrVr; ++ ++// VMADD_{B/H/W/D} ++defm : PatVrVrVr; ++// VMSUB_{B/H/W/D} ++defm : PatVrVrVr; ++ ++// VDIV_{B/H/W/D}[U] ++defm : PatVrVr; ++defm : PatVrVrU; ++ ++// VMOD_{B/H/W/D}[U] ++defm : PatVrVr; ++defm : PatVrVrU; ++ ++// VAND_V ++def : PatVrVrB; ++// VNOR_V ++def : PatVrVrB; ++// VXOR_V ++def : PatVrVrB; ++// VNOR_V ++def : Pat<(vnot (or (v16i8 LSX128:$vj), (v16i8 LSX128:$vk))), ++ (VNOR_V LSX128:$vj, LSX128:$vk)>; ++ ++// VANDI_B ++def : Pat<(and (v16i8 LSX128:$vj), (v16i8 (SplatPat_uimm8 uimm8:$imm))), ++ (VANDI_B LSX128:$vj, uimm8:$imm)>; ++// VORI_B ++def : Pat<(or (v16i8 LSX128:$vj), (v16i8 (SplatPat_uimm8 uimm8:$imm))), ++ (VORI_B LSX128:$vj, uimm8:$imm)>; ++ ++// VXORI_B ++def : Pat<(xor (v16i8 LSX128:$vj), (v16i8 (SplatPat_uimm8 uimm8:$imm))), ++ (VXORI_B LSX128:$vj, uimm8:$imm)>; ++ ++// VSLL[I]_{B/H/W/D} ++defm : PatVrVr; ++defm : PatShiftVrVr; ++defm : PatShiftVrUimm; ++ ++// VSRL[I]_{B/H/W/D} ++defm : PatVrVr; ++defm : PatShiftVrVr; ++defm : PatShiftVrUimm; ++ ++// VSRA[I]_{B/H/W/D} ++defm : PatVrVr; ++defm : PatShiftVrVr; ++defm : PatShiftVrUimm; ++ ++// VPCNT_{B/H/W/D} ++defm : PatVr; ++ ++// VBITCLR_{B/H/W/D} ++def : Pat<(and v16i8:$vj, (vnot (shl vsplat_imm_eq_1, v16i8:$vk))), ++ (v16i8 (VBITCLR_B v16i8:$vj, v16i8:$vk))>; ++def : Pat<(and v8i16:$vj, (vnot (shl vsplat_imm_eq_1, v8i16:$vk))), ++ (v8i16 (VBITCLR_H v8i16:$vj, v8i16:$vk))>; ++def : Pat<(and v4i32:$vj, (vnot (shl vsplat_imm_eq_1, v4i32:$vk))), ++ (v4i32 (VBITCLR_W v4i32:$vj, v4i32:$vk))>; ++def : Pat<(and v2i64:$vj, (vnot (shl vsplat_imm_eq_1, v2i64:$vk))), ++ (v2i64 (VBITCLR_D v2i64:$vj, v2i64:$vk))>; ++def : Pat<(and v16i8:$vj, (vnot (shl vsplat_imm_eq_1, ++ (vsplati8imm7 v16i8:$vk)))), ++ (v16i8 (VBITCLR_B v16i8:$vj, v16i8:$vk))>; ++def : Pat<(and v8i16:$vj, (vnot (shl vsplat_imm_eq_1, ++ (vsplati16imm15 v8i16:$vk)))), ++ (v8i16 (VBITCLR_H v8i16:$vj, v8i16:$vk))>; ++def : Pat<(and v4i32:$vj, (vnot (shl vsplat_imm_eq_1, ++ (vsplati32imm31 v4i32:$vk)))), ++ (v4i32 (VBITCLR_W v4i32:$vj, v4i32:$vk))>; ++def : Pat<(and v2i64:$vj, (vnot (shl vsplat_imm_eq_1, ++ (vsplati64imm63 v2i64:$vk)))), ++ (v2i64 (VBITCLR_D v2i64:$vj, v2i64:$vk))>; ++ ++// VBITCLRI_{B/H/W/D} ++def : Pat<(and (v16i8 LSX128:$vj), (v16i8 (vsplat_uimm_inv_pow2 uimm3:$imm))), ++ (VBITCLRI_B LSX128:$vj, uimm3:$imm)>; ++def : Pat<(and (v8i16 LSX128:$vj), (v8i16 (vsplat_uimm_inv_pow2 uimm4:$imm))), ++ (VBITCLRI_H LSX128:$vj, uimm4:$imm)>; ++def : Pat<(and (v4i32 LSX128:$vj), (v4i32 (vsplat_uimm_inv_pow2 uimm5:$imm))), ++ (VBITCLRI_W LSX128:$vj, uimm5:$imm)>; ++def : Pat<(and (v2i64 LSX128:$vj), (v2i64 (vsplat_uimm_inv_pow2 uimm6:$imm))), ++ (VBITCLRI_D LSX128:$vj, uimm6:$imm)>; ++ ++// VBITSET_{B/H/W/D} ++def : Pat<(or v16i8:$vj, (shl vsplat_imm_eq_1, v16i8:$vk)), ++ (v16i8 (VBITSET_B v16i8:$vj, v16i8:$vk))>; ++def : Pat<(or v8i16:$vj, (shl vsplat_imm_eq_1, v8i16:$vk)), ++ (v8i16 (VBITSET_H v8i16:$vj, v8i16:$vk))>; ++def : Pat<(or v4i32:$vj, (shl vsplat_imm_eq_1, v4i32:$vk)), ++ (v4i32 (VBITSET_W v4i32:$vj, v4i32:$vk))>; ++def : Pat<(or v2i64:$vj, (shl vsplat_imm_eq_1, v2i64:$vk)), ++ (v2i64 (VBITSET_D v2i64:$vj, v2i64:$vk))>; ++def : Pat<(or v16i8:$vj, (shl vsplat_imm_eq_1, (vsplati8imm7 v16i8:$vk))), ++ (v16i8 (VBITSET_B v16i8:$vj, v16i8:$vk))>; ++def : Pat<(or v8i16:$vj, (shl vsplat_imm_eq_1, (vsplati16imm15 v8i16:$vk))), ++ (v8i16 (VBITSET_H v8i16:$vj, v8i16:$vk))>; ++def : Pat<(or v4i32:$vj, (shl vsplat_imm_eq_1, (vsplati32imm31 v4i32:$vk))), ++ (v4i32 (VBITSET_W v4i32:$vj, v4i32:$vk))>; ++def : 
Pat<(or v2i64:$vj, (shl vsplat_imm_eq_1, (vsplati64imm63 v2i64:$vk))), ++ (v2i64 (VBITSET_D v2i64:$vj, v2i64:$vk))>; ++ ++// VBITSETI_{B/H/W/D} ++def : Pat<(or (v16i8 LSX128:$vj), (v16i8 (vsplat_uimm_pow2 uimm3:$imm))), ++ (VBITSETI_B LSX128:$vj, uimm3:$imm)>; ++def : Pat<(or (v8i16 LSX128:$vj), (v8i16 (vsplat_uimm_pow2 uimm4:$imm))), ++ (VBITSETI_H LSX128:$vj, uimm4:$imm)>; ++def : Pat<(or (v4i32 LSX128:$vj), (v4i32 (vsplat_uimm_pow2 uimm5:$imm))), ++ (VBITSETI_W LSX128:$vj, uimm5:$imm)>; ++def : Pat<(or (v2i64 LSX128:$vj), (v2i64 (vsplat_uimm_pow2 uimm6:$imm))), ++ (VBITSETI_D LSX128:$vj, uimm6:$imm)>; ++ ++// VBITREV_{B/H/W/D} ++def : Pat<(xor v16i8:$vj, (shl vsplat_imm_eq_1, v16i8:$vk)), ++ (v16i8 (VBITREV_B v16i8:$vj, v16i8:$vk))>; ++def : Pat<(xor v8i16:$vj, (shl vsplat_imm_eq_1, v8i16:$vk)), ++ (v8i16 (VBITREV_H v8i16:$vj, v8i16:$vk))>; ++def : Pat<(xor v4i32:$vj, (shl vsplat_imm_eq_1, v4i32:$vk)), ++ (v4i32 (VBITREV_W v4i32:$vj, v4i32:$vk))>; ++def : Pat<(xor v2i64:$vj, (shl vsplat_imm_eq_1, v2i64:$vk)), ++ (v2i64 (VBITREV_D v2i64:$vj, v2i64:$vk))>; ++def : Pat<(xor v16i8:$vj, (shl vsplat_imm_eq_1, (vsplati8imm7 v16i8:$vk))), ++ (v16i8 (VBITREV_B v16i8:$vj, v16i8:$vk))>; ++def : Pat<(xor v8i16:$vj, (shl vsplat_imm_eq_1, (vsplati16imm15 v8i16:$vk))), ++ (v8i16 (VBITREV_H v8i16:$vj, v8i16:$vk))>; ++def : Pat<(xor v4i32:$vj, (shl vsplat_imm_eq_1, (vsplati32imm31 v4i32:$vk))), ++ (v4i32 (VBITREV_W v4i32:$vj, v4i32:$vk))>; ++def : Pat<(xor v2i64:$vj, (shl vsplat_imm_eq_1, (vsplati64imm63 v2i64:$vk))), ++ (v2i64 (VBITREV_D v2i64:$vj, v2i64:$vk))>; ++ ++// VBITREVI_{B/H/W/D} ++def : Pat<(xor (v16i8 LSX128:$vj), (v16i8 (vsplat_uimm_pow2 uimm3:$imm))), ++ (VBITREVI_B LSX128:$vj, uimm3:$imm)>; ++def : Pat<(xor (v8i16 LSX128:$vj), (v8i16 (vsplat_uimm_pow2 uimm4:$imm))), ++ (VBITREVI_H LSX128:$vj, uimm4:$imm)>; ++def : Pat<(xor (v4i32 LSX128:$vj), (v4i32 (vsplat_uimm_pow2 uimm5:$imm))), ++ (VBITREVI_W LSX128:$vj, uimm5:$imm)>; ++def : Pat<(xor (v2i64 LSX128:$vj), (v2i64 (vsplat_uimm_pow2 uimm6:$imm))), ++ (VBITREVI_D LSX128:$vj, uimm6:$imm)>; ++ ++// VFADD_{S/D} ++defm : PatVrVrF; ++ ++// VFSUB_{S/D} ++defm : PatVrVrF; ++ ++// VFMUL_{S/D} ++defm : PatVrVrF; ++ ++// VFDIV_{S/D} ++defm : PatVrVrF; ++ ++// VFMADD_{S/D} ++def : Pat<(fma v4f32:$vj, v4f32:$vk, v4f32:$va), ++ (VFMADD_S v4f32:$vj, v4f32:$vk, v4f32:$va)>; ++def : Pat<(fma v2f64:$vj, v2f64:$vk, v2f64:$va), ++ (VFMADD_D v2f64:$vj, v2f64:$vk, v2f64:$va)>; ++ ++// VINSGR2VR_{B/H/W/D} ++def : Pat<(vector_insert v16i8:$vd, GRLenVT:$rj, uimm4:$imm), ++ (VINSGR2VR_B v16i8:$vd, GRLenVT:$rj, uimm4:$imm)>; ++def : Pat<(vector_insert v8i16:$vd, GRLenVT:$rj, uimm3:$imm), ++ (VINSGR2VR_H v8i16:$vd, GRLenVT:$rj, uimm3:$imm)>; ++def : Pat<(vector_insert v4i32:$vd, GRLenVT:$rj, uimm2:$imm), ++ (VINSGR2VR_W v4i32:$vd, GRLenVT:$rj, uimm2:$imm)>; ++def : Pat<(vector_insert v2i64:$vd, GRLenVT:$rj, uimm1:$imm), ++ (VINSGR2VR_D v2i64:$vd, GRLenVT:$rj, uimm1:$imm)>; ++ ++// VPICKVE2GR_{B/H/W}[U] ++def : Pat<(loongarch_vpick_sext_elt v16i8:$vd, uimm4:$imm, i8), ++ (VPICKVE2GR_B v16i8:$vd, uimm4:$imm)>; ++def : Pat<(loongarch_vpick_sext_elt v8i16:$vd, uimm3:$imm, i16), ++ (VPICKVE2GR_H v8i16:$vd, uimm3:$imm)>; ++def : Pat<(loongarch_vpick_sext_elt v4i32:$vd, uimm2:$imm, i32), ++ (VPICKVE2GR_W v4i32:$vd, uimm2:$imm)>; ++ ++def : Pat<(loongarch_vpick_zext_elt v16i8:$vd, uimm4:$imm, i8), ++ (VPICKVE2GR_BU v16i8:$vd, uimm4:$imm)>; ++def : Pat<(loongarch_vpick_zext_elt v8i16:$vd, uimm3:$imm, i16), ++ (VPICKVE2GR_HU v8i16:$vd, uimm3:$imm)>; ++def : 
Pat<(loongarch_vpick_zext_elt v4i32:$vd, uimm2:$imm, i32), ++ (VPICKVE2GR_WU v4i32:$vd, uimm2:$imm)>; ++ ++// VREPLGR2VR_{B/H/W/D} ++def : Pat<(lsxsplati8 GPR:$rj), (VREPLGR2VR_B GPR:$rj)>; ++def : Pat<(lsxsplati16 GPR:$rj), (VREPLGR2VR_H GPR:$rj)>; ++def : Pat<(lsxsplati32 GPR:$rj), (VREPLGR2VR_W GPR:$rj)>; ++def : Pat<(lsxsplati64 GPR:$rj), (VREPLGR2VR_D GPR:$rj)>; ++ ++// VREPLVE_{B/H/W/D} ++def : Pat<(loongarch_vreplve v16i8:$vj, GRLenVT:$rk), ++ (VREPLVE_B v16i8:$vj, GRLenVT:$rk)>; ++def : Pat<(loongarch_vreplve v8i16:$vj, GRLenVT:$rk), ++ (VREPLVE_H v8i16:$vj, GRLenVT:$rk)>; ++def : Pat<(loongarch_vreplve v4i32:$vj, GRLenVT:$rk), ++ (VREPLVE_W v4i32:$vj, GRLenVT:$rk)>; ++def : Pat<(loongarch_vreplve v2i64:$vj, GRLenVT:$rk), ++ (VREPLVE_D v2i64:$vj, GRLenVT:$rk)>; ++ ++// Loads/Stores ++foreach vt = [v16i8, v8i16, v4i32, v2i64] in { ++ defm : LdPat; ++ def : RegRegLdPat; ++ defm : StPat; ++ def : RegRegStPat; ++} ++ ++} // Predicates = [HasExtLSX] ++ ++/// Intrinsic pattern ++ ++class deriveLSXIntrinsic { ++ Intrinsic ret = !cast(!tolower("int_loongarch_lsx_"#Inst)); ++} ++ ++let Predicates = [HasExtLSX] in { ++ ++// vty: v16i8/v8i16/v4i32/v2i64 ++// Pat<(Intrinsic vty:$vj, vty:$vk), ++// (LAInst vty:$vj, vty:$vk)>; ++foreach Inst = ["VSADD_B", "VSADD_BU", "VSSUB_B", "VSSUB_BU", ++ "VHADDW_H_B", "VHADDW_HU_BU", "VHSUBW_H_B", "VHSUBW_HU_BU", ++ "VADDWEV_H_B", "VADDWOD_H_B", "VSUBWEV_H_B", "VSUBWOD_H_B", ++ "VADDWEV_H_BU", "VADDWOD_H_BU", "VSUBWEV_H_BU", "VSUBWOD_H_BU", ++ "VADDWEV_H_BU_B", "VADDWOD_H_BU_B", ++ "VAVG_B", "VAVG_BU", "VAVGR_B", "VAVGR_BU", ++ "VABSD_B", "VABSD_BU", "VADDA_B", "VMUH_B", "VMUH_BU", ++ "VMULWEV_H_B", "VMULWOD_H_B", "VMULWEV_H_BU", "VMULWOD_H_BU", ++ "VMULWEV_H_BU_B", "VMULWOD_H_BU_B", "VSIGNCOV_B", ++ "VANDN_V", "VORN_V", "VROTR_B", "VSRLR_B", "VSRAR_B", ++ "VSEQ_B", "VSLE_B", "VSLE_BU", "VSLT_B", "VSLT_BU", ++ "VPACKEV_B", "VPACKOD_B", "VPICKEV_B", "VPICKOD_B", ++ "VILVL_B", "VILVH_B"] in ++ def : Pat<(deriveLSXIntrinsic.ret ++ (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)), ++ (!cast(Inst) LSX128:$vj, LSX128:$vk)>; ++foreach Inst = ["VSADD_H", "VSADD_HU", "VSSUB_H", "VSSUB_HU", ++ "VHADDW_W_H", "VHADDW_WU_HU", "VHSUBW_W_H", "VHSUBW_WU_HU", ++ "VADDWEV_W_H", "VADDWOD_W_H", "VSUBWEV_W_H", "VSUBWOD_W_H", ++ "VADDWEV_W_HU", "VADDWOD_W_HU", "VSUBWEV_W_HU", "VSUBWOD_W_HU", ++ "VADDWEV_W_HU_H", "VADDWOD_W_HU_H", ++ "VAVG_H", "VAVG_HU", "VAVGR_H", "VAVGR_HU", ++ "VABSD_H", "VABSD_HU", "VADDA_H", "VMUH_H", "VMUH_HU", ++ "VMULWEV_W_H", "VMULWOD_W_H", "VMULWEV_W_HU", "VMULWOD_W_HU", ++ "VMULWEV_W_HU_H", "VMULWOD_W_HU_H", "VSIGNCOV_H", "VROTR_H", ++ "VSRLR_H", "VSRAR_H", "VSRLN_B_H", "VSRAN_B_H", "VSRLRN_B_H", ++ "VSRARN_B_H", "VSSRLN_B_H", "VSSRAN_B_H", "VSSRLN_BU_H", ++ "VSSRAN_BU_H", "VSSRLRN_B_H", "VSSRARN_B_H", "VSSRLRN_BU_H", ++ "VSSRARN_BU_H", ++ "VSEQ_H", "VSLE_H", "VSLE_HU", "VSLT_H", "VSLT_HU", ++ "VPACKEV_H", "VPACKOD_H", "VPICKEV_H", "VPICKOD_H", ++ "VILVL_H", "VILVH_H"] in ++ def : Pat<(deriveLSXIntrinsic.ret ++ (v8i16 LSX128:$vj), (v8i16 LSX128:$vk)), ++ (!cast(Inst) LSX128:$vj, LSX128:$vk)>; ++foreach Inst = ["VSADD_W", "VSADD_WU", "VSSUB_W", "VSSUB_WU", ++ "VHADDW_D_W", "VHADDW_DU_WU", "VHSUBW_D_W", "VHSUBW_DU_WU", ++ "VADDWEV_D_W", "VADDWOD_D_W", "VSUBWEV_D_W", "VSUBWOD_D_W", ++ "VADDWEV_D_WU", "VADDWOD_D_WU", "VSUBWEV_D_WU", "VSUBWOD_D_WU", ++ "VADDWEV_D_WU_W", "VADDWOD_D_WU_W", ++ "VAVG_W", "VAVG_WU", "VAVGR_W", "VAVGR_WU", ++ "VABSD_W", "VABSD_WU", "VADDA_W", "VMUH_W", "VMUH_WU", ++ "VMULWEV_D_W", "VMULWOD_D_W", "VMULWEV_D_WU", "VMULWOD_D_WU", ++ 
"VMULWEV_D_WU_W", "VMULWOD_D_WU_W", "VSIGNCOV_W", "VROTR_W", ++ "VSRLR_W", "VSRAR_W", "VSRLN_H_W", "VSRAN_H_W", "VSRLRN_H_W", ++ "VSRARN_H_W", "VSSRLN_H_W", "VSSRAN_H_W", "VSSRLN_HU_W", ++ "VSSRAN_HU_W", "VSSRLRN_H_W", "VSSRARN_H_W", "VSSRLRN_HU_W", ++ "VSSRARN_HU_W", ++ "VSEQ_W", "VSLE_W", "VSLE_WU", "VSLT_W", "VSLT_WU", ++ "VPACKEV_W", "VPACKOD_W", "VPICKEV_W", "VPICKOD_W", ++ "VILVL_W", "VILVH_W"] in ++ def : Pat<(deriveLSXIntrinsic.ret ++ (v4i32 LSX128:$vj), (v4i32 LSX128:$vk)), ++ (!cast(Inst) LSX128:$vj, LSX128:$vk)>; ++foreach Inst = ["VADD_Q", "VSUB_Q", ++ "VSADD_D", "VSADD_DU", "VSSUB_D", "VSSUB_DU", ++ "VHADDW_Q_D", "VHADDW_QU_DU", "VHSUBW_Q_D", "VHSUBW_QU_DU", ++ "VADDWEV_Q_D", "VADDWOD_Q_D", "VSUBWEV_Q_D", "VSUBWOD_Q_D", ++ "VADDWEV_Q_DU", "VADDWOD_Q_DU", "VSUBWEV_Q_DU", "VSUBWOD_Q_DU", ++ "VADDWEV_Q_DU_D", "VADDWOD_Q_DU_D", ++ "VAVG_D", "VAVG_DU", "VAVGR_D", "VAVGR_DU", ++ "VABSD_D", "VABSD_DU", "VADDA_D", "VMUH_D", "VMUH_DU", ++ "VMULWEV_Q_D", "VMULWOD_Q_D", "VMULWEV_Q_DU", "VMULWOD_Q_DU", ++ "VMULWEV_Q_DU_D", "VMULWOD_Q_DU_D", "VSIGNCOV_D", "VROTR_D", ++ "VSRLR_D", "VSRAR_D", "VSRLN_W_D", "VSRAN_W_D", "VSRLRN_W_D", ++ "VSRARN_W_D", "VSSRLN_W_D", "VSSRAN_W_D", "VSSRLN_WU_D", ++ "VSSRAN_WU_D", "VSSRLRN_W_D", "VSSRARN_W_D", "VSSRLRN_WU_D", ++ "VSSRARN_WU_D", "VFFINT_S_L", ++ "VSEQ_D", "VSLE_D", "VSLE_DU", "VSLT_D", "VSLT_DU", ++ "VPACKEV_D", "VPACKOD_D", "VPICKEV_D", "VPICKOD_D", ++ "VILVL_D", "VILVH_D"] in ++ def : Pat<(deriveLSXIntrinsic.ret ++ (v2i64 LSX128:$vj), (v2i64 LSX128:$vk)), ++ (!cast(Inst) LSX128:$vj, LSX128:$vk)>; ++ ++// vty: v16i8/v8i16/v4i32/v2i64 ++// Pat<(Intrinsic vty:$vd, vty:$vj, vty:$vk), ++// (LAInst vty:$vd, vty:$vj, vty:$vk)>; ++foreach Inst = ["VMADDWEV_H_B", "VMADDWOD_H_B", "VMADDWEV_H_BU", ++ "VMADDWOD_H_BU", "VMADDWEV_H_BU_B", "VMADDWOD_H_BU_B"] in ++ def : Pat<(deriveLSXIntrinsic.ret ++ (v8i16 LSX128:$vd), (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)), ++ (!cast(Inst) LSX128:$vd, LSX128:$vj, LSX128:$vk)>; ++foreach Inst = ["VMADDWEV_W_H", "VMADDWOD_W_H", "VMADDWEV_W_HU", ++ "VMADDWOD_W_HU", "VMADDWEV_W_HU_H", "VMADDWOD_W_HU_H"] in ++ def : Pat<(deriveLSXIntrinsic.ret ++ (v4i32 LSX128:$vd), (v8i16 LSX128:$vj), (v8i16 LSX128:$vk)), ++ (!cast(Inst) LSX128:$vd, LSX128:$vj, LSX128:$vk)>; ++foreach Inst = ["VMADDWEV_D_W", "VMADDWOD_D_W", "VMADDWEV_D_WU", ++ "VMADDWOD_D_WU", "VMADDWEV_D_WU_W", "VMADDWOD_D_WU_W"] in ++ def : Pat<(deriveLSXIntrinsic.ret ++ (v2i64 LSX128:$vd), (v4i32 LSX128:$vj), (v4i32 LSX128:$vk)), ++ (!cast(Inst) LSX128:$vd, LSX128:$vj, LSX128:$vk)>; ++foreach Inst = ["VMADDWEV_Q_D", "VMADDWOD_Q_D", "VMADDWEV_Q_DU", ++ "VMADDWOD_Q_DU", "VMADDWEV_Q_DU_D", "VMADDWOD_Q_DU_D"] in ++ def : Pat<(deriveLSXIntrinsic.ret ++ (v2i64 LSX128:$vd), (v2i64 LSX128:$vj), (v2i64 LSX128:$vk)), ++ (!cast(Inst) LSX128:$vd, LSX128:$vj, LSX128:$vk)>; ++ ++// vty: v16i8/v8i16/v4i32/v2i64 ++// Pat<(Intrinsic vty:$vj), ++// (LAInst vty:$vj)>; ++foreach Inst = ["VEXTH_H_B", "VEXTH_HU_BU", ++ "VMSKLTZ_B", "VMSKGEZ_B", "VMSKNZ_B", ++ "VCLO_B", "VCLZ_B"] in ++ def : Pat<(deriveLSXIntrinsic.ret (v16i8 LSX128:$vj)), ++ (!cast(Inst) LSX128:$vj)>; ++foreach Inst = ["VEXTH_W_H", "VEXTH_WU_HU", "VMSKLTZ_H", ++ "VCLO_H", "VCLZ_H", "VFCVTL_S_H", "VFCVTH_S_H"] in ++ def : Pat<(deriveLSXIntrinsic.ret (v8i16 LSX128:$vj)), ++ (!cast(Inst) LSX128:$vj)>; ++foreach Inst = ["VEXTH_D_W", "VEXTH_DU_WU", "VMSKLTZ_W", ++ "VCLO_W", "VCLZ_W", "VFFINT_S_W", "VFFINT_S_WU", ++ "VFFINTL_D_W", "VFFINTH_D_W"] in ++ def : Pat<(deriveLSXIntrinsic.ret (v4i32 LSX128:$vj)), ++ (!cast(Inst) LSX128:$vj)>; 
++foreach Inst = ["VEXTH_Q_D", "VEXTH_QU_DU", "VMSKLTZ_D", ++ "VEXTL_Q_D", "VEXTL_QU_DU", ++ "VCLO_D", "VCLZ_D", "VFFINT_D_L", "VFFINT_D_LU"] in ++ def : Pat<(deriveLSXIntrinsic.ret (v2i64 LSX128:$vj)), ++ (!cast(Inst) LSX128:$vj)>; ++ ++// Pat<(Intrinsic timm:$imm) ++// (LAInst timm:$imm)>; ++def : Pat<(int_loongarch_lsx_vldi timm:$imm), ++ (VLDI (to_valide_timm timm:$imm))>; ++foreach Inst = ["VREPLI_B", "VREPLI_H", "VREPLI_W", "VREPLI_D"] in ++ def : Pat<(deriveLSXIntrinsic.ret timm:$imm), ++ (!cast("Pseudo"#Inst) (to_valide_timm timm:$imm))>; ++ ++// vty: v16i8/v8i16/v4i32/v2i64 ++// Pat<(Intrinsic vty:$vj, timm:$imm) ++// (LAInst vty:$vj, timm:$imm)>; ++foreach Inst = ["VSAT_B", "VSAT_BU", "VNORI_B", "VROTRI_B", "VSLLWIL_H_B", ++ "VSLLWIL_HU_BU", "VSRLRI_B", "VSRARI_B", ++ "VSEQI_B", "VSLEI_B", "VSLEI_BU", "VSLTI_B", "VSLTI_BU", ++ "VREPLVEI_B", "VBSLL_V", "VBSRL_V", "VSHUF4I_B"] in ++ def : Pat<(deriveLSXIntrinsic.ret (v16i8 LSX128:$vj), timm:$imm), ++ (!cast(Inst) LSX128:$vj, (to_valide_timm timm:$imm))>; ++foreach Inst = ["VSAT_H", "VSAT_HU", "VROTRI_H", "VSLLWIL_W_H", ++ "VSLLWIL_WU_HU", "VSRLRI_H", "VSRARI_H", ++ "VSEQI_H", "VSLEI_H", "VSLEI_HU", "VSLTI_H", "VSLTI_HU", ++ "VREPLVEI_H", "VSHUF4I_H"] in ++ def : Pat<(deriveLSXIntrinsic.ret (v8i16 LSX128:$vj), timm:$imm), ++ (!cast(Inst) LSX128:$vj, (to_valide_timm timm:$imm))>; ++foreach Inst = ["VSAT_W", "VSAT_WU", "VROTRI_W", "VSLLWIL_D_W", ++ "VSLLWIL_DU_WU", "VSRLRI_W", "VSRARI_W", ++ "VSEQI_W", "VSLEI_W", "VSLEI_WU", "VSLTI_W", "VSLTI_WU", ++ "VREPLVEI_W", "VSHUF4I_W"] in ++ def : Pat<(deriveLSXIntrinsic.ret (v4i32 LSX128:$vj), timm:$imm), ++ (!cast(Inst) LSX128:$vj, (to_valide_timm timm:$imm))>; ++foreach Inst = ["VSAT_D", "VSAT_DU", "VROTRI_D", "VSRLRI_D", "VSRARI_D", ++ "VSEQI_D", "VSLEI_D", "VSLEI_DU", "VSLTI_D", "VSLTI_DU", ++ "VPICKVE2GR_D", "VPICKVE2GR_DU", ++ "VREPLVEI_D"] in ++ def : Pat<(deriveLSXIntrinsic.ret (v2i64 LSX128:$vj), timm:$imm), ++ (!cast(Inst) LSX128:$vj, (to_valide_timm timm:$imm))>; ++ ++// vty: v16i8/v8i16/v4i32/v2i64 ++// Pat<(Intrinsic vty:$vd, vty:$vj, timm:$imm) ++// (LAInst vty:$vd, vty:$vj, timm:$imm)>; ++foreach Inst = ["VSRLNI_B_H", "VSRANI_B_H", "VSRLRNI_B_H", "VSRARNI_B_H", ++ "VSSRLNI_B_H", "VSSRANI_B_H", "VSSRLNI_BU_H", "VSSRANI_BU_H", ++ "VSSRLRNI_B_H", "VSSRARNI_B_H", "VSSRLRNI_BU_H", "VSSRARNI_BU_H", ++ "VFRSTPI_B", "VBITSELI_B", "VEXTRINS_B"] in ++ def : Pat<(deriveLSXIntrinsic.ret ++ (v16i8 LSX128:$vd), (v16i8 LSX128:$vj), timm:$imm), ++ (!cast(Inst) LSX128:$vd, LSX128:$vj, ++ (to_valide_timm timm:$imm))>; ++foreach Inst = ["VSRLNI_H_W", "VSRANI_H_W", "VSRLRNI_H_W", "VSRARNI_H_W", ++ "VSSRLNI_H_W", "VSSRANI_H_W", "VSSRLNI_HU_W", "VSSRANI_HU_W", ++ "VSSRLRNI_H_W", "VSSRARNI_H_W", "VSSRLRNI_HU_W", "VSSRARNI_HU_W", ++ "VFRSTPI_H", "VEXTRINS_H"] in ++ def : Pat<(deriveLSXIntrinsic.ret ++ (v8i16 LSX128:$vd), (v8i16 LSX128:$vj), timm:$imm), ++ (!cast(Inst) LSX128:$vd, LSX128:$vj, ++ (to_valide_timm timm:$imm))>; ++foreach Inst = ["VSRLNI_W_D", "VSRANI_W_D", "VSRLRNI_W_D", "VSRARNI_W_D", ++ "VSSRLNI_W_D", "VSSRANI_W_D", "VSSRLNI_WU_D", "VSSRANI_WU_D", ++ "VSSRLRNI_W_D", "VSSRARNI_W_D", "VSSRLRNI_WU_D", "VSSRARNI_WU_D", ++ "VPERMI_W", "VEXTRINS_W"] in ++ def : Pat<(deriveLSXIntrinsic.ret ++ (v4i32 LSX128:$vd), (v4i32 LSX128:$vj), timm:$imm), ++ (!cast(Inst) LSX128:$vd, LSX128:$vj, ++ (to_valide_timm timm:$imm))>; ++foreach Inst = ["VSRLNI_D_Q", "VSRANI_D_Q", "VSRLRNI_D_Q", "VSRARNI_D_Q", ++ "VSSRLNI_D_Q", "VSSRANI_D_Q", "VSSRLNI_DU_Q", "VSSRANI_DU_Q", ++ "VSSRLRNI_D_Q", "VSSRARNI_D_Q", 
"VSSRLRNI_DU_Q", "VSSRARNI_DU_Q", ++ "VSHUF4I_D", "VEXTRINS_D"] in ++ def : Pat<(deriveLSXIntrinsic.ret ++ (v2i64 LSX128:$vd), (v2i64 LSX128:$vj), timm:$imm), ++ (!cast(Inst) LSX128:$vd, LSX128:$vj, ++ (to_valide_timm timm:$imm))>; ++ ++// vty: v16i8/v8i16/v4i32/v2i64 ++// Pat<(Intrinsic vty:$vd, vty:$vj, vty:$vk), ++// (LAInst vty:$vd, vty:$vj, vty:$vk)>; ++foreach Inst = ["VFRSTP_B", "VBITSEL_V", "VSHUF_B"] in ++ def : Pat<(deriveLSXIntrinsic.ret ++ (v16i8 LSX128:$vd), (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)), ++ (!cast(Inst) LSX128:$vd, LSX128:$vj, LSX128:$vk)>; ++foreach Inst = ["VFRSTP_H", "VSHUF_H"] in ++ def : Pat<(deriveLSXIntrinsic.ret ++ (v8i16 LSX128:$vd), (v8i16 LSX128:$vj), (v8i16 LSX128:$vk)), ++ (!cast(Inst) LSX128:$vd, LSX128:$vj, LSX128:$vk)>; ++def : Pat<(int_loongarch_lsx_vshuf_w (v4i32 LSX128:$vd), (v4i32 LSX128:$vj), ++ (v4i32 LSX128:$vk)), ++ (VSHUF_W LSX128:$vd, LSX128:$vj, LSX128:$vk)>; ++def : Pat<(int_loongarch_lsx_vshuf_d (v2i64 LSX128:$vd), (v2i64 LSX128:$vj), ++ (v2i64 LSX128:$vk)), ++ (VSHUF_D LSX128:$vd, LSX128:$vj, LSX128:$vk)>; ++ ++// vty: v4f32/v2f64 ++// Pat<(Intrinsic vty:$vj, vty:$vk, vty:$va), ++// (LAInst vty:$vj, vty:$vk, vty:$va)>; ++foreach Inst = ["VFMSUB_S", "VFNMADD_S", "VFNMSUB_S"] in ++ def : Pat<(deriveLSXIntrinsic.ret ++ (v4f32 LSX128:$vj), (v4f32 LSX128:$vk), (v4f32 LSX128:$va)), ++ (!cast(Inst) LSX128:$vj, LSX128:$vk, LSX128:$va)>; ++foreach Inst = ["VFMSUB_D", "VFNMADD_D", "VFNMSUB_D"] in ++ def : Pat<(deriveLSXIntrinsic.ret ++ (v2f64 LSX128:$vj), (v2f64 LSX128:$vk), (v2f64 LSX128:$va)), ++ (!cast(Inst) LSX128:$vj, LSX128:$vk, LSX128:$va)>; ++ ++// vty: v4f32/v2f64 ++// Pat<(Intrinsic vty:$vj, vty:$vk), ++// (LAInst vty:$vj, vty:$vk)>; ++foreach Inst = ["VFMAX_S", "VFMIN_S", "VFMAXA_S", "VFMINA_S", "VFCVT_H_S", ++ "VFCMP_CAF_S", "VFCMP_CUN_S", "VFCMP_CEQ_S", "VFCMP_CUEQ_S", ++ "VFCMP_CLT_S", "VFCMP_CULT_S", "VFCMP_CLE_S", "VFCMP_CULE_S", ++ "VFCMP_CNE_S", "VFCMP_COR_S", "VFCMP_CUNE_S", ++ "VFCMP_SAF_S", "VFCMP_SUN_S", "VFCMP_SEQ_S", "VFCMP_SUEQ_S", ++ "VFCMP_SLT_S", "VFCMP_SULT_S", "VFCMP_SLE_S", "VFCMP_SULE_S", ++ "VFCMP_SNE_S", "VFCMP_SOR_S", "VFCMP_SUNE_S"] in ++ def : Pat<(deriveLSXIntrinsic.ret ++ (v4f32 LSX128:$vj), (v4f32 LSX128:$vk)), ++ (!cast(Inst) LSX128:$vj, LSX128:$vk)>; ++foreach Inst = ["VFMAX_D", "VFMIN_D", "VFMAXA_D", "VFMINA_D", "VFCVT_S_D", ++ "VFTINTRNE_W_D", "VFTINTRZ_W_D", "VFTINTRP_W_D", "VFTINTRM_W_D", ++ "VFTINT_W_D", ++ "VFCMP_CAF_D", "VFCMP_CUN_D", "VFCMP_CEQ_D", "VFCMP_CUEQ_D", ++ "VFCMP_CLT_D", "VFCMP_CULT_D", "VFCMP_CLE_D", "VFCMP_CULE_D", ++ "VFCMP_CNE_D", "VFCMP_COR_D", "VFCMP_CUNE_D", ++ "VFCMP_SAF_D", "VFCMP_SUN_D", "VFCMP_SEQ_D", "VFCMP_SUEQ_D", ++ "VFCMP_SLT_D", "VFCMP_SULT_D", "VFCMP_SLE_D", "VFCMP_SULE_D", ++ "VFCMP_SNE_D", "VFCMP_SOR_D", "VFCMP_SUNE_D"] in ++ def : Pat<(deriveLSXIntrinsic.ret ++ (v2f64 LSX128:$vj), (v2f64 LSX128:$vk)), ++ (!cast(Inst) LSX128:$vj, LSX128:$vk)>; ++ ++// vty: v4f32/v2f64 ++// Pat<(Intrinsic vty:$vj), ++// (LAInst vty:$vj)>; ++foreach Inst = ["VFLOGB_S", "VFCLASS_S", "VFSQRT_S", "VFRECIP_S", "VFRSQRT_S", ++ "VFRINT_S", "VFCVTL_D_S", "VFCVTH_D_S", ++ "VFRINTRNE_S", "VFRINTRZ_S", "VFRINTRP_S", "VFRINTRM_S", ++ "VFTINTRNE_W_S", "VFTINTRZ_W_S", "VFTINTRP_W_S", "VFTINTRM_W_S", ++ "VFTINT_W_S", "VFTINTRZ_WU_S", "VFTINT_WU_S", ++ "VFTINTRNEL_L_S", "VFTINTRNEH_L_S", "VFTINTRZL_L_S", ++ "VFTINTRZH_L_S", "VFTINTRPL_L_S", "VFTINTRPH_L_S", ++ "VFTINTRML_L_S", "VFTINTRMH_L_S", "VFTINTL_L_S", ++ "VFTINTH_L_S"] in ++ def : Pat<(deriveLSXIntrinsic.ret (v4f32 LSX128:$vj)), ++ 
(!cast(Inst) LSX128:$vj)>; ++foreach Inst = ["VFLOGB_D", "VFCLASS_D", "VFSQRT_D", "VFRECIP_D", "VFRSQRT_D", ++ "VFRINT_D", ++ "VFRINTRNE_D", "VFRINTRZ_D", "VFRINTRP_D", "VFRINTRM_D", ++ "VFTINTRNE_L_D", "VFTINTRZ_L_D", "VFTINTRP_L_D", "VFTINTRM_L_D", ++ "VFTINT_L_D", "VFTINTRZ_LU_D", "VFTINT_LU_D"] in ++ def : Pat<(deriveLSXIntrinsic.ret (v2f64 LSX128:$vj)), ++ (!cast(Inst) LSX128:$vj)>; ++ ++// load ++def : Pat<(int_loongarch_lsx_vld GPR:$rj, timm:$imm), ++ (VLD GPR:$rj, (to_valide_timm timm:$imm))>; ++def : Pat<(int_loongarch_lsx_vldx GPR:$rj, GPR:$rk), ++ (VLDX GPR:$rj, GPR:$rk)>; ++ ++def : Pat<(int_loongarch_lsx_vldrepl_b GPR:$rj, timm:$imm), ++ (VLDREPL_B GPR:$rj, (to_valide_timm timm:$imm))>; ++def : Pat<(int_loongarch_lsx_vldrepl_h GPR:$rj, timm:$imm), ++ (VLDREPL_H GPR:$rj, (to_valide_timm timm:$imm))>; ++def : Pat<(int_loongarch_lsx_vldrepl_w GPR:$rj, timm:$imm), ++ (VLDREPL_W GPR:$rj, (to_valide_timm timm:$imm))>; ++def : Pat<(int_loongarch_lsx_vldrepl_d GPR:$rj, timm:$imm), ++ (VLDREPL_D GPR:$rj, (to_valide_timm timm:$imm))>; ++ ++// store ++def : Pat<(int_loongarch_lsx_vst LSX128:$vd, GPR:$rj, timm:$imm), ++ (VST LSX128:$vd, GPR:$rj, (to_valide_timm timm:$imm))>; ++def : Pat<(int_loongarch_lsx_vstx LSX128:$vd, GPR:$rj, GPR:$rk), ++ (VSTX LSX128:$vd, GPR:$rj, GPR:$rk)>; ++ ++def : Pat<(int_loongarch_lsx_vstelm_b v16i8:$vd, GPR:$rj, timm:$imm, timm:$idx), ++ (VSTELM_B v16i8:$vd, GPR:$rj, (to_valide_timm timm:$imm), ++ (to_valide_timm timm:$idx))>; ++def : Pat<(int_loongarch_lsx_vstelm_h v8i16:$vd, GPR:$rj, timm:$imm, timm:$idx), ++ (VSTELM_H v8i16:$vd, GPR:$rj, (to_valide_timm timm:$imm), ++ (to_valide_timm timm:$idx))>; ++def : Pat<(int_loongarch_lsx_vstelm_w v4i32:$vd, GPR:$rj, timm:$imm, timm:$idx), ++ (VSTELM_W v4i32:$vd, GPR:$rj, (to_valide_timm timm:$imm), ++ (to_valide_timm timm:$idx))>; ++def : Pat<(int_loongarch_lsx_vstelm_d v2i64:$vd, GPR:$rj, timm:$imm, timm:$idx), ++ (VSTELM_D v2i64:$vd, GPR:$rj, (to_valide_timm timm:$imm), ++ (to_valide_timm timm:$idx))>; ++ + } // Predicates = [HasExtLSX] +-- +2.20.1 + diff --git a/0013-LoongArch-Add-LASX-intrinsic-support.patch b/0013-LoongArch-Add-LASX-intrinsic-support.patch new file mode 100644 index 0000000000000000000000000000000000000000..ef7f501a4c9dfc66ce0f4ca295cd23fc52de31e9 --- /dev/null +++ b/0013-LoongArch-Add-LASX-intrinsic-support.patch @@ -0,0 +1,2241 @@ +From 091fc830c87c713f864c4030d3e750d9b8b144c9 Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Sat, 19 Aug 2023 16:53:50 +0800 +Subject: [PATCH 13/66] [LoongArch] Add LASX intrinsic support + +This patch is similar to D155829. 
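(Editor's aside, not part of the patch: a user-level view of one of the new LASX intrinsics, assuming clang's lasxintrin.h wrappers __m256i and __lasx_xvadd_w, which this patch does not provide:

#include <lasxintrin.h>

// 256-bit add over 8 x i32 lanes: the wrapper expands to
// __builtin_lasx_xvadd_w and reaches llvm.loongarch.lasx.xvadd.w, one of the
// intrinsics declared below; the ISel combine added by this patch rewrites
// that intrinsic call into a plain ISD::ADD node.
__m256i add_words(__m256i a, __m256i b) {
  return __lasx_xvadd_w(a, b);
}
)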
+ +Depends on D155829 + +Reviewed By: SixWeining + +Differential Revision: https://reviews.llvm.org/D155830 + +(cherry picked from commit 691f0d00b84f6ecaf8e341ef38256e939cca6b1e) + +Change-Id: I85eb27b32b32edd69cc37e51d69d61cb6f4ef52d +--- + llvm/include/llvm/IR/IntrinsicsLoongArch.td | 523 +++++++++++++ + .../LoongArch/LoongArchISelLowering.cpp | 402 +++++++++- + .../Target/LoongArch/LoongArchInstrInfo.cpp | 12 + + .../LoongArch/LoongArchLASXInstrInfo.td | 702 ++++++++++++++++++ + 4 files changed, 1633 insertions(+), 6 deletions(-) + +diff --git a/llvm/include/llvm/IR/IntrinsicsLoongArch.td b/llvm/include/llvm/IR/IntrinsicsLoongArch.td +index d39d8261ebe3..685deaec7709 100644 +--- a/llvm/include/llvm/IR/IntrinsicsLoongArch.td ++++ b/llvm/include/llvm/IR/IntrinsicsLoongArch.td +@@ -647,3 +647,526 @@ def int_loongarch_lsx_vstelm_d + [IntrWriteMem, IntrArgMemOnly, ImmArg>, ImmArg>]>; + + } // TargetPrefix = "loongarch" ++ ++//===----------------------------------------------------------------------===// ++// LASX ++ ++let TargetPrefix = "loongarch" in { ++foreach inst = ["xvadd_b", "xvsub_b", ++ "xvsadd_b", "xvsadd_bu", "xvssub_b", "xvssub_bu", ++ "xvavg_b", "xvavg_bu", "xvavgr_b", "xvavgr_bu", ++ "xvabsd_b", "xvabsd_bu", "xvadda_b", ++ "xvmax_b", "xvmax_bu", "xvmin_b", "xvmin_bu", ++ "xvmul_b", "xvmuh_b", "xvmuh_bu", ++ "xvdiv_b", "xvdiv_bu", "xvmod_b", "xvmod_bu", "xvsigncov_b", ++ "xvand_v", "xvor_v", "xvxor_v", "xvnor_v", "xvandn_v", "xvorn_v", ++ "xvsll_b", "xvsrl_b", "xvsra_b", "xvrotr_b", "xvsrlr_b", "xvsrar_b", ++ "xvbitclr_b", "xvbitset_b", "xvbitrev_b", ++ "xvseq_b", "xvsle_b", "xvsle_bu", "xvslt_b", "xvslt_bu", ++ "xvpackev_b", "xvpackod_b", "xvpickev_b", "xvpickod_b", ++ "xvilvl_b", "xvilvh_b"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v32i8_ty], ++ [llvm_v32i8_ty, llvm_v32i8_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["xvadd_h", "xvsub_h", ++ "xvsadd_h", "xvsadd_hu", "xvssub_h", "xvssub_hu", ++ "xvavg_h", "xvavg_hu", "xvavgr_h", "xvavgr_hu", ++ "xvabsd_h", "xvabsd_hu", "xvadda_h", ++ "xvmax_h", "xvmax_hu", "xvmin_h", "xvmin_hu", ++ "xvmul_h", "xvmuh_h", "xvmuh_hu", ++ "xvdiv_h", "xvdiv_hu", "xvmod_h", "xvmod_hu", "xvsigncov_h", ++ "xvsll_h", "xvsrl_h", "xvsra_h", "xvrotr_h", "xvsrlr_h", "xvsrar_h", ++ "xvbitclr_h", "xvbitset_h", "xvbitrev_h", ++ "xvseq_h", "xvsle_h", "xvsle_hu", "xvslt_h", "xvslt_hu", ++ "xvpackev_h", "xvpackod_h", "xvpickev_h", "xvpickod_h", ++ "xvilvl_h", "xvilvh_h"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v16i16_ty], ++ [llvm_v16i16_ty, llvm_v16i16_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["xvadd_w", "xvsub_w", ++ "xvsadd_w", "xvsadd_wu", "xvssub_w", "xvssub_wu", ++ "xvavg_w", "xvavg_wu", "xvavgr_w", "xvavgr_wu", ++ "xvabsd_w", "xvabsd_wu", "xvadda_w", ++ "xvmax_w", "xvmax_wu", "xvmin_w", "xvmin_wu", ++ "xvmul_w", "xvmuh_w", "xvmuh_wu", ++ "xvdiv_w", "xvdiv_wu", "xvmod_w", "xvmod_wu", "xvsigncov_w", ++ "xvsll_w", "xvsrl_w", "xvsra_w", "xvrotr_w", "xvsrlr_w", "xvsrar_w", ++ "xvbitclr_w", "xvbitset_w", "xvbitrev_w", ++ "xvseq_w", "xvsle_w", "xvsle_wu", "xvslt_w", "xvslt_wu", ++ "xvpackev_w", "xvpackod_w", "xvpickev_w", "xvpickod_w", ++ "xvilvl_w", "xvilvh_w", "xvperm_w"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], ++ [llvm_v8i32_ty, llvm_v8i32_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["xvadd_d", "xvadd_q", "xvsub_d", "xvsub_q", ++ "xvsadd_d", "xvsadd_du", "xvssub_d", "xvssub_du", ++ "xvhaddw_q_d", "xvhaddw_qu_du", "xvhsubw_q_d", "xvhsubw_qu_du", ++ "xvaddwev_q_d", "xvaddwod_q_d", "xvsubwev_q_d", "xvsubwod_q_d", ++ 
"xvaddwev_q_du", "xvaddwod_q_du", "xvsubwev_q_du", "xvsubwod_q_du", ++ "xvaddwev_q_du_d", "xvaddwod_q_du_d", ++ "xvavg_d", "xvavg_du", "xvavgr_d", "xvavgr_du", ++ "xvabsd_d", "xvabsd_du", "xvadda_d", ++ "xvmax_d", "xvmax_du", "xvmin_d", "xvmin_du", ++ "xvmul_d", "xvmuh_d", "xvmuh_du", ++ "xvmulwev_q_d", "xvmulwod_q_d", "xvmulwev_q_du", "xvmulwod_q_du", ++ "xvmulwev_q_du_d", "xvmulwod_q_du_d", ++ "xvdiv_d", "xvdiv_du", "xvmod_d", "xvmod_du", "xvsigncov_d", ++ "xvsll_d", "xvsrl_d", "xvsra_d", "xvrotr_d", "xvsrlr_d", "xvsrar_d", ++ "xvbitclr_d", "xvbitset_d", "xvbitrev_d", ++ "xvseq_d", "xvsle_d", "xvsle_du", "xvslt_d", "xvslt_du", ++ "xvpackev_d", "xvpackod_d", "xvpickev_d", "xvpickod_d", ++ "xvilvl_d", "xvilvh_d"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], ++ [llvm_v4i64_ty, llvm_v4i64_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["xvaddi_bu", "xvsubi_bu", ++ "xvmaxi_b", "xvmaxi_bu", "xvmini_b", "xvmini_bu", ++ "xvsat_b", "xvsat_bu", ++ "xvandi_b", "xvori_b", "xvxori_b", "xvnori_b", ++ "xvslli_b", "xvsrli_b", "xvsrai_b", "xvrotri_b", ++ "xvsrlri_b", "xvsrari_b", ++ "xvbitclri_b", "xvbitseti_b", "xvbitrevi_b", ++ "xvseqi_b", "xvslei_b", "xvslei_bu", "xvslti_b", "xvslti_bu", ++ "xvrepl128vei_b", "xvbsll_v", "xvbsrl_v", "xvshuf4i_b"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v32i8_ty], ++ [llvm_v32i8_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++foreach inst = ["xvaddi_hu", "xvsubi_hu", ++ "xvmaxi_h", "xvmaxi_hu", "xvmini_h", "xvmini_hu", ++ "xvsat_h", "xvsat_hu", ++ "xvslli_h", "xvsrli_h", "xvsrai_h", "xvrotri_h", ++ "xvsrlri_h", "xvsrari_h", ++ "xvbitclri_h", "xvbitseti_h", "xvbitrevi_h", ++ "xvseqi_h", "xvslei_h", "xvslei_hu", "xvslti_h", "xvslti_hu", ++ "xvrepl128vei_h", "xvshuf4i_h"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v16i16_ty], ++ [llvm_v16i16_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++foreach inst = ["xvaddi_wu", "xvsubi_wu", ++ "xvmaxi_w", "xvmaxi_wu", "xvmini_w", "xvmini_wu", ++ "xvsat_w", "xvsat_wu", ++ "xvslli_w", "xvsrli_w", "xvsrai_w", "xvrotri_w", ++ "xvsrlri_w", "xvsrari_w", ++ "xvbitclri_w", "xvbitseti_w", "xvbitrevi_w", ++ "xvseqi_w", "xvslei_w", "xvslei_wu", "xvslti_w", "xvslti_wu", ++ "xvrepl128vei_w", "xvshuf4i_w", "xvpickve_w"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], ++ [llvm_v8i32_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++foreach inst = ["xvaddi_du", "xvsubi_du", ++ "xvmaxi_d", "xvmaxi_du", "xvmini_d", "xvmini_du", ++ "xvsat_d", "xvsat_du", ++ "xvslli_d", "xvsrli_d", "xvsrai_d", "xvrotri_d", ++ "xvsrlri_d", "xvsrari_d", ++ "xvbitclri_d", "xvbitseti_d", "xvbitrevi_d", ++ "xvseqi_d", "xvslei_d", "xvslei_du", "xvslti_d", "xvslti_du", ++ "xvrepl128vei_d", "xvpermi_d", "xvpickve_d"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], ++ [llvm_v4i64_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++ ++foreach inst = ["xvhaddw_h_b", "xvhaddw_hu_bu", "xvhsubw_h_b", "xvhsubw_hu_bu", ++ "xvaddwev_h_b", "xvaddwod_h_b", "xvsubwev_h_b", "xvsubwod_h_b", ++ "xvaddwev_h_bu", "xvaddwod_h_bu", "xvsubwev_h_bu", "xvsubwod_h_bu", ++ "xvaddwev_h_bu_b", "xvaddwod_h_bu_b", ++ "xvmulwev_h_b", "xvmulwod_h_b", "xvmulwev_h_bu", "xvmulwod_h_bu", ++ "xvmulwev_h_bu_b", "xvmulwod_h_bu_b"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v16i16_ty], ++ [llvm_v32i8_ty, llvm_v32i8_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["xvhaddw_w_h", "xvhaddw_wu_hu", "xvhsubw_w_h", "xvhsubw_wu_hu", ++ "xvaddwev_w_h", "xvaddwod_w_h", "xvsubwev_w_h", "xvsubwod_w_h", ++ "xvaddwev_w_hu", "xvaddwod_w_hu", "xvsubwev_w_hu", "xvsubwod_w_hu", ++ 
"xvaddwev_w_hu_h", "xvaddwod_w_hu_h", ++ "xvmulwev_w_h", "xvmulwod_w_h", "xvmulwev_w_hu", "xvmulwod_w_hu", ++ "xvmulwev_w_hu_h", "xvmulwod_w_hu_h"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], ++ [llvm_v16i16_ty, llvm_v16i16_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["xvhaddw_d_w", "xvhaddw_du_wu", "xvhsubw_d_w", "xvhsubw_du_wu", ++ "xvaddwev_d_w", "xvaddwod_d_w", "xvsubwev_d_w", "xvsubwod_d_w", ++ "xvaddwev_d_wu", "xvaddwod_d_wu", "xvsubwev_d_wu", "xvsubwod_d_wu", ++ "xvaddwev_d_wu_w", "xvaddwod_d_wu_w", ++ "xvmulwev_d_w", "xvmulwod_d_w", "xvmulwev_d_wu", "xvmulwod_d_wu", ++ "xvmulwev_d_wu_w", "xvmulwod_d_wu_w"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], ++ [llvm_v8i32_ty, llvm_v8i32_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["xvsrln_b_h", "xvsran_b_h", "xvsrlrn_b_h", "xvsrarn_b_h", ++ "xvssrln_b_h", "xvssran_b_h", "xvssrln_bu_h", "xvssran_bu_h", ++ "xvssrlrn_b_h", "xvssrarn_b_h", "xvssrlrn_bu_h", "xvssrarn_bu_h"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v32i8_ty], ++ [llvm_v16i16_ty, llvm_v16i16_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["xvsrln_h_w", "xvsran_h_w", "xvsrlrn_h_w", "xvsrarn_h_w", ++ "xvssrln_h_w", "xvssran_h_w", "xvssrln_hu_w", "xvssran_hu_w", ++ "xvssrlrn_h_w", "xvssrarn_h_w", "xvssrlrn_hu_w", "xvssrarn_hu_w"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v16i16_ty], ++ [llvm_v8i32_ty, llvm_v8i32_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["xvsrln_w_d", "xvsran_w_d", "xvsrlrn_w_d", "xvsrarn_w_d", ++ "xvssrln_w_d", "xvssran_w_d", "xvssrln_wu_d", "xvssran_wu_d", ++ "xvssrlrn_w_d", "xvssrarn_w_d", "xvssrlrn_wu_d", "xvssrarn_wu_d"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], ++ [llvm_v4i64_ty, llvm_v4i64_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["xvmadd_b", "xvmsub_b", "xvfrstp_b", "xvbitsel_v", "xvshuf_b"] in ++ def int_loongarch_lasx_#inst ++ : VecInt<[llvm_v32i8_ty], ++ [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty], ++ [IntrNoMem]>; ++foreach inst = ["xvmadd_h", "xvmsub_h", "xvfrstp_h", "xvshuf_h"] in ++ def int_loongarch_lasx_#inst ++ : VecInt<[llvm_v16i16_ty], ++ [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty], ++ [IntrNoMem]>; ++foreach inst = ["xvmadd_w", "xvmsub_w", "xvshuf_w"] in ++ def int_loongarch_lasx_#inst ++ : VecInt<[llvm_v8i32_ty], ++ [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty], ++ [IntrNoMem]>; ++foreach inst = ["xvmadd_d", "xvmsub_d", "xvshuf_d"] in ++ def int_loongarch_lasx_#inst ++ : VecInt<[llvm_v4i64_ty], ++ [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["xvsrlni_b_h", "xvsrani_b_h", "xvsrlrni_b_h", "xvsrarni_b_h", ++ "xvssrlni_b_h", "xvssrani_b_h", "xvssrlni_bu_h", "xvssrani_bu_h", ++ "xvssrlrni_b_h", "xvssrarni_b_h", "xvssrlrni_bu_h", "xvssrarni_bu_h", ++ "xvfrstpi_b", "xvbitseli_b", "xvextrins_b", "xvpermi_q"] in ++ def int_loongarch_lasx_#inst ++ : VecInt<[llvm_v32i8_ty], ++ [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++foreach inst = ["xvsrlni_h_w", "xvsrani_h_w", "xvsrlrni_h_w", "xvsrarni_h_w", ++ "xvssrlni_h_w", "xvssrani_h_w", "xvssrlni_hu_w", "xvssrani_hu_w", ++ "xvssrlrni_h_w", "xvssrarni_h_w", "xvssrlrni_hu_w", "xvssrarni_hu_w", ++ "xvfrstpi_h", "xvextrins_h"] in ++ def int_loongarch_lasx_#inst ++ : VecInt<[llvm_v16i16_ty], ++ [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++foreach inst = ["xvsrlni_w_d", "xvsrani_w_d", "xvsrlrni_w_d", "xvsrarni_w_d", ++ "xvssrlni_w_d", "xvssrani_w_d", "xvssrlni_wu_d", "xvssrani_wu_d", ++ "xvssrlrni_w_d", "xvssrarni_w_d", "xvssrlrni_wu_d", 
"xvssrarni_wu_d", ++ "xvpermi_w", "xvextrins_w", "xvinsve0_w"] in ++ def int_loongarch_lasx_#inst ++ : VecInt<[llvm_v8i32_ty], ++ [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++foreach inst = ["xvsrlni_d_q", "xvsrani_d_q", "xvsrlrni_d_q", "xvsrarni_d_q", ++ "xvssrlni_d_q", "xvssrani_d_q", "xvssrlni_du_q", "xvssrani_du_q", ++ "xvssrlrni_d_q", "xvssrarni_d_q", "xvssrlrni_du_q", "xvssrarni_du_q", ++ "xvshuf4i_d", "xvextrins_d", "xvinsve0_d"] in ++ def int_loongarch_lasx_#inst ++ : VecInt<[llvm_v4i64_ty], ++ [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++ ++foreach inst = ["xvmaddwev_h_b", "xvmaddwod_h_b", "xvmaddwev_h_bu", ++ "xvmaddwod_h_bu", "xvmaddwev_h_bu_b", "xvmaddwod_h_bu_b"] in ++ def int_loongarch_lasx_#inst ++ : VecInt<[llvm_v16i16_ty], ++ [llvm_v16i16_ty, llvm_v32i8_ty, llvm_v32i8_ty], ++ [IntrNoMem]>; ++foreach inst = ["xvmaddwev_w_h", "xvmaddwod_w_h", "xvmaddwev_w_hu", ++ "xvmaddwod_w_hu", "xvmaddwev_w_hu_h", "xvmaddwod_w_hu_h"] in ++ def int_loongarch_lasx_#inst ++ : VecInt<[llvm_v8i32_ty], ++ [llvm_v8i32_ty, llvm_v16i16_ty, llvm_v16i16_ty], ++ [IntrNoMem]>; ++foreach inst = ["xvmaddwev_d_w", "xvmaddwod_d_w", "xvmaddwev_d_wu", ++ "xvmaddwod_d_wu", "xvmaddwev_d_wu_w", "xvmaddwod_d_wu_w"] in ++ def int_loongarch_lasx_#inst ++ : VecInt<[llvm_v4i64_ty], ++ [llvm_v4i64_ty, llvm_v8i32_ty, llvm_v8i32_ty], ++ [IntrNoMem]>; ++foreach inst = ["xvmaddwev_q_d", "xvmaddwod_q_d", "xvmaddwev_q_du", ++ "xvmaddwod_q_du", "xvmaddwev_q_du_d", "xvmaddwod_q_du_d"] in ++ def int_loongarch_lasx_#inst ++ : VecInt<[llvm_v4i64_ty], ++ [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["xvsllwil_h_b", "xvsllwil_hu_bu"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v16i16_ty], ++ [llvm_v32i8_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++foreach inst = ["xvsllwil_w_h", "xvsllwil_wu_hu"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], ++ [llvm_v16i16_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++foreach inst = ["xvsllwil_d_w", "xvsllwil_du_wu"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], ++ [llvm_v8i32_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++ ++foreach inst = ["xvneg_b", "xvmskltz_b", "xvmskgez_b", "xvmsknz_b", ++ "xvclo_b", "xvclz_b", "xvpcnt_b", ++ "xvreplve0_b", "xvreplve0_q"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v32i8_ty], [llvm_v32i8_ty], ++ [IntrNoMem]>; ++foreach inst = ["xvneg_h", "xvmskltz_h", "xvclo_h", "xvclz_h", "xvpcnt_h", ++ "xvreplve0_h"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v16i16_ty], [llvm_v16i16_ty], ++ [IntrNoMem]>; ++foreach inst = ["xvneg_w", "xvmskltz_w", "xvclo_w", "xvclz_w", "xvpcnt_w", ++ "xvreplve0_w"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], [llvm_v8i32_ty], ++ [IntrNoMem]>; ++foreach inst = ["xvneg_d", "xvexth_q_d", "xvexth_qu_du", "xvmskltz_d", ++ "xvextl_q_d", "xvextl_qu_du", "xvclo_d", "xvclz_d", "xvpcnt_d", ++ "xvreplve0_d"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], [llvm_v4i64_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["xvexth_h_b", "xvexth_hu_bu", "vext2xv_h_b", "vext2xv_hu_bu"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v16i16_ty], [llvm_v32i8_ty], ++ [IntrNoMem]>; ++foreach inst = ["xvexth_w_h", "xvexth_wu_hu", "vext2xv_w_h", "vext2xv_wu_hu"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], [llvm_v16i16_ty], ++ [IntrNoMem]>; ++foreach inst = ["xvexth_d_w", "xvexth_du_wu", "vext2xv_d_w", "vext2xv_du_wu"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], 
[llvm_v8i32_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["vext2xv_w_b", "vext2xv_wu_bu"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], [llvm_v32i8_ty], ++ [IntrNoMem]>; ++foreach inst = ["vext2xv_d_h", "vext2xv_du_hu"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], [llvm_v16i16_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["vext2xv_d_b", "vext2xv_du_bu"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], [llvm_v32i8_ty], ++ [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvldi : VecInt<[llvm_v4i64_ty], [llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++def int_loongarch_lasx_xvrepli_b : VecInt<[llvm_v32i8_ty], [llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++def int_loongarch_lasx_xvrepli_h : VecInt<[llvm_v16i16_ty], [llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++def int_loongarch_lasx_xvrepli_w : VecInt<[llvm_v8i32_ty], [llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++def int_loongarch_lasx_xvrepli_d : VecInt<[llvm_v4i64_ty], [llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++ ++def int_loongarch_lasx_xvreplgr2vr_b : VecInt<[llvm_v32i8_ty], [llvm_i32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xvreplgr2vr_h : VecInt<[llvm_v16i16_ty], [llvm_i32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xvreplgr2vr_w : VecInt<[llvm_v8i32_ty], [llvm_i32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xvreplgr2vr_d : VecInt<[llvm_v4i64_ty], [llvm_i64_ty], ++ [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvinsgr2vr_w ++ : VecInt<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++def int_loongarch_lasx_xvinsgr2vr_d ++ : VecInt<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i64_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++ ++def int_loongarch_lasx_xvreplve_b ++ : VecInt<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvreplve_h ++ : VecInt<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvreplve_w ++ : VecInt<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvreplve_d ++ : VecInt<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++foreach inst = ["xvpickve2gr_w", "xvpickve2gr_wu" ] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_i32_ty], ++ [llvm_v8i32_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++foreach inst = ["xvpickve2gr_d", "xvpickve2gr_du" ] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_i64_ty], ++ [llvm_v4i64_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++ ++def int_loongarch_lasx_xbz_b : VecInt<[llvm_i32_ty], [llvm_v32i8_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xbz_h : VecInt<[llvm_i32_ty], [llvm_v16i16_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xbz_w : VecInt<[llvm_i32_ty], [llvm_v8i32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xbz_d : VecInt<[llvm_i32_ty], [llvm_v4i64_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xbz_v : VecInt<[llvm_i32_ty], [llvm_v32i8_ty], ++ [IntrNoMem]>; ++ ++def int_loongarch_lasx_xbnz_v : VecInt<[llvm_i32_ty], [llvm_v32i8_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xbnz_b : VecInt<[llvm_i32_ty], [llvm_v32i8_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xbnz_h : VecInt<[llvm_i32_ty], [llvm_v16i16_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xbnz_w : VecInt<[llvm_i32_ty], [llvm_v8i32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xbnz_d : VecInt<[llvm_i32_ty], [llvm_v4i64_ty], ++ [IntrNoMem]>; ++ ++// LASX Float ++ ++foreach inst = ["xvfadd_s", "xvfsub_s", "xvfmul_s", "xvfdiv_s", ++ "xvfmax_s", "xvfmin_s", "xvfmaxa_s", "xvfmina_s"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v8f32_ty], ++ 
[llvm_v8f32_ty, llvm_v8f32_ty], ++ [IntrNoMem]>; ++foreach inst = ["xvfadd_d", "xvfsub_d", "xvfmul_d", "xvfdiv_d", ++ "xvfmax_d", "xvfmin_d", "xvfmaxa_d", "xvfmina_d"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v4f64_ty], ++ [llvm_v4f64_ty, llvm_v4f64_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["xvfmadd_s", "xvfmsub_s", "xvfnmadd_s", "xvfnmsub_s"] in ++ def int_loongarch_lasx_#inst ++ : VecInt<[llvm_v8f32_ty], ++ [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty], ++ [IntrNoMem]>; ++foreach inst = ["xvfmadd_d", "xvfmsub_d", "xvfnmadd_d", "xvfnmsub_d"] in ++ def int_loongarch_lasx_#inst ++ : VecInt<[llvm_v4f64_ty], ++ [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["xvflogb_s", "xvfsqrt_s", "xvfrecip_s", "xvfrsqrt_s", "xvfrint_s", ++ "xvfrintrne_s", "xvfrintrz_s", "xvfrintrp_s", "xvfrintrm_s"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v8f32_ty], [llvm_v8f32_ty], ++ [IntrNoMem]>; ++foreach inst = ["xvflogb_d", "xvfsqrt_d", "xvfrecip_d", "xvfrsqrt_d", "xvfrint_d", ++ "xvfrintrne_d", "xvfrintrz_d", "xvfrintrp_d", "xvfrintrm_d"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v4f64_ty], [llvm_v4f64_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["xvfcvtl_s_h", "xvfcvth_s_h"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v8f32_ty], [llvm_v16i16_ty], ++ [IntrNoMem]>; ++foreach inst = ["xvfcvtl_d_s", "xvfcvth_d_s"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v4f64_ty], [llvm_v8f32_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["xvftintrne_w_s", "xvftintrz_w_s", "xvftintrp_w_s", "xvftintrm_w_s", ++ "xvftint_w_s", "xvftintrz_wu_s", "xvftint_wu_s", "xvfclass_s"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], [llvm_v8f32_ty], ++ [IntrNoMem]>; ++foreach inst = ["xvftintrne_l_d", "xvftintrz_l_d", "xvftintrp_l_d", "xvftintrm_l_d", ++ "xvftint_l_d", "xvftintrz_lu_d", "xvftint_lu_d", "xvfclass_d"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], [llvm_v4f64_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["xvftintrnel_l_s", "xvftintrneh_l_s", "xvftintrzl_l_s", ++ "xvftintrzh_l_s", "xvftintrpl_l_s", "xvftintrph_l_s", ++ "xvftintrml_l_s", "xvftintrmh_l_s", "xvftintl_l_s", ++ "xvftinth_l_s"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], [llvm_v8f32_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["xvffint_s_w", "xvffint_s_wu"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v8f32_ty], [llvm_v8i32_ty], ++ [IntrNoMem]>; ++foreach inst = ["xvffint_d_l", "xvffint_d_lu"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v4f64_ty], [llvm_v4i64_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["xvffintl_d_w", "xvffinth_d_w"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v4f64_ty], [llvm_v8i32_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["xvffint_s_l"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v8f32_ty], ++ [llvm_v4i64_ty, llvm_v4i64_ty], ++ [IntrNoMem]>; ++foreach inst = ["xvftintrne_w_d", "xvftintrz_w_d", "xvftintrp_w_d", "xvftintrm_w_d", ++ "xvftint_w_d"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], ++ [llvm_v4f64_ty, llvm_v4f64_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["xvfcvt_h_s"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v16i16_ty], ++ [llvm_v8f32_ty, llvm_v8f32_ty], ++ [IntrNoMem]>; ++foreach inst = ["xvfcvt_s_d"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v8f32_ty], ++ [llvm_v4f64_ty, llvm_v4f64_ty], ++ [IntrNoMem]>; ++ ++foreach inst = ["xvfcmp_caf_s", "xvfcmp_cun_s", "xvfcmp_ceq_s", "xvfcmp_cueq_s", ++ "xvfcmp_clt_s", "xvfcmp_cult_s", "xvfcmp_cle_s", "xvfcmp_cule_s", ++ "xvfcmp_cne_s", 
"xvfcmp_cor_s", "xvfcmp_cune_s", ++ "xvfcmp_saf_s", "xvfcmp_sun_s", "xvfcmp_seq_s", "xvfcmp_sueq_s", ++ "xvfcmp_slt_s", "xvfcmp_sult_s", "xvfcmp_sle_s", "xvfcmp_sule_s", ++ "xvfcmp_sne_s", "xvfcmp_sor_s", "xvfcmp_sune_s"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], ++ [llvm_v8f32_ty, llvm_v8f32_ty], ++ [IntrNoMem]>; ++foreach inst = ["xvfcmp_caf_d", "xvfcmp_cun_d", "xvfcmp_ceq_d", "xvfcmp_cueq_d", ++ "xvfcmp_clt_d", "xvfcmp_cult_d", "xvfcmp_cle_d", "xvfcmp_cule_d", ++ "xvfcmp_cne_d", "xvfcmp_cor_d", "xvfcmp_cune_d", ++ "xvfcmp_saf_d", "xvfcmp_sun_d", "xvfcmp_seq_d", "xvfcmp_sueq_d", ++ "xvfcmp_slt_d", "xvfcmp_sult_d", "xvfcmp_sle_d", "xvfcmp_sule_d", ++ "xvfcmp_sne_d", "xvfcmp_sor_d", "xvfcmp_sune_d"] in ++ def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], ++ [llvm_v4f64_ty, llvm_v4f64_ty], ++ [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvpickve_w_f ++ : VecInt<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++def int_loongarch_lasx_xvpickve_d_f ++ : VecInt<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_i32_ty], ++ [IntrNoMem, ImmArg>]>; ++ ++// LASX load/store ++def int_loongarch_lasx_xvld ++ : VecInt<[llvm_v32i8_ty], [llvm_ptr_ty, llvm_i32_ty], ++ [IntrReadMem, IntrArgMemOnly, ImmArg>]>; ++def int_loongarch_lasx_xvldx ++ : VecInt<[llvm_v32i8_ty], [llvm_ptr_ty, llvm_i64_ty], ++ [IntrReadMem, IntrArgMemOnly]>; ++def int_loongarch_lasx_xvldrepl_b ++ : VecInt<[llvm_v32i8_ty], [llvm_ptr_ty, llvm_i32_ty], ++ [IntrReadMem, IntrArgMemOnly, ImmArg>]>; ++def int_loongarch_lasx_xvldrepl_h ++ : VecInt<[llvm_v16i16_ty], [llvm_ptr_ty, llvm_i32_ty], ++ [IntrReadMem, IntrArgMemOnly, ImmArg>]>; ++def int_loongarch_lasx_xvldrepl_w ++ : VecInt<[llvm_v8i32_ty], [llvm_ptr_ty, llvm_i32_ty], ++ [IntrReadMem, IntrArgMemOnly, ImmArg>]>; ++def int_loongarch_lasx_xvldrepl_d ++ : VecInt<[llvm_v4i64_ty], [llvm_ptr_ty, llvm_i32_ty], ++ [IntrReadMem, IntrArgMemOnly, ImmArg>]>; ++ ++def int_loongarch_lasx_xvst ++ : VecInt<[], [llvm_v32i8_ty, llvm_ptr_ty, llvm_i32_ty], ++ [IntrWriteMem, IntrArgMemOnly, ImmArg>]>; ++def int_loongarch_lasx_xvstx ++ : VecInt<[], [llvm_v32i8_ty, llvm_ptr_ty, llvm_i64_ty], ++ [IntrWriteMem, IntrArgMemOnly]>; ++def int_loongarch_lasx_xvstelm_b ++ : VecInt<[], [llvm_v32i8_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], ++ [IntrWriteMem, IntrArgMemOnly, ImmArg>, ImmArg>]>; ++def int_loongarch_lasx_xvstelm_h ++ : VecInt<[], [llvm_v16i16_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], ++ [IntrWriteMem, IntrArgMemOnly, ImmArg>, ImmArg>]>; ++def int_loongarch_lasx_xvstelm_w ++ : VecInt<[], [llvm_v8i32_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], ++ [IntrWriteMem, IntrArgMemOnly, ImmArg>, ImmArg>]>; ++def int_loongarch_lasx_xvstelm_d ++ : VecInt<[], [llvm_v4i64_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], ++ [IntrWriteMem, IntrArgMemOnly, ImmArg>, ImmArg>]>; ++} // TargetPrefix = "loongarch" +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +index c05133647929..3a40cd06a3eb 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +@@ -64,11 +64,17 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + + static const MVT::SimpleValueType LSXVTs[] = { + MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64}; ++ static const MVT::SimpleValueType LASXVTs[] = { ++ MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64}; + + if (Subtarget.hasExtLSX()) + for (MVT VT : LSXVTs) + 
addRegisterClass(VT, &LoongArch::LSX128RegClass); + ++ if (Subtarget.hasExtLASX()) ++ for (MVT VT : LASXVTs) ++ addRegisterClass(VT, &LoongArch::LASX256RegClass); ++ + setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, GRLenVT, + MVT::i1, Promote); + +@@ -207,6 +213,11 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, + {MVT::v2i64, MVT::v4i32, MVT::v8i16, MVT::v16i8}, Legal); + ++ if (Subtarget.hasExtLASX()) ++ setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, ++ {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}, ++ Legal); ++ + // Compute derived properties from the register classes. + computeRegisterProperties(Subtarget.getRegisterInfo()); + +@@ -695,9 +706,17 @@ LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, + case Intrinsic::loongarch_lsx_vpickve2gr_d: + case Intrinsic::loongarch_lsx_vpickve2gr_du: + case Intrinsic::loongarch_lsx_vreplvei_d: ++ case Intrinsic::loongarch_lasx_xvrepl128vei_d: + return checkIntrinsicImmArg<1>(Op, 2, DAG); + case Intrinsic::loongarch_lsx_vreplvei_w: ++ case Intrinsic::loongarch_lasx_xvrepl128vei_w: ++ case Intrinsic::loongarch_lasx_xvpickve2gr_d: ++ case Intrinsic::loongarch_lasx_xvpickve2gr_du: ++ case Intrinsic::loongarch_lasx_xvpickve_d: ++ case Intrinsic::loongarch_lasx_xvpickve_d_f: + return checkIntrinsicImmArg<2>(Op, 2, DAG); ++ case Intrinsic::loongarch_lasx_xvinsve0_d: ++ return checkIntrinsicImmArg<2>(Op, 3, DAG); + case Intrinsic::loongarch_lsx_vsat_b: + case Intrinsic::loongarch_lsx_vsat_bu: + case Intrinsic::loongarch_lsx_vrotri_b: +@@ -706,7 +725,19 @@ LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, + case Intrinsic::loongarch_lsx_vsrlri_b: + case Intrinsic::loongarch_lsx_vsrari_b: + case Intrinsic::loongarch_lsx_vreplvei_h: ++ case Intrinsic::loongarch_lasx_xvsat_b: ++ case Intrinsic::loongarch_lasx_xvsat_bu: ++ case Intrinsic::loongarch_lasx_xvrotri_b: ++ case Intrinsic::loongarch_lasx_xvsllwil_h_b: ++ case Intrinsic::loongarch_lasx_xvsllwil_hu_bu: ++ case Intrinsic::loongarch_lasx_xvsrlri_b: ++ case Intrinsic::loongarch_lasx_xvsrari_b: ++ case Intrinsic::loongarch_lasx_xvrepl128vei_h: ++ case Intrinsic::loongarch_lasx_xvpickve_w: ++ case Intrinsic::loongarch_lasx_xvpickve_w_f: + return checkIntrinsicImmArg<3>(Op, 2, DAG); ++ case Intrinsic::loongarch_lasx_xvinsve0_w: ++ return checkIntrinsicImmArg<3>(Op, 3, DAG); + case Intrinsic::loongarch_lsx_vsat_h: + case Intrinsic::loongarch_lsx_vsat_hu: + case Intrinsic::loongarch_lsx_vrotri_h: +@@ -715,6 +746,14 @@ LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, + case Intrinsic::loongarch_lsx_vsrlri_h: + case Intrinsic::loongarch_lsx_vsrari_h: + case Intrinsic::loongarch_lsx_vreplvei_b: ++ case Intrinsic::loongarch_lasx_xvsat_h: ++ case Intrinsic::loongarch_lasx_xvsat_hu: ++ case Intrinsic::loongarch_lasx_xvrotri_h: ++ case Intrinsic::loongarch_lasx_xvsllwil_w_h: ++ case Intrinsic::loongarch_lasx_xvsllwil_wu_hu: ++ case Intrinsic::loongarch_lasx_xvsrlri_h: ++ case Intrinsic::loongarch_lasx_xvsrari_h: ++ case Intrinsic::loongarch_lasx_xvrepl128vei_b: + return checkIntrinsicImmArg<4>(Op, 2, DAG); + case Intrinsic::loongarch_lsx_vsrlni_b_h: + case Intrinsic::loongarch_lsx_vsrani_b_h: +@@ -728,6 +767,18 @@ LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, + case Intrinsic::loongarch_lsx_vssrarni_b_h: + case Intrinsic::loongarch_lsx_vssrlrni_bu_h: + case Intrinsic::loongarch_lsx_vssrarni_bu_h: ++ case 
Intrinsic::loongarch_lasx_xvsrlni_b_h: ++ case Intrinsic::loongarch_lasx_xvsrani_b_h: ++ case Intrinsic::loongarch_lasx_xvsrlrni_b_h: ++ case Intrinsic::loongarch_lasx_xvsrarni_b_h: ++ case Intrinsic::loongarch_lasx_xvssrlni_b_h: ++ case Intrinsic::loongarch_lasx_xvssrani_b_h: ++ case Intrinsic::loongarch_lasx_xvssrlni_bu_h: ++ case Intrinsic::loongarch_lasx_xvssrani_bu_h: ++ case Intrinsic::loongarch_lasx_xvssrlrni_b_h: ++ case Intrinsic::loongarch_lasx_xvssrarni_b_h: ++ case Intrinsic::loongarch_lasx_xvssrlrni_bu_h: ++ case Intrinsic::loongarch_lasx_xvssrarni_bu_h: + return checkIntrinsicImmArg<4>(Op, 3, DAG); + case Intrinsic::loongarch_lsx_vsat_w: + case Intrinsic::loongarch_lsx_vsat_wu: +@@ -746,6 +797,23 @@ LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, + case Intrinsic::loongarch_lsx_vslti_du: + case Intrinsic::loongarch_lsx_vbsll_v: + case Intrinsic::loongarch_lsx_vbsrl_v: ++ case Intrinsic::loongarch_lasx_xvsat_w: ++ case Intrinsic::loongarch_lasx_xvsat_wu: ++ case Intrinsic::loongarch_lasx_xvrotri_w: ++ case Intrinsic::loongarch_lasx_xvsllwil_d_w: ++ case Intrinsic::loongarch_lasx_xvsllwil_du_wu: ++ case Intrinsic::loongarch_lasx_xvsrlri_w: ++ case Intrinsic::loongarch_lasx_xvsrari_w: ++ case Intrinsic::loongarch_lasx_xvslei_bu: ++ case Intrinsic::loongarch_lasx_xvslei_hu: ++ case Intrinsic::loongarch_lasx_xvslei_wu: ++ case Intrinsic::loongarch_lasx_xvslei_du: ++ case Intrinsic::loongarch_lasx_xvslti_bu: ++ case Intrinsic::loongarch_lasx_xvslti_hu: ++ case Intrinsic::loongarch_lasx_xvslti_wu: ++ case Intrinsic::loongarch_lasx_xvslti_du: ++ case Intrinsic::loongarch_lasx_xvbsll_v: ++ case Intrinsic::loongarch_lasx_xvbsrl_v: + return checkIntrinsicImmArg<5>(Op, 2, DAG); + case Intrinsic::loongarch_lsx_vseqi_b: + case Intrinsic::loongarch_lsx_vseqi_h: +@@ -759,6 +827,18 @@ LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, + case Intrinsic::loongarch_lsx_vslti_h: + case Intrinsic::loongarch_lsx_vslti_w: + case Intrinsic::loongarch_lsx_vslti_d: ++ case Intrinsic::loongarch_lasx_xvseqi_b: ++ case Intrinsic::loongarch_lasx_xvseqi_h: ++ case Intrinsic::loongarch_lasx_xvseqi_w: ++ case Intrinsic::loongarch_lasx_xvseqi_d: ++ case Intrinsic::loongarch_lasx_xvslei_b: ++ case Intrinsic::loongarch_lasx_xvslei_h: ++ case Intrinsic::loongarch_lasx_xvslei_w: ++ case Intrinsic::loongarch_lasx_xvslei_d: ++ case Intrinsic::loongarch_lasx_xvslti_b: ++ case Intrinsic::loongarch_lasx_xvslti_h: ++ case Intrinsic::loongarch_lasx_xvslti_w: ++ case Intrinsic::loongarch_lasx_xvslti_d: + return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true); + case Intrinsic::loongarch_lsx_vsrlni_h_w: + case Intrinsic::loongarch_lsx_vsrani_h_w: +@@ -774,12 +854,31 @@ LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, + case Intrinsic::loongarch_lsx_vssrarni_hu_w: + case Intrinsic::loongarch_lsx_vfrstpi_b: + case Intrinsic::loongarch_lsx_vfrstpi_h: ++ case Intrinsic::loongarch_lasx_xvsrlni_h_w: ++ case Intrinsic::loongarch_lasx_xvsrani_h_w: ++ case Intrinsic::loongarch_lasx_xvsrlrni_h_w: ++ case Intrinsic::loongarch_lasx_xvsrarni_h_w: ++ case Intrinsic::loongarch_lasx_xvssrlni_h_w: ++ case Intrinsic::loongarch_lasx_xvssrani_h_w: ++ case Intrinsic::loongarch_lasx_xvssrlni_hu_w: ++ case Intrinsic::loongarch_lasx_xvssrani_hu_w: ++ case Intrinsic::loongarch_lasx_xvssrlrni_h_w: ++ case Intrinsic::loongarch_lasx_xvssrarni_h_w: ++ case Intrinsic::loongarch_lasx_xvssrlrni_hu_w: ++ case Intrinsic::loongarch_lasx_xvssrarni_hu_w: ++ case Intrinsic::loongarch_lasx_xvfrstpi_b: ++ case 
Intrinsic::loongarch_lasx_xvfrstpi_h: + return checkIntrinsicImmArg<5>(Op, 3, DAG); + case Intrinsic::loongarch_lsx_vsat_d: + case Intrinsic::loongarch_lsx_vsat_du: + case Intrinsic::loongarch_lsx_vrotri_d: + case Intrinsic::loongarch_lsx_vsrlri_d: + case Intrinsic::loongarch_lsx_vsrari_d: ++ case Intrinsic::loongarch_lasx_xvsat_d: ++ case Intrinsic::loongarch_lasx_xvsat_du: ++ case Intrinsic::loongarch_lasx_xvrotri_d: ++ case Intrinsic::loongarch_lasx_xvsrlri_d: ++ case Intrinsic::loongarch_lasx_xvsrari_d: + return checkIntrinsicImmArg<6>(Op, 2, DAG); + case Intrinsic::loongarch_lsx_vsrlni_w_d: + case Intrinsic::loongarch_lsx_vsrani_w_d: +@@ -793,6 +892,18 @@ LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, + case Intrinsic::loongarch_lsx_vssrarni_w_d: + case Intrinsic::loongarch_lsx_vssrlrni_wu_d: + case Intrinsic::loongarch_lsx_vssrarni_wu_d: ++ case Intrinsic::loongarch_lasx_xvsrlni_w_d: ++ case Intrinsic::loongarch_lasx_xvsrani_w_d: ++ case Intrinsic::loongarch_lasx_xvsrlrni_w_d: ++ case Intrinsic::loongarch_lasx_xvsrarni_w_d: ++ case Intrinsic::loongarch_lasx_xvssrlni_w_d: ++ case Intrinsic::loongarch_lasx_xvssrani_w_d: ++ case Intrinsic::loongarch_lasx_xvssrlni_wu_d: ++ case Intrinsic::loongarch_lasx_xvssrani_wu_d: ++ case Intrinsic::loongarch_lasx_xvssrlrni_w_d: ++ case Intrinsic::loongarch_lasx_xvssrarni_w_d: ++ case Intrinsic::loongarch_lasx_xvssrlrni_wu_d: ++ case Intrinsic::loongarch_lasx_xvssrarni_wu_d: + return checkIntrinsicImmArg<6>(Op, 3, DAG); + case Intrinsic::loongarch_lsx_vsrlni_d_q: + case Intrinsic::loongarch_lsx_vsrani_d_q: +@@ -806,11 +917,28 @@ LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, + case Intrinsic::loongarch_lsx_vssrarni_d_q: + case Intrinsic::loongarch_lsx_vssrlrni_du_q: + case Intrinsic::loongarch_lsx_vssrarni_du_q: ++ case Intrinsic::loongarch_lasx_xvsrlni_d_q: ++ case Intrinsic::loongarch_lasx_xvsrani_d_q: ++ case Intrinsic::loongarch_lasx_xvsrlrni_d_q: ++ case Intrinsic::loongarch_lasx_xvsrarni_d_q: ++ case Intrinsic::loongarch_lasx_xvssrlni_d_q: ++ case Intrinsic::loongarch_lasx_xvssrani_d_q: ++ case Intrinsic::loongarch_lasx_xvssrlni_du_q: ++ case Intrinsic::loongarch_lasx_xvssrani_du_q: ++ case Intrinsic::loongarch_lasx_xvssrlrni_d_q: ++ case Intrinsic::loongarch_lasx_xvssrarni_d_q: ++ case Intrinsic::loongarch_lasx_xvssrlrni_du_q: ++ case Intrinsic::loongarch_lasx_xvssrarni_du_q: + return checkIntrinsicImmArg<7>(Op, 3, DAG); + case Intrinsic::loongarch_lsx_vnori_b: + case Intrinsic::loongarch_lsx_vshuf4i_b: + case Intrinsic::loongarch_lsx_vshuf4i_h: + case Intrinsic::loongarch_lsx_vshuf4i_w: ++ case Intrinsic::loongarch_lasx_xvnori_b: ++ case Intrinsic::loongarch_lasx_xvshuf4i_b: ++ case Intrinsic::loongarch_lasx_xvshuf4i_h: ++ case Intrinsic::loongarch_lasx_xvshuf4i_w: ++ case Intrinsic::loongarch_lasx_xvpermi_d: + return checkIntrinsicImmArg<8>(Op, 2, DAG); + case Intrinsic::loongarch_lsx_vshuf4i_d: + case Intrinsic::loongarch_lsx_vpermi_w: +@@ -819,13 +947,26 @@ LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, + case Intrinsic::loongarch_lsx_vextrins_h: + case Intrinsic::loongarch_lsx_vextrins_w: + case Intrinsic::loongarch_lsx_vextrins_d: ++ case Intrinsic::loongarch_lasx_xvshuf4i_d: ++ case Intrinsic::loongarch_lasx_xvpermi_w: ++ case Intrinsic::loongarch_lasx_xvpermi_q: ++ case Intrinsic::loongarch_lasx_xvbitseli_b: ++ case Intrinsic::loongarch_lasx_xvextrins_b: ++ case Intrinsic::loongarch_lasx_xvextrins_h: ++ case Intrinsic::loongarch_lasx_xvextrins_w: ++ case Intrinsic::loongarch_lasx_xvextrins_d: + 
return checkIntrinsicImmArg<8>(Op, 3, DAG); + case Intrinsic::loongarch_lsx_vrepli_b: + case Intrinsic::loongarch_lsx_vrepli_h: + case Intrinsic::loongarch_lsx_vrepli_w: + case Intrinsic::loongarch_lsx_vrepli_d: ++ case Intrinsic::loongarch_lasx_xvrepli_b: ++ case Intrinsic::loongarch_lasx_xvrepli_h: ++ case Intrinsic::loongarch_lasx_xvrepli_w: ++ case Intrinsic::loongarch_lasx_xvrepli_d: + return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true); + case Intrinsic::loongarch_lsx_vldi: ++ case Intrinsic::loongarch_lasx_xvldi: + return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true); + } + } +@@ -924,22 +1065,27 @@ LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, + } + case Intrinsic::loongarch_lsx_vld: + case Intrinsic::loongarch_lsx_vldrepl_b: ++ case Intrinsic::loongarch_lasx_xvld: ++ case Intrinsic::loongarch_lasx_xvldrepl_b: + return !isInt<12>(cast(Op.getOperand(3))->getSExtValue()) + ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG) + : SDValue(); + case Intrinsic::loongarch_lsx_vldrepl_h: ++ case Intrinsic::loongarch_lasx_xvldrepl_h: + return !isShiftedInt<11, 1>( + cast(Op.getOperand(3))->getSExtValue()) + ? emitIntrinsicWithChainErrorMessage( + Op, "argument out of range or not a multiple of 2", DAG) + : SDValue(); + case Intrinsic::loongarch_lsx_vldrepl_w: ++ case Intrinsic::loongarch_lasx_xvldrepl_w: + return !isShiftedInt<10, 2>( + cast(Op.getOperand(3))->getSExtValue()) + ? emitIntrinsicWithChainErrorMessage( + Op, "argument out of range or not a multiple of 4", DAG) + : SDValue(); + case Intrinsic::loongarch_lsx_vldrepl_d: ++ case Intrinsic::loongarch_lasx_xvldrepl_d: + return !isShiftedInt<9, 3>( + cast(Op.getOperand(3))->getSExtValue()) + ? emitIntrinsicWithChainErrorMessage( +@@ -1064,14 +1210,27 @@ SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op, + : Op; + } + case Intrinsic::loongarch_lsx_vst: ++ case Intrinsic::loongarch_lasx_xvst: + return !isInt<12>(cast(Op.getOperand(4))->getSExtValue()) + ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) + : SDValue(); ++ case Intrinsic::loongarch_lasx_xvstelm_b: ++ return (!isInt<8>(cast(Op.getOperand(4))->getSExtValue()) || ++ !isUInt<5>(cast(Op.getOperand(5))->getZExtValue())) ++ ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) ++ : SDValue(); + case Intrinsic::loongarch_lsx_vstelm_b: + return (!isInt<8>(cast(Op.getOperand(4))->getSExtValue()) || + !isUInt<4>(cast(Op.getOperand(5))->getZExtValue())) + ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) + : SDValue(); ++ case Intrinsic::loongarch_lasx_xvstelm_h: ++ return (!isShiftedInt<8, 1>( ++ cast(Op.getOperand(4))->getSExtValue()) || ++ !isUInt<4>(cast(Op.getOperand(5))->getZExtValue())) ++ ? emitIntrinsicErrorMessage( ++ Op, "argument out of range or not a multiple of 2", DAG) ++ : SDValue(); + case Intrinsic::loongarch_lsx_vstelm_h: + return (!isShiftedInt<8, 1>( + cast(Op.getOperand(4))->getSExtValue()) || +@@ -1079,6 +1238,13 @@ SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op, + ? emitIntrinsicErrorMessage( + Op, "argument out of range or not a multiple of 2", DAG) + : SDValue(); ++ case Intrinsic::loongarch_lasx_xvstelm_w: ++ return (!isShiftedInt<8, 2>( ++ cast(Op.getOperand(4))->getSExtValue()) || ++ !isUInt<3>(cast(Op.getOperand(5))->getZExtValue())) ++ ? 
emitIntrinsicErrorMessage( ++ Op, "argument out of range or not a multiple of 4", DAG) ++ : SDValue(); + case Intrinsic::loongarch_lsx_vstelm_w: + return (!isShiftedInt<8, 2>( + cast(Op.getOperand(4))->getSExtValue()) || +@@ -1086,6 +1252,13 @@ SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op, + ? emitIntrinsicErrorMessage( + Op, "argument out of range or not a multiple of 4", DAG) + : SDValue(); ++ case Intrinsic::loongarch_lasx_xvstelm_d: ++ return (!isShiftedInt<8, 3>( ++ cast(Op.getOperand(4))->getSExtValue()) || ++ !isUInt<2>(cast(Op.getOperand(5))->getZExtValue())) ++ ? emitIntrinsicErrorMessage( ++ Op, "argument out of range or not a multiple of 8", DAG) ++ : SDValue(); + case Intrinsic::loongarch_lsx_vstelm_d: + return (!isShiftedInt<8, 3>( + cast(Op.getOperand(4))->getSExtValue()) || +@@ -1304,6 +1477,7 @@ replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl &Results, + LoongArchISD::VPICK_SEXT_ELT); + break; + case Intrinsic::loongarch_lsx_vpickve2gr_h: ++ case Intrinsic::loongarch_lasx_xvpickve2gr_w: + replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget, + LoongArchISD::VPICK_SEXT_ELT); + break; +@@ -1316,6 +1490,7 @@ replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl &Results, + LoongArchISD::VPICK_ZEXT_ELT); + break; + case Intrinsic::loongarch_lsx_vpickve2gr_hu: ++ case Intrinsic::loongarch_lasx_xvpickve2gr_wu: + replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget, + LoongArchISD::VPICK_ZEXT_ELT); + break; +@@ -1327,10 +1502,15 @@ replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl &Results, + case Intrinsic::loongarch_lsx_bz_h: + case Intrinsic::loongarch_lsx_bz_w: + case Intrinsic::loongarch_lsx_bz_d: ++ case Intrinsic::loongarch_lasx_xbz_b: ++ case Intrinsic::loongarch_lasx_xbz_h: ++ case Intrinsic::loongarch_lasx_xbz_w: ++ case Intrinsic::loongarch_lasx_xbz_d: + replaceVecCondBranchResults(N, Results, DAG, Subtarget, + LoongArchISD::VALL_ZERO); + break; + case Intrinsic::loongarch_lsx_bz_v: ++ case Intrinsic::loongarch_lasx_xbz_v: + replaceVecCondBranchResults(N, Results, DAG, Subtarget, + LoongArchISD::VANY_ZERO); + break; +@@ -1338,10 +1518,15 @@ replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl &Results, + case Intrinsic::loongarch_lsx_bnz_h: + case Intrinsic::loongarch_lsx_bnz_w: + case Intrinsic::loongarch_lsx_bnz_d: ++ case Intrinsic::loongarch_lasx_xbnz_b: ++ case Intrinsic::loongarch_lasx_xbnz_h: ++ case Intrinsic::loongarch_lasx_xbnz_w: ++ case Intrinsic::loongarch_lasx_xbnz_d: + replaceVecCondBranchResults(N, Results, DAG, Subtarget, + LoongArchISD::VALL_NONZERO); + break; + case Intrinsic::loongarch_lsx_bnz_v: ++ case Intrinsic::loongarch_lasx_xbnz_v: + replaceVecCondBranchResults(N, Results, DAG, Subtarget, + LoongArchISD::VANY_NONZERO); + break; +@@ -2114,30 +2299,50 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, + case Intrinsic::loongarch_lsx_vadd_h: + case Intrinsic::loongarch_lsx_vadd_w: + case Intrinsic::loongarch_lsx_vadd_d: ++ case Intrinsic::loongarch_lasx_xvadd_b: ++ case Intrinsic::loongarch_lasx_xvadd_h: ++ case Intrinsic::loongarch_lasx_xvadd_w: ++ case Intrinsic::loongarch_lasx_xvadd_d: + return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case Intrinsic::loongarch_lsx_vaddi_bu: + case Intrinsic::loongarch_lsx_vaddi_hu: + case Intrinsic::loongarch_lsx_vaddi_wu: + case Intrinsic::loongarch_lsx_vaddi_du: ++ case Intrinsic::loongarch_lasx_xvaddi_bu: ++ case Intrinsic::loongarch_lasx_xvaddi_hu: ++ case 
Intrinsic::loongarch_lasx_xvaddi_wu: ++ case Intrinsic::loongarch_lasx_xvaddi_du: + return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<5>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vsub_b: + case Intrinsic::loongarch_lsx_vsub_h: + case Intrinsic::loongarch_lsx_vsub_w: + case Intrinsic::loongarch_lsx_vsub_d: ++ case Intrinsic::loongarch_lasx_xvsub_b: ++ case Intrinsic::loongarch_lasx_xvsub_h: ++ case Intrinsic::loongarch_lasx_xvsub_w: ++ case Intrinsic::loongarch_lasx_xvsub_d: + return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case Intrinsic::loongarch_lsx_vsubi_bu: + case Intrinsic::loongarch_lsx_vsubi_hu: + case Intrinsic::loongarch_lsx_vsubi_wu: + case Intrinsic::loongarch_lsx_vsubi_du: ++ case Intrinsic::loongarch_lasx_xvsubi_bu: ++ case Intrinsic::loongarch_lasx_xvsubi_hu: ++ case Intrinsic::loongarch_lasx_xvsubi_wu: ++ case Intrinsic::loongarch_lasx_xvsubi_du: + return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<5>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vneg_b: + case Intrinsic::loongarch_lsx_vneg_h: + case Intrinsic::loongarch_lsx_vneg_w: + case Intrinsic::loongarch_lsx_vneg_d: ++ case Intrinsic::loongarch_lasx_xvneg_b: ++ case Intrinsic::loongarch_lasx_xvneg_h: ++ case Intrinsic::loongarch_lasx_xvneg_w: ++ case Intrinsic::loongarch_lasx_xvneg_d: + return DAG.getNode( + ISD::SUB, DL, N->getValueType(0), + DAG.getConstant( +@@ -2149,60 +2354,100 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, + case Intrinsic::loongarch_lsx_vmax_h: + case Intrinsic::loongarch_lsx_vmax_w: + case Intrinsic::loongarch_lsx_vmax_d: ++ case Intrinsic::loongarch_lasx_xvmax_b: ++ case Intrinsic::loongarch_lasx_xvmax_h: ++ case Intrinsic::loongarch_lasx_xvmax_w: ++ case Intrinsic::loongarch_lasx_xvmax_d: + return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case Intrinsic::loongarch_lsx_vmax_bu: + case Intrinsic::loongarch_lsx_vmax_hu: + case Intrinsic::loongarch_lsx_vmax_wu: + case Intrinsic::loongarch_lsx_vmax_du: ++ case Intrinsic::loongarch_lasx_xvmax_bu: ++ case Intrinsic::loongarch_lasx_xvmax_hu: ++ case Intrinsic::loongarch_lasx_xvmax_wu: ++ case Intrinsic::loongarch_lasx_xvmax_du: + return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case Intrinsic::loongarch_lsx_vmaxi_b: + case Intrinsic::loongarch_lsx_vmaxi_h: + case Intrinsic::loongarch_lsx_vmaxi_w: + case Intrinsic::loongarch_lsx_vmaxi_d: ++ case Intrinsic::loongarch_lasx_xvmaxi_b: ++ case Intrinsic::loongarch_lasx_xvmaxi_h: ++ case Intrinsic::loongarch_lasx_xvmaxi_w: ++ case Intrinsic::loongarch_lasx_xvmaxi_d: + return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true)); + case Intrinsic::loongarch_lsx_vmaxi_bu: + case Intrinsic::loongarch_lsx_vmaxi_hu: + case Intrinsic::loongarch_lsx_vmaxi_wu: + case Intrinsic::loongarch_lsx_vmaxi_du: ++ case Intrinsic::loongarch_lasx_xvmaxi_bu: ++ case Intrinsic::loongarch_lasx_xvmaxi_hu: ++ case Intrinsic::loongarch_lasx_xvmaxi_wu: ++ case Intrinsic::loongarch_lasx_xvmaxi_du: + return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<5>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vmin_b: + case Intrinsic::loongarch_lsx_vmin_h: + case Intrinsic::loongarch_lsx_vmin_w: + case Intrinsic::loongarch_lsx_vmin_d: ++ case Intrinsic::loongarch_lasx_xvmin_b: ++ case 
Intrinsic::loongarch_lasx_xvmin_h: ++ case Intrinsic::loongarch_lasx_xvmin_w: ++ case Intrinsic::loongarch_lasx_xvmin_d: + return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case Intrinsic::loongarch_lsx_vmin_bu: + case Intrinsic::loongarch_lsx_vmin_hu: + case Intrinsic::loongarch_lsx_vmin_wu: + case Intrinsic::loongarch_lsx_vmin_du: ++ case Intrinsic::loongarch_lasx_xvmin_bu: ++ case Intrinsic::loongarch_lasx_xvmin_hu: ++ case Intrinsic::loongarch_lasx_xvmin_wu: ++ case Intrinsic::loongarch_lasx_xvmin_du: + return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case Intrinsic::loongarch_lsx_vmini_b: + case Intrinsic::loongarch_lsx_vmini_h: + case Intrinsic::loongarch_lsx_vmini_w: + case Intrinsic::loongarch_lsx_vmini_d: ++ case Intrinsic::loongarch_lasx_xvmini_b: ++ case Intrinsic::loongarch_lasx_xvmini_h: ++ case Intrinsic::loongarch_lasx_xvmini_w: ++ case Intrinsic::loongarch_lasx_xvmini_d: + return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true)); + case Intrinsic::loongarch_lsx_vmini_bu: + case Intrinsic::loongarch_lsx_vmini_hu: + case Intrinsic::loongarch_lsx_vmini_wu: + case Intrinsic::loongarch_lsx_vmini_du: ++ case Intrinsic::loongarch_lasx_xvmini_bu: ++ case Intrinsic::loongarch_lasx_xvmini_hu: ++ case Intrinsic::loongarch_lasx_xvmini_wu: ++ case Intrinsic::loongarch_lasx_xvmini_du: + return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<5>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vmul_b: + case Intrinsic::loongarch_lsx_vmul_h: + case Intrinsic::loongarch_lsx_vmul_w: + case Intrinsic::loongarch_lsx_vmul_d: ++ case Intrinsic::loongarch_lasx_xvmul_b: ++ case Intrinsic::loongarch_lasx_xvmul_h: ++ case Intrinsic::loongarch_lasx_xvmul_w: ++ case Intrinsic::loongarch_lasx_xvmul_d: + return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case Intrinsic::loongarch_lsx_vmadd_b: + case Intrinsic::loongarch_lsx_vmadd_h: + case Intrinsic::loongarch_lsx_vmadd_w: +- case Intrinsic::loongarch_lsx_vmadd_d: { ++ case Intrinsic::loongarch_lsx_vmadd_d: ++ case Intrinsic::loongarch_lasx_xvmadd_b: ++ case Intrinsic::loongarch_lasx_xvmadd_h: ++ case Intrinsic::loongarch_lasx_xvmadd_w: ++ case Intrinsic::loongarch_lasx_xvmadd_d: { + EVT ResTy = N->getValueType(0); + return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1), + DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2), +@@ -2211,7 +2456,11 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, + case Intrinsic::loongarch_lsx_vmsub_b: + case Intrinsic::loongarch_lsx_vmsub_h: + case Intrinsic::loongarch_lsx_vmsub_w: +- case Intrinsic::loongarch_lsx_vmsub_d: { ++ case Intrinsic::loongarch_lsx_vmsub_d: ++ case Intrinsic::loongarch_lasx_xvmsub_b: ++ case Intrinsic::loongarch_lasx_xvmsub_h: ++ case Intrinsic::loongarch_lasx_xvmsub_w: ++ case Intrinsic::loongarch_lasx_xvmsub_d: { + EVT ResTy = N->getValueType(0); + return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1), + DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2), +@@ -2221,125 +2470,188 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, + case Intrinsic::loongarch_lsx_vdiv_h: + case Intrinsic::loongarch_lsx_vdiv_w: + case Intrinsic::loongarch_lsx_vdiv_d: ++ case Intrinsic::loongarch_lasx_xvdiv_b: ++ case Intrinsic::loongarch_lasx_xvdiv_h: ++ case Intrinsic::loongarch_lasx_xvdiv_w: ++ case 
Intrinsic::loongarch_lasx_xvdiv_d: + return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case Intrinsic::loongarch_lsx_vdiv_bu: + case Intrinsic::loongarch_lsx_vdiv_hu: + case Intrinsic::loongarch_lsx_vdiv_wu: + case Intrinsic::loongarch_lsx_vdiv_du: ++ case Intrinsic::loongarch_lasx_xvdiv_bu: ++ case Intrinsic::loongarch_lasx_xvdiv_hu: ++ case Intrinsic::loongarch_lasx_xvdiv_wu: ++ case Intrinsic::loongarch_lasx_xvdiv_du: + return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case Intrinsic::loongarch_lsx_vmod_b: + case Intrinsic::loongarch_lsx_vmod_h: + case Intrinsic::loongarch_lsx_vmod_w: + case Intrinsic::loongarch_lsx_vmod_d: ++ case Intrinsic::loongarch_lasx_xvmod_b: ++ case Intrinsic::loongarch_lasx_xvmod_h: ++ case Intrinsic::loongarch_lasx_xvmod_w: ++ case Intrinsic::loongarch_lasx_xvmod_d: + return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case Intrinsic::loongarch_lsx_vmod_bu: + case Intrinsic::loongarch_lsx_vmod_hu: + case Intrinsic::loongarch_lsx_vmod_wu: + case Intrinsic::loongarch_lsx_vmod_du: ++ case Intrinsic::loongarch_lasx_xvmod_bu: ++ case Intrinsic::loongarch_lasx_xvmod_hu: ++ case Intrinsic::loongarch_lasx_xvmod_wu: ++ case Intrinsic::loongarch_lasx_xvmod_du: + return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case Intrinsic::loongarch_lsx_vand_v: ++ case Intrinsic::loongarch_lasx_xvand_v: + return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case Intrinsic::loongarch_lsx_vor_v: ++ case Intrinsic::loongarch_lasx_xvor_v: + return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case Intrinsic::loongarch_lsx_vxor_v: ++ case Intrinsic::loongarch_lasx_xvxor_v: + return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); +- case Intrinsic::loongarch_lsx_vnor_v: { ++ case Intrinsic::loongarch_lsx_vnor_v: ++ case Intrinsic::loongarch_lasx_xvnor_v: { + SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + return DAG.getNOT(DL, Res, Res->getValueType(0)); + } + case Intrinsic::loongarch_lsx_vandi_b: ++ case Intrinsic::loongarch_lasx_xvandi_b: + return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<8>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vori_b: ++ case Intrinsic::loongarch_lasx_xvori_b: + return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<8>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vxori_b: ++ case Intrinsic::loongarch_lasx_xvxori_b: + return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<8>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vsll_b: + case Intrinsic::loongarch_lsx_vsll_h: + case Intrinsic::loongarch_lsx_vsll_w: + case Intrinsic::loongarch_lsx_vsll_d: ++ case Intrinsic::loongarch_lasx_xvsll_b: ++ case Intrinsic::loongarch_lasx_xvsll_h: ++ case Intrinsic::loongarch_lasx_xvsll_w: ++ case Intrinsic::loongarch_lasx_xvsll_d: + return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), + truncateVecElts(N, DAG)); + case Intrinsic::loongarch_lsx_vslli_b: ++ case Intrinsic::loongarch_lasx_xvslli_b: + return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<3>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vslli_h: ++ case Intrinsic::loongarch_lasx_xvslli_h: + return 
DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<4>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vslli_w: ++ case Intrinsic::loongarch_lasx_xvslli_w: + return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<5>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vslli_d: ++ case Intrinsic::loongarch_lasx_xvslli_d: + return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<6>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vsrl_b: + case Intrinsic::loongarch_lsx_vsrl_h: + case Intrinsic::loongarch_lsx_vsrl_w: + case Intrinsic::loongarch_lsx_vsrl_d: ++ case Intrinsic::loongarch_lasx_xvsrl_b: ++ case Intrinsic::loongarch_lasx_xvsrl_h: ++ case Intrinsic::loongarch_lasx_xvsrl_w: ++ case Intrinsic::loongarch_lasx_xvsrl_d: + return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), + truncateVecElts(N, DAG)); + case Intrinsic::loongarch_lsx_vsrli_b: ++ case Intrinsic::loongarch_lasx_xvsrli_b: + return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<3>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vsrli_h: ++ case Intrinsic::loongarch_lasx_xvsrli_h: + return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<4>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vsrli_w: ++ case Intrinsic::loongarch_lasx_xvsrli_w: + return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<5>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vsrli_d: ++ case Intrinsic::loongarch_lasx_xvsrli_d: + return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<6>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vsra_b: + case Intrinsic::loongarch_lsx_vsra_h: + case Intrinsic::loongarch_lsx_vsra_w: + case Intrinsic::loongarch_lsx_vsra_d: ++ case Intrinsic::loongarch_lasx_xvsra_b: ++ case Intrinsic::loongarch_lasx_xvsra_h: ++ case Intrinsic::loongarch_lasx_xvsra_w: ++ case Intrinsic::loongarch_lasx_xvsra_d: + return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), + truncateVecElts(N, DAG)); + case Intrinsic::loongarch_lsx_vsrai_b: ++ case Intrinsic::loongarch_lasx_xvsrai_b: + return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<3>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vsrai_h: ++ case Intrinsic::loongarch_lasx_xvsrai_h: + return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<4>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vsrai_w: ++ case Intrinsic::loongarch_lasx_xvsrai_w: + return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<5>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vsrai_d: ++ case Intrinsic::loongarch_lasx_xvsrai_d: + return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<6>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vpcnt_b: + case Intrinsic::loongarch_lsx_vpcnt_h: + case Intrinsic::loongarch_lsx_vpcnt_w: + case Intrinsic::loongarch_lsx_vpcnt_d: ++ case Intrinsic::loongarch_lasx_xvpcnt_b: ++ case Intrinsic::loongarch_lasx_xvpcnt_h: ++ case Intrinsic::loongarch_lasx_xvpcnt_w: ++ case Intrinsic::loongarch_lasx_xvpcnt_d: + return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1)); + case Intrinsic::loongarch_lsx_vbitclr_b: + case Intrinsic::loongarch_lsx_vbitclr_h: + case Intrinsic::loongarch_lsx_vbitclr_w: + case Intrinsic::loongarch_lsx_vbitclr_d: ++ case 
Intrinsic::loongarch_lasx_xvbitclr_b: ++ case Intrinsic::loongarch_lasx_xvbitclr_h: ++ case Intrinsic::loongarch_lasx_xvbitclr_w: ++ case Intrinsic::loongarch_lasx_xvbitclr_d: + return lowerVectorBitClear(N, DAG); + case Intrinsic::loongarch_lsx_vbitclri_b: ++ case Intrinsic::loongarch_lasx_xvbitclri_b: + return lowerVectorBitClearImm<3>(N, DAG); + case Intrinsic::loongarch_lsx_vbitclri_h: ++ case Intrinsic::loongarch_lasx_xvbitclri_h: + return lowerVectorBitClearImm<4>(N, DAG); + case Intrinsic::loongarch_lsx_vbitclri_w: ++ case Intrinsic::loongarch_lasx_xvbitclri_w: + return lowerVectorBitClearImm<5>(N, DAG); + case Intrinsic::loongarch_lsx_vbitclri_d: ++ case Intrinsic::loongarch_lasx_xvbitclri_d: + return lowerVectorBitClearImm<6>(N, DAG); + case Intrinsic::loongarch_lsx_vbitset_b: + case Intrinsic::loongarch_lsx_vbitset_h: + case Intrinsic::loongarch_lsx_vbitset_w: +- case Intrinsic::loongarch_lsx_vbitset_d: { ++ case Intrinsic::loongarch_lsx_vbitset_d: ++ case Intrinsic::loongarch_lasx_xvbitset_b: ++ case Intrinsic::loongarch_lasx_xvbitset_h: ++ case Intrinsic::loongarch_lasx_xvbitset_w: ++ case Intrinsic::loongarch_lasx_xvbitset_d: { + EVT VecTy = N->getValueType(0); + SDValue One = DAG.getConstant(1, DL, VecTy); + return DAG.getNode( +@@ -2347,17 +2659,25 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, + DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG))); + } + case Intrinsic::loongarch_lsx_vbitseti_b: ++ case Intrinsic::loongarch_lasx_xvbitseti_b: + return lowerVectorBitSetImm<3>(N, DAG); + case Intrinsic::loongarch_lsx_vbitseti_h: ++ case Intrinsic::loongarch_lasx_xvbitseti_h: + return lowerVectorBitSetImm<4>(N, DAG); + case Intrinsic::loongarch_lsx_vbitseti_w: ++ case Intrinsic::loongarch_lasx_xvbitseti_w: + return lowerVectorBitSetImm<5>(N, DAG); + case Intrinsic::loongarch_lsx_vbitseti_d: ++ case Intrinsic::loongarch_lasx_xvbitseti_d: + return lowerVectorBitSetImm<6>(N, DAG); + case Intrinsic::loongarch_lsx_vbitrev_b: + case Intrinsic::loongarch_lsx_vbitrev_h: + case Intrinsic::loongarch_lsx_vbitrev_w: +- case Intrinsic::loongarch_lsx_vbitrev_d: { ++ case Intrinsic::loongarch_lsx_vbitrev_d: ++ case Intrinsic::loongarch_lasx_xvbitrev_b: ++ case Intrinsic::loongarch_lasx_xvbitrev_h: ++ case Intrinsic::loongarch_lasx_xvbitrev_w: ++ case Intrinsic::loongarch_lasx_xvbitrev_d: { + EVT VecTy = N->getValueType(0); + SDValue One = DAG.getConstant(1, DL, VecTy); + return DAG.getNode( +@@ -2365,31 +2685,45 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, + DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG))); + } + case Intrinsic::loongarch_lsx_vbitrevi_b: ++ case Intrinsic::loongarch_lasx_xvbitrevi_b: + return lowerVectorBitRevImm<3>(N, DAG); + case Intrinsic::loongarch_lsx_vbitrevi_h: ++ case Intrinsic::loongarch_lasx_xvbitrevi_h: + return lowerVectorBitRevImm<4>(N, DAG); + case Intrinsic::loongarch_lsx_vbitrevi_w: ++ case Intrinsic::loongarch_lasx_xvbitrevi_w: + return lowerVectorBitRevImm<5>(N, DAG); + case Intrinsic::loongarch_lsx_vbitrevi_d: ++ case Intrinsic::loongarch_lasx_xvbitrevi_d: + return lowerVectorBitRevImm<6>(N, DAG); + case Intrinsic::loongarch_lsx_vfadd_s: + case Intrinsic::loongarch_lsx_vfadd_d: ++ case Intrinsic::loongarch_lasx_xvfadd_s: ++ case Intrinsic::loongarch_lasx_xvfadd_d: + return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case Intrinsic::loongarch_lsx_vfsub_s: + case Intrinsic::loongarch_lsx_vfsub_d: ++ case Intrinsic::loongarch_lasx_xvfsub_s: ++ case 
Intrinsic::loongarch_lasx_xvfsub_d: + return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case Intrinsic::loongarch_lsx_vfmul_s: + case Intrinsic::loongarch_lsx_vfmul_d: ++ case Intrinsic::loongarch_lasx_xvfmul_s: ++ case Intrinsic::loongarch_lasx_xvfmul_d: + return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case Intrinsic::loongarch_lsx_vfdiv_s: + case Intrinsic::loongarch_lsx_vfdiv_d: ++ case Intrinsic::loongarch_lasx_xvfdiv_s: ++ case Intrinsic::loongarch_lasx_xvfdiv_d: + return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case Intrinsic::loongarch_lsx_vfmadd_s: + case Intrinsic::loongarch_lsx_vfmadd_d: ++ case Intrinsic::loongarch_lasx_xvfmadd_s: ++ case Intrinsic::loongarch_lasx_xvfmadd_d: + return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2), N->getOperand(3)); + case Intrinsic::loongarch_lsx_vinsgr2vr_b: +@@ -2397,10 +2731,12 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, + N->getOperand(1), N->getOperand(2), + legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget)); + case Intrinsic::loongarch_lsx_vinsgr2vr_h: ++ case Intrinsic::loongarch_lasx_xvinsgr2vr_w: + return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0), + N->getOperand(1), N->getOperand(2), + legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget)); + case Intrinsic::loongarch_lsx_vinsgr2vr_w: ++ case Intrinsic::loongarch_lasx_xvinsgr2vr_d: + return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0), + N->getOperand(1), N->getOperand(2), + legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget)); +@@ -2411,7 +2747,11 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, + case Intrinsic::loongarch_lsx_vreplgr2vr_b: + case Intrinsic::loongarch_lsx_vreplgr2vr_h: + case Intrinsic::loongarch_lsx_vreplgr2vr_w: +- case Intrinsic::loongarch_lsx_vreplgr2vr_d: { ++ case Intrinsic::loongarch_lsx_vreplgr2vr_d: ++ case Intrinsic::loongarch_lasx_xvreplgr2vr_b: ++ case Intrinsic::loongarch_lasx_xvreplgr2vr_h: ++ case Intrinsic::loongarch_lasx_xvreplgr2vr_w: ++ case Intrinsic::loongarch_lasx_xvreplgr2vr_d: { + EVT ResTy = N->getValueType(0); + SmallVector Ops(ResTy.getVectorNumElements(), N->getOperand(1)); + return DAG.getBuildVector(ResTy, DL, Ops); +@@ -2420,6 +2760,10 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, + case Intrinsic::loongarch_lsx_vreplve_h: + case Intrinsic::loongarch_lsx_vreplve_w: + case Intrinsic::loongarch_lsx_vreplve_d: ++ case Intrinsic::loongarch_lasx_xvreplve_b: ++ case Intrinsic::loongarch_lasx_xvreplve_h: ++ case Intrinsic::loongarch_lasx_xvreplve_w: ++ case Intrinsic::loongarch_lasx_xvreplve_d: + return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0), + N->getOperand(1), + DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(), +@@ -2534,6 +2878,36 @@ emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB, + case LoongArch::PseudoVBNZ_D: + CondOpc = LoongArch::VSETALLNEZ_D; + break; ++ case LoongArch::PseudoXVBZ: ++ CondOpc = LoongArch::XVSETEQZ_V; ++ break; ++ case LoongArch::PseudoXVBZ_B: ++ CondOpc = LoongArch::XVSETANYEQZ_B; ++ break; ++ case LoongArch::PseudoXVBZ_H: ++ CondOpc = LoongArch::XVSETANYEQZ_H; ++ break; ++ case LoongArch::PseudoXVBZ_W: ++ CondOpc = LoongArch::XVSETANYEQZ_W; ++ break; ++ case LoongArch::PseudoXVBZ_D: ++ CondOpc = LoongArch::XVSETANYEQZ_D; ++ break; ++ case LoongArch::PseudoXVBNZ: ++ CondOpc = LoongArch::XVSETNEZ_V; ++ break; ++ 
case LoongArch::PseudoXVBNZ_B: ++ CondOpc = LoongArch::XVSETALLNEZ_B; ++ break; ++ case LoongArch::PseudoXVBNZ_H: ++ CondOpc = LoongArch::XVSETALLNEZ_H; ++ break; ++ case LoongArch::PseudoXVBNZ_W: ++ CondOpc = LoongArch::XVSETALLNEZ_W; ++ break; ++ case LoongArch::PseudoXVBNZ_D: ++ CondOpc = LoongArch::XVSETALLNEZ_D; ++ break; + } + + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); +@@ -2636,6 +3010,16 @@ MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter( + case LoongArch::PseudoVBNZ_H: + case LoongArch::PseudoVBNZ_W: + case LoongArch::PseudoVBNZ_D: ++ case LoongArch::PseudoXVBZ: ++ case LoongArch::PseudoXVBZ_B: ++ case LoongArch::PseudoXVBZ_H: ++ case LoongArch::PseudoXVBZ_W: ++ case LoongArch::PseudoXVBZ_D: ++ case LoongArch::PseudoXVBNZ: ++ case LoongArch::PseudoXVBNZ_B: ++ case LoongArch::PseudoXVBNZ_H: ++ case LoongArch::PseudoXVBNZ_W: ++ case LoongArch::PseudoXVBNZ_D: + return emitVecCondBranchPseudo(MI, BB, Subtarget); + } + } +@@ -2746,6 +3130,10 @@ const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2, + LoongArch::VR3, LoongArch::VR4, LoongArch::VR5, + LoongArch::VR6, LoongArch::VR7}; + ++const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2, ++ LoongArch::XR3, LoongArch::XR4, LoongArch::XR5, ++ LoongArch::XR6, LoongArch::XR7}; ++ + // Pass a 2*GRLen argument that has been split into two GRLen values through + // registers or the stack as necessary. + static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State, +@@ -2894,6 +3282,8 @@ static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI, + Reg = State.AllocateReg(ArgFPR64s); + else if (ValVT.is128BitVector()) + Reg = State.AllocateReg(ArgVRs); ++ else if (ValVT.is256BitVector()) ++ Reg = State.AllocateReg(ArgXRs); + else + Reg = State.AllocateReg(ArgGPRs); + +diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp +index a5d66ebac96a..ddd1c9943fac 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp +@@ -55,6 +55,14 @@ void LoongArchInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + return; + } + ++ // XR->XR copies. ++ if (LoongArch::LASX256RegClass.contains(DstReg, SrcReg)) { ++ BuildMI(MBB, MBBI, DL, get(LoongArch::XVORI_B), DstReg) ++ .addReg(SrcReg, getKillRegState(KillSrc)) ++ .addImm(0); ++ return; ++ } ++ + // GPR->CFR copy. 
+ if (LoongArch::CFRRegClass.contains(DstReg) && + LoongArch::GPRRegClass.contains(SrcReg)) { +@@ -109,6 +117,8 @@ void LoongArchInstrInfo::storeRegToStackSlot( + Opcode = LoongArch::FST_D; + else if (LoongArch::LSX128RegClass.hasSubClassEq(RC)) + Opcode = LoongArch::VST; ++ else if (LoongArch::LASX256RegClass.hasSubClassEq(RC)) ++ Opcode = LoongArch::XVST; + else if (LoongArch::CFRRegClass.hasSubClassEq(RC)) + Opcode = LoongArch::PseudoST_CFR; + else +@@ -145,6 +155,8 @@ void LoongArchInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, + Opcode = LoongArch::FLD_D; + else if (LoongArch::LSX128RegClass.hasSubClassEq(RC)) + Opcode = LoongArch::VLD; ++ else if (LoongArch::LASX256RegClass.hasSubClassEq(RC)) ++ Opcode = LoongArch::XVLD; + else if (LoongArch::CFRRegClass.hasSubClassEq(RC)) + Opcode = LoongArch::PseudoLD_CFR; + else +diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +index dc37b37b2186..a3afd4789dfc 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +@@ -10,6 +10,30 @@ + // + //===----------------------------------------------------------------------===// + ++def lasxsplati8 ++ : PatFrag<(ops node:$e0), ++ (v32i8 (build_vector node:$e0, node:$e0, node:$e0, node:$e0, ++ node:$e0, node:$e0, node:$e0, node:$e0, ++ node:$e0, node:$e0, node:$e0, node:$e0, ++ node:$e0, node:$e0, node:$e0, node:$e0, ++ node:$e0, node:$e0, node:$e0, node:$e0, ++ node:$e0, node:$e0, node:$e0, node:$e0, ++ node:$e0, node:$e0, node:$e0, node:$e0, ++ node:$e0, node:$e0, node:$e0, node:$e0))>; ++def lasxsplati16 ++ : PatFrag<(ops node:$e0), ++ (v16i16 (build_vector node:$e0, node:$e0, node:$e0, node:$e0, ++ node:$e0, node:$e0, node:$e0, node:$e0, ++ node:$e0, node:$e0, node:$e0, node:$e0, ++ node:$e0, node:$e0, node:$e0, node:$e0))>; ++def lasxsplati32 ++ : PatFrag<(ops node:$e0), ++ (v8i32 (build_vector node:$e0, node:$e0, node:$e0, node:$e0, ++ node:$e0, node:$e0, node:$e0, node:$e0))>; ++def lasxsplati64 ++ : PatFrag<(ops node:$e0), ++ (v4i64 (build_vector node:$e0, node:$e0, node:$e0, node:$e0))>; ++ + //===----------------------------------------------------------------------===// + // Instruction class templates + //===----------------------------------------------------------------------===// +@@ -1029,4 +1053,682 @@ def PseudoXVREPLI_D : Pseudo<(outs LASX256:$xd), (ins simm10:$imm), [], + "xvrepli.d", "$xd, $imm">; + } + ++def PseudoXVBNZ_B : VecCond; ++def PseudoXVBNZ_H : VecCond; ++def PseudoXVBNZ_W : VecCond; ++def PseudoXVBNZ_D : VecCond; ++def PseudoXVBNZ : VecCond; ++ ++def PseudoXVBZ_B : VecCond; ++def PseudoXVBZ_H : VecCond; ++def PseudoXVBZ_W : VecCond; ++def PseudoXVBZ_D : VecCond; ++def PseudoXVBZ : VecCond; ++ ++} // Predicates = [HasExtLASX] ++ ++multiclass PatXr { ++ def : Pat<(v32i8 (OpNode (v32i8 LASX256:$xj))), ++ (!cast(Inst#"_B") LASX256:$xj)>; ++ def : Pat<(v16i16 (OpNode (v16i16 LASX256:$xj))), ++ (!cast(Inst#"_H") LASX256:$xj)>; ++ def : Pat<(v8i32 (OpNode (v8i32 LASX256:$xj))), ++ (!cast(Inst#"_W") LASX256:$xj)>; ++ def : Pat<(v4i64 (OpNode (v4i64 LASX256:$xj))), ++ (!cast(Inst#"_D") LASX256:$xj)>; ++} ++ ++multiclass PatXrXr { ++ def : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)), ++ (!cast(Inst#"_B") LASX256:$xj, LASX256:$xk)>; ++ def : Pat<(OpNode (v16i16 LASX256:$xj), (v16i16 LASX256:$xk)), ++ (!cast(Inst#"_H") LASX256:$xj, LASX256:$xk)>; ++ def : Pat<(OpNode (v8i32 LASX256:$xj), (v8i32 LASX256:$xk)), ++ (!cast(Inst#"_W") 
LASX256:$xj, LASX256:$xk)>; ++ def : Pat<(OpNode (v4i64 LASX256:$xj), (v4i64 LASX256:$xk)), ++ (!cast(Inst#"_D") LASX256:$xj, LASX256:$xk)>; ++} ++ ++multiclass PatXrXrF { ++ def : Pat<(OpNode (v8f32 LASX256:$xj), (v8f32 LASX256:$xk)), ++ (!cast(Inst#"_S") LASX256:$xj, LASX256:$xk)>; ++ def : Pat<(OpNode (v4f64 LASX256:$xj), (v4f64 LASX256:$xk)), ++ (!cast(Inst#"_D") LASX256:$xj, LASX256:$xk)>; ++} ++ ++multiclass PatXrXrU { ++ def : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)), ++ (!cast(Inst#"_BU") LASX256:$xj, LASX256:$xk)>; ++ def : Pat<(OpNode (v16i16 LASX256:$xj), (v16i16 LASX256:$xk)), ++ (!cast(Inst#"_HU") LASX256:$xj, LASX256:$xk)>; ++ def : Pat<(OpNode (v8i32 LASX256:$xj), (v8i32 LASX256:$xk)), ++ (!cast(Inst#"_WU") LASX256:$xj, LASX256:$xk)>; ++ def : Pat<(OpNode (v4i64 LASX256:$xj), (v4i64 LASX256:$xk)), ++ (!cast(Inst#"_DU") LASX256:$xj, LASX256:$xk)>; ++} ++ ++multiclass PatXrSimm5 { ++ def : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 (SplatPat_simm5 simm5:$imm))), ++ (!cast(Inst#"_B") LASX256:$xj, simm5:$imm)>; ++ def : Pat<(OpNode (v16i16 LASX256:$xj), (v16i16 (SplatPat_simm5 simm5:$imm))), ++ (!cast(Inst#"_H") LASX256:$xj, simm5:$imm)>; ++ def : Pat<(OpNode (v8i32 LASX256:$xj), (v8i32 (SplatPat_simm5 simm5:$imm))), ++ (!cast(Inst#"_W") LASX256:$xj, simm5:$imm)>; ++ def : Pat<(OpNode (v4i64 LASX256:$xj), (v4i64 (SplatPat_simm5 simm5:$imm))), ++ (!cast(Inst#"_D") LASX256:$xj, simm5:$imm)>; ++} ++ ++multiclass PatXrUimm5 { ++ def : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 (SplatPat_uimm5 uimm5:$imm))), ++ (!cast(Inst#"_BU") LASX256:$xj, uimm5:$imm)>; ++ def : Pat<(OpNode (v16i16 LASX256:$xj), (v16i16 (SplatPat_uimm5 uimm5:$imm))), ++ (!cast(Inst#"_HU") LASX256:$xj, uimm5:$imm)>; ++ def : Pat<(OpNode (v8i32 LASX256:$xj), (v8i32 (SplatPat_uimm5 uimm5:$imm))), ++ (!cast(Inst#"_WU") LASX256:$xj, uimm5:$imm)>; ++ def : Pat<(OpNode (v4i64 LASX256:$xj), (v4i64 (SplatPat_uimm5 uimm5:$imm))), ++ (!cast(Inst#"_DU") LASX256:$xj, uimm5:$imm)>; ++} ++ ++multiclass PatXrXrXr { ++ def : Pat<(OpNode (v32i8 LASX256:$xd), (v32i8 LASX256:$xj), ++ (v32i8 LASX256:$xk)), ++ (!cast(Inst#"_B") LASX256:$xd, LASX256:$xj, LASX256:$xk)>; ++ def : Pat<(OpNode (v16i16 LASX256:$xd), (v16i16 LASX256:$xj), ++ (v16i16 LASX256:$xk)), ++ (!cast(Inst#"_H") LASX256:$xd, LASX256:$xj, LASX256:$xk)>; ++ def : Pat<(OpNode (v8i32 LASX256:$xd), (v8i32 LASX256:$xj), ++ (v8i32 LASX256:$xk)), ++ (!cast(Inst#"_W") LASX256:$xd, LASX256:$xj, LASX256:$xk)>; ++ def : Pat<(OpNode (v4i64 LASX256:$xd), (v4i64 LASX256:$xj), ++ (v4i64 LASX256:$xk)), ++ (!cast(Inst#"_D") LASX256:$xd, LASX256:$xj, LASX256:$xk)>; ++} ++ ++multiclass PatShiftXrXr { ++ def : Pat<(OpNode (v32i8 LASX256:$xj), (and vsplati8_imm_eq_7, ++ (v32i8 LASX256:$xk))), ++ (!cast(Inst#"_B") LASX256:$xj, LASX256:$xk)>; ++ def : Pat<(OpNode (v16i16 LASX256:$xj), (and vsplati16_imm_eq_15, ++ (v16i16 LASX256:$xk))), ++ (!cast(Inst#"_H") LASX256:$xj, LASX256:$xk)>; ++ def : Pat<(OpNode (v8i32 LASX256:$xj), (and vsplati32_imm_eq_31, ++ (v8i32 LASX256:$xk))), ++ (!cast(Inst#"_W") LASX256:$xj, LASX256:$xk)>; ++ def : Pat<(OpNode (v4i64 LASX256:$xj), (and vsplati64_imm_eq_63, ++ (v4i64 LASX256:$xk))), ++ (!cast(Inst#"_D") LASX256:$xj, LASX256:$xk)>; ++} ++ ++multiclass PatShiftXrUimm { ++ def : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 (SplatPat_uimm3 uimm3:$imm))), ++ (!cast(Inst#"_B") LASX256:$xj, uimm3:$imm)>; ++ def : Pat<(OpNode (v16i16 LASX256:$xj), (v16i16 (SplatPat_uimm4 uimm4:$imm))), ++ (!cast(Inst#"_H") LASX256:$xj, uimm4:$imm)>; ++ def : Pat<(OpNode (v8i32 
LASX256:$xj), (v8i32 (SplatPat_uimm5 uimm5:$imm))), ++ (!cast(Inst#"_W") LASX256:$xj, uimm5:$imm)>; ++ def : Pat<(OpNode (v4i64 LASX256:$xj), (v4i64 (SplatPat_uimm6 uimm6:$imm))), ++ (!cast(Inst#"_D") LASX256:$xj, uimm6:$imm)>; ++} ++ ++class PatXrXrB ++ : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)), ++ (Inst LASX256:$xj, LASX256:$xk)>; ++ ++let Predicates = [HasExtLASX] in { ++ ++// XVADD_{B/H/W/D} ++defm : PatXrXr; ++// XVSUB_{B/H/W/D} ++defm : PatXrXr; ++ ++// XVADDI_{B/H/W/D}U ++defm : PatXrUimm5; ++// XVSUBI_{B/H/W/D}U ++defm : PatXrUimm5; ++ ++// XVNEG_{B/H/W/D} ++def : Pat<(sub immAllZerosV, (v32i8 LASX256:$xj)), (XVNEG_B LASX256:$xj)>; ++def : Pat<(sub immAllZerosV, (v16i16 LASX256:$xj)), (XVNEG_H LASX256:$xj)>; ++def : Pat<(sub immAllZerosV, (v8i32 LASX256:$xj)), (XVNEG_W LASX256:$xj)>; ++def : Pat<(sub immAllZerosV, (v4i64 LASX256:$xj)), (XVNEG_D LASX256:$xj)>; ++ ++// XVMAX[I]_{B/H/W/D}[U] ++defm : PatXrXr; ++defm : PatXrXrU; ++defm : PatXrSimm5; ++defm : PatXrUimm5; ++ ++// XVMIN[I]_{B/H/W/D}[U] ++defm : PatXrXr; ++defm : PatXrXrU; ++defm : PatXrSimm5; ++defm : PatXrUimm5; ++ ++// XVMUL_{B/H/W/D} ++defm : PatXrXr; ++ ++// XVMADD_{B/H/W/D} ++defm : PatXrXrXr; ++// XVMSUB_{B/H/W/D} ++defm : PatXrXrXr; ++ ++// XVDIV_{B/H/W/D}[U] ++defm : PatXrXr; ++defm : PatXrXrU; ++ ++// XVMOD_{B/H/W/D}[U] ++defm : PatXrXr; ++defm : PatXrXrU; ++ ++// XVAND_V ++def : PatXrXrB; ++// XVNOR_V ++def : PatXrXrB; ++// XVXOR_V ++def : PatXrXrB; ++// XVNOR_V ++def : Pat<(vnot (or (v32i8 LASX256:$xj), (v32i8 LASX256:$xk))), ++ (XVNOR_V LASX256:$xj, LASX256:$xk)>; ++ ++// XVANDI_B ++def : Pat<(and (v32i8 LASX256:$xj), (v32i8 (SplatPat_uimm8 uimm8:$imm))), ++ (XVANDI_B LASX256:$xj, uimm8:$imm)>; ++// XVORI_B ++def : Pat<(or (v32i8 LASX256:$xj), (v32i8 (SplatPat_uimm8 uimm8:$imm))), ++ (XVORI_B LASX256:$xj, uimm8:$imm)>; ++ ++// XVXORI_B ++def : Pat<(xor (v32i8 LASX256:$xj), (v32i8 (SplatPat_uimm8 uimm8:$imm))), ++ (XVXORI_B LASX256:$xj, uimm8:$imm)>; ++ ++// XVSLL[I]_{B/H/W/D} ++defm : PatXrXr; ++defm : PatShiftXrXr; ++defm : PatShiftXrUimm; ++ ++// XVSRL[I]_{B/H/W/D} ++defm : PatXrXr; ++defm : PatShiftXrXr; ++defm : PatShiftXrUimm; ++ ++// XVSRA[I]_{B/H/W/D} ++defm : PatXrXr; ++defm : PatShiftXrXr; ++defm : PatShiftXrUimm; ++ ++// XVPCNT_{B/H/W/D} ++defm : PatXr; ++ ++// XVBITCLR_{B/H/W/D} ++def : Pat<(and v32i8:$xj, (vnot (shl vsplat_imm_eq_1, v32i8:$xk))), ++ (v32i8 (XVBITCLR_B v32i8:$xj, v32i8:$xk))>; ++def : Pat<(and v16i16:$xj, (vnot (shl vsplat_imm_eq_1, v16i16:$xk))), ++ (v16i16 (XVBITCLR_H v16i16:$xj, v16i16:$xk))>; ++def : Pat<(and v8i32:$xj, (vnot (shl vsplat_imm_eq_1, v8i32:$xk))), ++ (v8i32 (XVBITCLR_W v8i32:$xj, v8i32:$xk))>; ++def : Pat<(and v4i64:$xj, (vnot (shl vsplat_imm_eq_1, v4i64:$xk))), ++ (v4i64 (XVBITCLR_D v4i64:$xj, v4i64:$xk))>; ++def : Pat<(and v32i8:$xj, (vnot (shl vsplat_imm_eq_1, ++ (vsplati8imm7 v32i8:$xk)))), ++ (v32i8 (XVBITCLR_B v32i8:$xj, v32i8:$xk))>; ++def : Pat<(and v16i16:$xj, (vnot (shl vsplat_imm_eq_1, ++ (vsplati16imm15 v16i16:$xk)))), ++ (v16i16 (XVBITCLR_H v16i16:$xj, v16i16:$xk))>; ++def : Pat<(and v8i32:$xj, (vnot (shl vsplat_imm_eq_1, ++ (vsplati32imm31 v8i32:$xk)))), ++ (v8i32 (XVBITCLR_W v8i32:$xj, v8i32:$xk))>; ++def : Pat<(and v4i64:$xj, (vnot (shl vsplat_imm_eq_1, ++ (vsplati64imm63 v4i64:$xk)))), ++ (v4i64 (XVBITCLR_D v4i64:$xj, v4i64:$xk))>; ++ ++// XVBITCLRI_{B/H/W/D} ++def : Pat<(and (v32i8 LASX256:$xj), (v32i8 (vsplat_uimm_inv_pow2 uimm3:$imm))), ++ (XVBITCLRI_B LASX256:$xj, uimm3:$imm)>; ++def : Pat<(and (v16i16 LASX256:$xj), (v16i16 
(vsplat_uimm_inv_pow2 uimm4:$imm))), ++ (XVBITCLRI_H LASX256:$xj, uimm4:$imm)>; ++def : Pat<(and (v8i32 LASX256:$xj), (v8i32 (vsplat_uimm_inv_pow2 uimm5:$imm))), ++ (XVBITCLRI_W LASX256:$xj, uimm5:$imm)>; ++def : Pat<(and (v4i64 LASX256:$xj), (v4i64 (vsplat_uimm_inv_pow2 uimm6:$imm))), ++ (XVBITCLRI_D LASX256:$xj, uimm6:$imm)>; ++ ++// XVBITSET_{B/H/W/D} ++def : Pat<(or v32i8:$xj, (shl vsplat_imm_eq_1, v32i8:$xk)), ++ (v32i8 (XVBITSET_B v32i8:$xj, v32i8:$xk))>; ++def : Pat<(or v16i16:$xj, (shl vsplat_imm_eq_1, v16i16:$xk)), ++ (v16i16 (XVBITSET_H v16i16:$xj, v16i16:$xk))>; ++def : Pat<(or v8i32:$xj, (shl vsplat_imm_eq_1, v8i32:$xk)), ++ (v8i32 (XVBITSET_W v8i32:$xj, v8i32:$xk))>; ++def : Pat<(or v4i64:$xj, (shl vsplat_imm_eq_1, v4i64:$xk)), ++ (v4i64 (XVBITSET_D v4i64:$xj, v4i64:$xk))>; ++def : Pat<(or v32i8:$xj, (shl vsplat_imm_eq_1, (vsplati8imm7 v32i8:$xk))), ++ (v32i8 (XVBITSET_B v32i8:$xj, v32i8:$xk))>; ++def : Pat<(or v16i16:$xj, (shl vsplat_imm_eq_1, (vsplati16imm15 v16i16:$xk))), ++ (v16i16 (XVBITSET_H v16i16:$xj, v16i16:$xk))>; ++def : Pat<(or v8i32:$xj, (shl vsplat_imm_eq_1, (vsplati32imm31 v8i32:$xk))), ++ (v8i32 (XVBITSET_W v8i32:$xj, v8i32:$xk))>; ++def : Pat<(or v4i64:$xj, (shl vsplat_imm_eq_1, (vsplati64imm63 v4i64:$xk))), ++ (v4i64 (XVBITSET_D v4i64:$xj, v4i64:$xk))>; ++ ++// XVBITSETI_{B/H/W/D} ++def : Pat<(or (v32i8 LASX256:$xj), (v32i8 (vsplat_uimm_pow2 uimm3:$imm))), ++ (XVBITSETI_B LASX256:$xj, uimm3:$imm)>; ++def : Pat<(or (v16i16 LASX256:$xj), (v16i16 (vsplat_uimm_pow2 uimm4:$imm))), ++ (XVBITSETI_H LASX256:$xj, uimm4:$imm)>; ++def : Pat<(or (v8i32 LASX256:$xj), (v8i32 (vsplat_uimm_pow2 uimm5:$imm))), ++ (XVBITSETI_W LASX256:$xj, uimm5:$imm)>; ++def : Pat<(or (v4i64 LASX256:$xj), (v4i64 (vsplat_uimm_pow2 uimm6:$imm))), ++ (XVBITSETI_D LASX256:$xj, uimm6:$imm)>; ++ ++// XVBITREV_{B/H/W/D} ++def : Pat<(xor v32i8:$xj, (shl vsplat_imm_eq_1, v32i8:$xk)), ++ (v32i8 (XVBITREV_B v32i8:$xj, v32i8:$xk))>; ++def : Pat<(xor v16i16:$xj, (shl vsplat_imm_eq_1, v16i16:$xk)), ++ (v16i16 (XVBITREV_H v16i16:$xj, v16i16:$xk))>; ++def : Pat<(xor v8i32:$xj, (shl vsplat_imm_eq_1, v8i32:$xk)), ++ (v8i32 (XVBITREV_W v8i32:$xj, v8i32:$xk))>; ++def : Pat<(xor v4i64:$xj, (shl vsplat_imm_eq_1, v4i64:$xk)), ++ (v4i64 (XVBITREV_D v4i64:$xj, v4i64:$xk))>; ++def : Pat<(xor v32i8:$xj, (shl vsplat_imm_eq_1, (vsplati8imm7 v32i8:$xk))), ++ (v32i8 (XVBITREV_B v32i8:$xj, v32i8:$xk))>; ++def : Pat<(xor v16i16:$xj, (shl vsplat_imm_eq_1, (vsplati16imm15 v16i16:$xk))), ++ (v16i16 (XVBITREV_H v16i16:$xj, v16i16:$xk))>; ++def : Pat<(xor v8i32:$xj, (shl vsplat_imm_eq_1, (vsplati32imm31 v8i32:$xk))), ++ (v8i32 (XVBITREV_W v8i32:$xj, v8i32:$xk))>; ++def : Pat<(xor v4i64:$xj, (shl vsplat_imm_eq_1, (vsplati64imm63 v4i64:$xk))), ++ (v4i64 (XVBITREV_D v4i64:$xj, v4i64:$xk))>; ++ ++// XVBITREVI_{B/H/W/D} ++def : Pat<(xor (v32i8 LASX256:$xj), (v32i8 (vsplat_uimm_pow2 uimm3:$imm))), ++ (XVBITREVI_B LASX256:$xj, uimm3:$imm)>; ++def : Pat<(xor (v16i16 LASX256:$xj), (v16i16 (vsplat_uimm_pow2 uimm4:$imm))), ++ (XVBITREVI_H LASX256:$xj, uimm4:$imm)>; ++def : Pat<(xor (v8i32 LASX256:$xj), (v8i32 (vsplat_uimm_pow2 uimm5:$imm))), ++ (XVBITREVI_W LASX256:$xj, uimm5:$imm)>; ++def : Pat<(xor (v4i64 LASX256:$xj), (v4i64 (vsplat_uimm_pow2 uimm6:$imm))), ++ (XVBITREVI_D LASX256:$xj, uimm6:$imm)>; ++ ++// XVFADD_{S/D} ++defm : PatXrXrF; ++ ++// XVFSUB_{S/D} ++defm : PatXrXrF; ++ ++// XVFMUL_{S/D} ++defm : PatXrXrF; ++ ++// XVFDIV_{S/D} ++defm : PatXrXrF; ++ ++// XVFMADD_{S/D} ++def : Pat<(fma v8f32:$xj, v8f32:$xk, v8f32:$xa), ++ 
(XVFMADD_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>; ++def : Pat<(fma v4f64:$xj, v4f64:$xk, v4f64:$xa), ++ (XVFMADD_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>; ++ ++// XVINSGR2VR_{W/D} ++def : Pat<(vector_insert v8i32:$xd, GRLenVT:$rj, uimm3:$imm), ++ (XVINSGR2VR_W v8i32:$xd, GRLenVT:$rj, uimm3:$imm)>; ++def : Pat<(vector_insert v4i64:$xd, GRLenVT:$rj, uimm2:$imm), ++ (XVINSGR2VR_D v4i64:$xd, GRLenVT:$rj, uimm2:$imm)>; ++ ++// XVPICKVE2GR_W[U] ++def : Pat<(loongarch_vpick_sext_elt v8i32:$xd, uimm3:$imm, i32), ++ (XVPICKVE2GR_W v8i32:$xd, uimm3:$imm)>; ++def : Pat<(loongarch_vpick_zext_elt v8i32:$xd, uimm3:$imm, i32), ++ (XVPICKVE2GR_WU v8i32:$xd, uimm3:$imm)>; ++ ++// XVREPLGR2VR_{B/H/W/D} ++def : Pat<(lasxsplati8 GPR:$rj), (XVREPLGR2VR_B GPR:$rj)>; ++def : Pat<(lasxsplati16 GPR:$rj), (XVREPLGR2VR_H GPR:$rj)>; ++def : Pat<(lasxsplati32 GPR:$rj), (XVREPLGR2VR_W GPR:$rj)>; ++def : Pat<(lasxsplati64 GPR:$rj), (XVREPLGR2VR_D GPR:$rj)>; ++ ++// XVREPLVE_{B/H/W/D} ++def : Pat<(loongarch_vreplve v32i8:$xj, GRLenVT:$rk), ++ (XVREPLVE_B v32i8:$xj, GRLenVT:$rk)>; ++def : Pat<(loongarch_vreplve v16i16:$xj, GRLenVT:$rk), ++ (XVREPLVE_H v16i16:$xj, GRLenVT:$rk)>; ++def : Pat<(loongarch_vreplve v8i32:$xj, GRLenVT:$rk), ++ (XVREPLVE_W v8i32:$xj, GRLenVT:$rk)>; ++def : Pat<(loongarch_vreplve v4i64:$xj, GRLenVT:$rk), ++ (XVREPLVE_D v4i64:$xj, GRLenVT:$rk)>; ++ ++// Loads/Stores ++foreach vt = [v32i8, v16i16, v8i32, v4i64] in { ++ defm : LdPat; ++ def : RegRegLdPat; ++ defm : StPat; ++ def : RegRegStPat; ++} ++ ++} // Predicates = [HasExtLASX] ++ ++/// Intrinsic pattern ++ ++class deriveLASXIntrinsic { ++ Intrinsic ret = !cast(!tolower("int_loongarch_lasx_"#Inst)); ++} ++ ++let Predicates = [HasExtLASX] in { ++ ++// vty: v32i8/v16i16/v8i32/v4i64 ++// Pat<(Intrinsic vty:$xj, vty:$xk), ++// (LAInst vty:$xj, vty:$xk)>; ++foreach Inst = ["XVSADD_B", "XVSADD_BU", "XVSSUB_B", "XVSSUB_BU", ++ "XVHADDW_H_B", "XVHADDW_HU_BU", "XVHSUBW_H_B", "XVHSUBW_HU_BU", ++ "XVADDWEV_H_B", "XVADDWOD_H_B", "XVSUBWEV_H_B", "XVSUBWOD_H_B", ++ "XVADDWEV_H_BU", "XVADDWOD_H_BU", "XVSUBWEV_H_BU", "XVSUBWOD_H_BU", ++ "XVADDWEV_H_BU_B", "XVADDWOD_H_BU_B", ++ "XVAVG_B", "XVAVG_BU", "XVAVGR_B", "XVAVGR_BU", ++ "XVABSD_B", "XVABSD_BU", "XVADDA_B", "XVMUH_B", "XVMUH_BU", ++ "XVMULWEV_H_B", "XVMULWOD_H_B", "XVMULWEV_H_BU", "XVMULWOD_H_BU", ++ "XVMULWEV_H_BU_B", "XVMULWOD_H_BU_B", "XVSIGNCOV_B", ++ "XVANDN_V", "XVORN_V", "XVROTR_B", "XVSRLR_B", "XVSRAR_B", ++ "XVSEQ_B", "XVSLE_B", "XVSLE_BU", "XVSLT_B", "XVSLT_BU", ++ "XVPACKEV_B", "XVPACKOD_B", "XVPICKEV_B", "XVPICKOD_B", ++ "XVILVL_B", "XVILVH_B"] in ++ def : Pat<(deriveLASXIntrinsic.ret ++ (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)), ++ (!cast(Inst) LASX256:$xj, LASX256:$xk)>; ++foreach Inst = ["XVSADD_H", "XVSADD_HU", "XVSSUB_H", "XVSSUB_HU", ++ "XVHADDW_W_H", "XVHADDW_WU_HU", "XVHSUBW_W_H", "XVHSUBW_WU_HU", ++ "XVADDWEV_W_H", "XVADDWOD_W_H", "XVSUBWEV_W_H", "XVSUBWOD_W_H", ++ "XVADDWEV_W_HU", "XVADDWOD_W_HU", "XVSUBWEV_W_HU", "XVSUBWOD_W_HU", ++ "XVADDWEV_W_HU_H", "XVADDWOD_W_HU_H", ++ "XVAVG_H", "XVAVG_HU", "XVAVGR_H", "XVAVGR_HU", ++ "XVABSD_H", "XVABSD_HU", "XVADDA_H", "XVMUH_H", "XVMUH_HU", ++ "XVMULWEV_W_H", "XVMULWOD_W_H", "XVMULWEV_W_HU", "XVMULWOD_W_HU", ++ "XVMULWEV_W_HU_H", "XVMULWOD_W_HU_H", "XVSIGNCOV_H", "XVROTR_H", ++ "XVSRLR_H", "XVSRAR_H", "XVSRLN_B_H", "XVSRAN_B_H", "XVSRLRN_B_H", ++ "XVSRARN_B_H", "XVSSRLN_B_H", "XVSSRAN_B_H", "XVSSRLN_BU_H", ++ "XVSSRAN_BU_H", "XVSSRLRN_B_H", "XVSSRARN_B_H", "XVSSRLRN_BU_H", ++ "XVSSRARN_BU_H", ++ "XVSEQ_H", "XVSLE_H", "XVSLE_HU", "XVSLT_H", "XVSLT_HU", 
++ "XVPACKEV_H", "XVPACKOD_H", "XVPICKEV_H", "XVPICKOD_H", ++ "XVILVL_H", "XVILVH_H"] in ++ def : Pat<(deriveLASXIntrinsic.ret ++ (v16i16 LASX256:$xj), (v16i16 LASX256:$xk)), ++ (!cast(Inst) LASX256:$xj, LASX256:$xk)>; ++foreach Inst = ["XVSADD_W", "XVSADD_WU", "XVSSUB_W", "XVSSUB_WU", ++ "XVHADDW_D_W", "XVHADDW_DU_WU", "XVHSUBW_D_W", "XVHSUBW_DU_WU", ++ "XVADDWEV_D_W", "XVADDWOD_D_W", "XVSUBWEV_D_W", "XVSUBWOD_D_W", ++ "XVADDWEV_D_WU", "XVADDWOD_D_WU", "XVSUBWEV_D_WU", "XVSUBWOD_D_WU", ++ "XVADDWEV_D_WU_W", "XVADDWOD_D_WU_W", ++ "XVAVG_W", "XVAVG_WU", "XVAVGR_W", "XVAVGR_WU", ++ "XVABSD_W", "XVABSD_WU", "XVADDA_W", "XVMUH_W", "XVMUH_WU", ++ "XVMULWEV_D_W", "XVMULWOD_D_W", "XVMULWEV_D_WU", "XVMULWOD_D_WU", ++ "XVMULWEV_D_WU_W", "XVMULWOD_D_WU_W", "XVSIGNCOV_W", "XVROTR_W", ++ "XVSRLR_W", "XVSRAR_W", "XVSRLN_H_W", "XVSRAN_H_W", "XVSRLRN_H_W", ++ "XVSRARN_H_W", "XVSSRLN_H_W", "XVSSRAN_H_W", "XVSSRLN_HU_W", ++ "XVSSRAN_HU_W", "XVSSRLRN_H_W", "XVSSRARN_H_W", "XVSSRLRN_HU_W", ++ "XVSSRARN_HU_W", ++ "XVSEQ_W", "XVSLE_W", "XVSLE_WU", "XVSLT_W", "XVSLT_WU", ++ "XVPACKEV_W", "XVPACKOD_W", "XVPICKEV_W", "XVPICKOD_W", ++ "XVILVL_W", "XVILVH_W", "XVPERM_W"] in ++ def : Pat<(deriveLASXIntrinsic.ret ++ (v8i32 LASX256:$xj), (v8i32 LASX256:$xk)), ++ (!cast(Inst) LASX256:$xj, LASX256:$xk)>; ++foreach Inst = ["XVADD_Q", "XVSUB_Q", ++ "XVSADD_D", "XVSADD_DU", "XVSSUB_D", "XVSSUB_DU", ++ "XVHADDW_Q_D", "XVHADDW_QU_DU", "XVHSUBW_Q_D", "XVHSUBW_QU_DU", ++ "XVADDWEV_Q_D", "XVADDWOD_Q_D", "XVSUBWEV_Q_D", "XVSUBWOD_Q_D", ++ "XVADDWEV_Q_DU", "XVADDWOD_Q_DU", "XVSUBWEV_Q_DU", "XVSUBWOD_Q_DU", ++ "XVADDWEV_Q_DU_D", "XVADDWOD_Q_DU_D", ++ "XVAVG_D", "XVAVG_DU", "XVAVGR_D", "XVAVGR_DU", ++ "XVABSD_D", "XVABSD_DU", "XVADDA_D", "XVMUH_D", "XVMUH_DU", ++ "XVMULWEV_Q_D", "XVMULWOD_Q_D", "XVMULWEV_Q_DU", "XVMULWOD_Q_DU", ++ "XVMULWEV_Q_DU_D", "XVMULWOD_Q_DU_D", "XVSIGNCOV_D", "XVROTR_D", ++ "XVSRLR_D", "XVSRAR_D", "XVSRLN_W_D", "XVSRAN_W_D", "XVSRLRN_W_D", ++ "XVSRARN_W_D", "XVSSRLN_W_D", "XVSSRAN_W_D", "XVSSRLN_WU_D", ++ "XVSSRAN_WU_D", "XVSSRLRN_W_D", "XVSSRARN_W_D", "XVSSRLRN_WU_D", ++ "XVSSRARN_WU_D", "XVFFINT_S_L", ++ "XVSEQ_D", "XVSLE_D", "XVSLE_DU", "XVSLT_D", "XVSLT_DU", ++ "XVPACKEV_D", "XVPACKOD_D", "XVPICKEV_D", "XVPICKOD_D", ++ "XVILVL_D", "XVILVH_D"] in ++ def : Pat<(deriveLASXIntrinsic.ret ++ (v4i64 LASX256:$xj), (v4i64 LASX256:$xk)), ++ (!cast(Inst) LASX256:$xj, LASX256:$xk)>; ++ ++// vty: v32i8/v16i16/v8i32/v4i64 ++// Pat<(Intrinsic vty:$xd, vty:$xj, vty:$xk), ++// (LAInst vty:$xd, vty:$xj, vty:$xk)>; ++foreach Inst = ["XVMADDWEV_H_B", "XVMADDWOD_H_B", "XVMADDWEV_H_BU", ++ "XVMADDWOD_H_BU", "XVMADDWEV_H_BU_B", "XVMADDWOD_H_BU_B"] in ++ def : Pat<(deriveLASXIntrinsic.ret ++ (v16i16 LASX256:$xd), (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)), ++ (!cast(Inst) LASX256:$xd, LASX256:$xj, LASX256:$xk)>; ++foreach Inst = ["XVMADDWEV_W_H", "XVMADDWOD_W_H", "XVMADDWEV_W_HU", ++ "XVMADDWOD_W_HU", "XVMADDWEV_W_HU_H", "XVMADDWOD_W_HU_H"] in ++ def : Pat<(deriveLASXIntrinsic.ret ++ (v8i32 LASX256:$xd), (v16i16 LASX256:$xj), (v16i16 LASX256:$xk)), ++ (!cast(Inst) LASX256:$xd, LASX256:$xj, LASX256:$xk)>; ++foreach Inst = ["XVMADDWEV_D_W", "XVMADDWOD_D_W", "XVMADDWEV_D_WU", ++ "XVMADDWOD_D_WU", "XVMADDWEV_D_WU_W", "XVMADDWOD_D_WU_W"] in ++ def : Pat<(deriveLASXIntrinsic.ret ++ (v4i64 LASX256:$xd), (v8i32 LASX256:$xj), (v8i32 LASX256:$xk)), ++ (!cast(Inst) LASX256:$xd, LASX256:$xj, LASX256:$xk)>; ++foreach Inst = ["XVMADDWEV_Q_D", "XVMADDWOD_Q_D", "XVMADDWEV_Q_DU", ++ "XVMADDWOD_Q_DU", "XVMADDWEV_Q_DU_D", "XVMADDWOD_Q_DU_D"] in ++ 
def : Pat<(deriveLASXIntrinsic.ret ++ (v4i64 LASX256:$xd), (v4i64 LASX256:$xj), (v4i64 LASX256:$xk)), ++ (!cast(Inst) LASX256:$xd, LASX256:$xj, LASX256:$xk)>; ++ ++// vty: v32i8/v16i16/v8i32/v4i64 ++// Pat<(Intrinsic vty:$xj), ++// (LAInst vty:$xj)>; ++foreach Inst = ["XVEXTH_H_B", "XVEXTH_HU_BU", ++ "XVMSKLTZ_B", "XVMSKGEZ_B", "XVMSKNZ_B", ++ "XVCLO_B", "XVCLZ_B", "VEXT2XV_H_B", "VEXT2XV_HU_BU", ++ "VEXT2XV_W_B", "VEXT2XV_WU_BU", "VEXT2XV_D_B", ++ "VEXT2XV_DU_BU", "XVREPLVE0_B", "XVREPLVE0_Q"] in ++ def : Pat<(deriveLASXIntrinsic.ret (v32i8 LASX256:$xj)), ++ (!cast(Inst) LASX256:$xj)>; ++foreach Inst = ["XVEXTH_W_H", "XVEXTH_WU_HU", "XVMSKLTZ_H", ++ "XVCLO_H", "XVCLZ_H", "XVFCVTL_S_H", "XVFCVTH_S_H", ++ "VEXT2XV_W_H", "VEXT2XV_WU_HU", "VEXT2XV_D_H", ++ "VEXT2XV_DU_HU", "XVREPLVE0_H"] in ++ def : Pat<(deriveLASXIntrinsic.ret (v16i16 LASX256:$xj)), ++ (!cast(Inst) LASX256:$xj)>; ++foreach Inst = ["XVEXTH_D_W", "XVEXTH_DU_WU", "XVMSKLTZ_W", ++ "XVCLO_W", "XVCLZ_W", "XVFFINT_S_W", "XVFFINT_S_WU", ++ "XVFFINTL_D_W", "XVFFINTH_D_W", ++ "VEXT2XV_D_W", "VEXT2XV_DU_WU", "XVREPLVE0_W"] in ++ def : Pat<(deriveLASXIntrinsic.ret (v8i32 LASX256:$xj)), ++ (!cast(Inst) LASX256:$xj)>; ++foreach Inst = ["XVEXTH_Q_D", "XVEXTH_QU_DU", "XVMSKLTZ_D", ++ "XVEXTL_Q_D", "XVEXTL_QU_DU", ++ "XVCLO_D", "XVCLZ_D", "XVFFINT_D_L", "XVFFINT_D_LU", ++ "XVREPLVE0_D"] in ++ def : Pat<(deriveLASXIntrinsic.ret (v4i64 LASX256:$xj)), ++ (!cast(Inst) LASX256:$xj)>; ++ ++// Pat<(Intrinsic timm:$imm) ++// (LAInst timm:$imm)>; ++def : Pat<(int_loongarch_lasx_xvldi timm:$imm), ++ (XVLDI (to_valide_timm timm:$imm))>; ++foreach Inst = ["XVREPLI_B", "XVREPLI_H", "XVREPLI_W", "XVREPLI_D"] in ++ def : Pat<(deriveLASXIntrinsic.ret timm:$imm), ++ (!cast("Pseudo"#Inst) (to_valide_timm timm:$imm))>; ++ ++// vty: v32i8/v16i16/v8i32/v4i64 ++// Pat<(Intrinsic vty:$xj, timm:$imm) ++// (LAInst vty:$xj, timm:$imm)>; ++foreach Inst = ["XVSAT_B", "XVSAT_BU", "XVNORI_B", "XVROTRI_B", "XVSLLWIL_H_B", ++ "XVSLLWIL_HU_BU", "XVSRLRI_B", "XVSRARI_B", ++ "XVSEQI_B", "XVSLEI_B", "XVSLEI_BU", "XVSLTI_B", "XVSLTI_BU", ++ "XVREPL128VEI_B", "XVBSLL_V", "XVBSRL_V", "XVSHUF4I_B"] in ++ def : Pat<(deriveLASXIntrinsic.ret (v32i8 LASX256:$xj), timm:$imm), ++ (!cast(Inst) LASX256:$xj, (to_valide_timm timm:$imm))>; ++foreach Inst = ["XVSAT_H", "XVSAT_HU", "XVROTRI_H", "XVSLLWIL_W_H", ++ "XVSLLWIL_WU_HU", "XVSRLRI_H", "XVSRARI_H", ++ "XVSEQI_H", "XVSLEI_H", "XVSLEI_HU", "XVSLTI_H", "XVSLTI_HU", ++ "XVREPL128VEI_H", "XVSHUF4I_H"] in ++ def : Pat<(deriveLASXIntrinsic.ret (v16i16 LASX256:$xj), timm:$imm), ++ (!cast(Inst) LASX256:$xj, (to_valide_timm timm:$imm))>; ++foreach Inst = ["XVSAT_W", "XVSAT_WU", "XVROTRI_W", "XVSLLWIL_D_W", ++ "XVSLLWIL_DU_WU", "XVSRLRI_W", "XVSRARI_W", ++ "XVSEQI_W", "XVSLEI_W", "XVSLEI_WU", "XVSLTI_W", "XVSLTI_WU", ++ "XVREPL128VEI_W", "XVSHUF4I_W", "XVPICKVE_W"] in ++ def : Pat<(deriveLASXIntrinsic.ret (v8i32 LASX256:$xj), timm:$imm), ++ (!cast(Inst) LASX256:$xj, (to_valide_timm timm:$imm))>; ++foreach Inst = ["XVSAT_D", "XVSAT_DU", "XVROTRI_D", "XVSRLRI_D", "XVSRARI_D", ++ "XVSEQI_D", "XVSLEI_D", "XVSLEI_DU", "XVSLTI_D", "XVSLTI_DU", ++ "XVPICKVE2GR_D", "XVPICKVE2GR_DU", ++ "XVREPL128VEI_D", "XVPERMI_D", "XVPICKVE_D"] in ++ def : Pat<(deriveLASXIntrinsic.ret (v4i64 LASX256:$xj), timm:$imm), ++ (!cast(Inst) LASX256:$xj, (to_valide_timm timm:$imm))>; ++ ++// vty: v32i8/v16i16/v8i32/v4i64 ++// Pat<(Intrinsic vty:$xd, vty:$xj, timm:$imm) ++// (LAInst vty:$xd, vty:$xj, timm:$imm)>; ++foreach Inst = ["XVSRLNI_B_H", "XVSRANI_B_H", "XVSRLRNI_B_H", 
"XVSRARNI_B_H", ++ "XVSSRLNI_B_H", "XVSSRANI_B_H", "XVSSRLNI_BU_H", "XVSSRANI_BU_H", ++ "XVSSRLRNI_B_H", "XVSSRARNI_B_H", "XVSSRLRNI_BU_H", "XVSSRARNI_BU_H", ++ "XVFRSTPI_B", "XVBITSELI_B", "XVEXTRINS_B", "XVPERMI_Q"] in ++ def : Pat<(deriveLASXIntrinsic.ret ++ (v32i8 LASX256:$xd), (v32i8 LASX256:$xj), timm:$imm), ++ (!cast(Inst) LASX256:$xd, LASX256:$xj, ++ (to_valide_timm timm:$imm))>; ++foreach Inst = ["XVSRLNI_H_W", "XVSRANI_H_W", "XVSRLRNI_H_W", "XVSRARNI_H_W", ++ "XVSSRLNI_H_W", "XVSSRANI_H_W", "XVSSRLNI_HU_W", "XVSSRANI_HU_W", ++ "XVSSRLRNI_H_W", "XVSSRARNI_H_W", "XVSSRLRNI_HU_W", "XVSSRARNI_HU_W", ++ "XVFRSTPI_H", "XVEXTRINS_H"] in ++ def : Pat<(deriveLASXIntrinsic.ret ++ (v16i16 LASX256:$xd), (v16i16 LASX256:$xj), timm:$imm), ++ (!cast(Inst) LASX256:$xd, LASX256:$xj, ++ (to_valide_timm timm:$imm))>; ++foreach Inst = ["XVSRLNI_W_D", "XVSRANI_W_D", "XVSRLRNI_W_D", "XVSRARNI_W_D", ++ "XVSSRLNI_W_D", "XVSSRANI_W_D", "XVSSRLNI_WU_D", "XVSSRANI_WU_D", ++ "XVSSRLRNI_W_D", "XVSSRARNI_W_D", "XVSSRLRNI_WU_D", "XVSSRARNI_WU_D", ++ "XVPERMI_W", "XVEXTRINS_W", "XVINSVE0_W"] in ++ def : Pat<(deriveLASXIntrinsic.ret ++ (v8i32 LASX256:$xd), (v8i32 LASX256:$xj), timm:$imm), ++ (!cast(Inst) LASX256:$xd, LASX256:$xj, ++ (to_valide_timm timm:$imm))>; ++foreach Inst = ["XVSRLNI_D_Q", "XVSRANI_D_Q", "XVSRLRNI_D_Q", "XVSRARNI_D_Q", ++ "XVSSRLNI_D_Q", "XVSSRANI_D_Q", "XVSSRLNI_DU_Q", "XVSSRANI_DU_Q", ++ "XVSSRLRNI_D_Q", "XVSSRARNI_D_Q", "XVSSRLRNI_DU_Q", "XVSSRARNI_DU_Q", ++ "XVSHUF4I_D", "XVEXTRINS_D", "XVINSVE0_D"] in ++ def : Pat<(deriveLASXIntrinsic.ret ++ (v4i64 LASX256:$xd), (v4i64 LASX256:$xj), timm:$imm), ++ (!cast(Inst) LASX256:$xd, LASX256:$xj, ++ (to_valide_timm timm:$imm))>; ++ ++// vty: v32i8/v16i16/v8i32/v4i64 ++// Pat<(Intrinsic vty:$xd, vty:$xj, vty:$xk), ++// (LAInst vty:$xd, vty:$xj, vty:$xk)>; ++foreach Inst = ["XVFRSTP_B", "XVBITSEL_V", "XVSHUF_B"] in ++ def : Pat<(deriveLASXIntrinsic.ret ++ (v32i8 LASX256:$xd), (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)), ++ (!cast(Inst) LASX256:$xd, LASX256:$xj, LASX256:$xk)>; ++foreach Inst = ["XVFRSTP_H", "XVSHUF_H"] in ++ def : Pat<(deriveLASXIntrinsic.ret ++ (v16i16 LASX256:$xd), (v16i16 LASX256:$xj), (v16i16 LASX256:$xk)), ++ (!cast(Inst) LASX256:$xd, LASX256:$xj, LASX256:$xk)>; ++def : Pat<(int_loongarch_lasx_xvshuf_w (v8i32 LASX256:$xd), (v8i32 LASX256:$xj), ++ (v8i32 LASX256:$xk)), ++ (XVSHUF_W LASX256:$xd, LASX256:$xj, LASX256:$xk)>; ++def : Pat<(int_loongarch_lasx_xvshuf_d (v4i64 LASX256:$xd), (v4i64 LASX256:$xj), ++ (v4i64 LASX256:$xk)), ++ (XVSHUF_D LASX256:$xd, LASX256:$xj, LASX256:$xk)>; ++ ++// vty: v8f32/v4f64 ++// Pat<(Intrinsic vty:$xj, vty:$xk, vty:$xa), ++// (LAInst vty:$xj, vty:$xk, vty:$xa)>; ++foreach Inst = ["XVFMSUB_S", "XVFNMADD_S", "XVFNMSUB_S"] in ++ def : Pat<(deriveLASXIntrinsic.ret ++ (v8f32 LASX256:$xj), (v8f32 LASX256:$xk), (v8f32 LASX256:$xa)), ++ (!cast(Inst) LASX256:$xj, LASX256:$xk, LASX256:$xa)>; ++foreach Inst = ["XVFMSUB_D", "XVFNMADD_D", "XVFNMSUB_D"] in ++ def : Pat<(deriveLASXIntrinsic.ret ++ (v4f64 LASX256:$xj), (v4f64 LASX256:$xk), (v4f64 LASX256:$xa)), ++ (!cast(Inst) LASX256:$xj, LASX256:$xk, LASX256:$xa)>; ++ ++// vty: v8f32/v4f64 ++// Pat<(Intrinsic vty:$xj, vty:$xk), ++// (LAInst vty:$xj, vty:$xk)>; ++foreach Inst = ["XVFMAX_S", "XVFMIN_S", "XVFMAXA_S", "XVFMINA_S", "XVFCVT_H_S", ++ "XVFCMP_CAF_S", "XVFCMP_CUN_S", "XVFCMP_CEQ_S", "XVFCMP_CUEQ_S", ++ "XVFCMP_CLT_S", "XVFCMP_CULT_S", "XVFCMP_CLE_S", "XVFCMP_CULE_S", ++ "XVFCMP_CNE_S", "XVFCMP_COR_S", "XVFCMP_CUNE_S", ++ "XVFCMP_SAF_S", "XVFCMP_SUN_S", 
"XVFCMP_SEQ_S", "XVFCMP_SUEQ_S", ++ "XVFCMP_SLT_S", "XVFCMP_SULT_S", "XVFCMP_SLE_S", "XVFCMP_SULE_S", ++ "XVFCMP_SNE_S", "XVFCMP_SOR_S", "XVFCMP_SUNE_S"] in ++ def : Pat<(deriveLASXIntrinsic.ret ++ (v8f32 LASX256:$xj), (v8f32 LASX256:$xk)), ++ (!cast(Inst) LASX256:$xj, LASX256:$xk)>; ++foreach Inst = ["XVFMAX_D", "XVFMIN_D", "XVFMAXA_D", "XVFMINA_D", "XVFCVT_S_D", ++ "XVFTINTRNE_W_D", "XVFTINTRZ_W_D", "XVFTINTRP_W_D", "XVFTINTRM_W_D", ++ "XVFTINT_W_D", ++ "XVFCMP_CAF_D", "XVFCMP_CUN_D", "XVFCMP_CEQ_D", "XVFCMP_CUEQ_D", ++ "XVFCMP_CLT_D", "XVFCMP_CULT_D", "XVFCMP_CLE_D", "XVFCMP_CULE_D", ++ "XVFCMP_CNE_D", "XVFCMP_COR_D", "XVFCMP_CUNE_D", ++ "XVFCMP_SAF_D", "XVFCMP_SUN_D", "XVFCMP_SEQ_D", "XVFCMP_SUEQ_D", ++ "XVFCMP_SLT_D", "XVFCMP_SULT_D", "XVFCMP_SLE_D", "XVFCMP_SULE_D", ++ "XVFCMP_SNE_D", "XVFCMP_SOR_D", "XVFCMP_SUNE_D"] in ++ def : Pat<(deriveLASXIntrinsic.ret ++ (v4f64 LASX256:$xj), (v4f64 LASX256:$xk)), ++ (!cast(Inst) LASX256:$xj, LASX256:$xk)>; ++ ++// vty: v8f32/v4f64 ++// Pat<(Intrinsic vty:$xj), ++// (LAInst vty:$xj)>; ++foreach Inst = ["XVFLOGB_S", "XVFCLASS_S", "XVFSQRT_S", "XVFRECIP_S", "XVFRSQRT_S", ++ "XVFRINT_S", "XVFCVTL_D_S", "XVFCVTH_D_S", ++ "XVFRINTRNE_S", "XVFRINTRZ_S", "XVFRINTRP_S", "XVFRINTRM_S", ++ "XVFTINTRNE_W_S", "XVFTINTRZ_W_S", "XVFTINTRP_W_S", "XVFTINTRM_W_S", ++ "XVFTINT_W_S", "XVFTINTRZ_WU_S", "XVFTINT_WU_S", ++ "XVFTINTRNEL_L_S", "XVFTINTRNEH_L_S", "XVFTINTRZL_L_S", ++ "XVFTINTRZH_L_S", "XVFTINTRPL_L_S", "XVFTINTRPH_L_S", ++ "XVFTINTRML_L_S", "XVFTINTRMH_L_S", "XVFTINTL_L_S", ++ "XVFTINTH_L_S"] in ++ def : Pat<(deriveLASXIntrinsic.ret (v8f32 LASX256:$xj)), ++ (!cast(Inst) LASX256:$xj)>; ++foreach Inst = ["XVFLOGB_D", "XVFCLASS_D", "XVFSQRT_D", "XVFRECIP_D", "XVFRSQRT_D", ++ "XVFRINT_D", ++ "XVFRINTRNE_D", "XVFRINTRZ_D", "XVFRINTRP_D", "XVFRINTRM_D", ++ "XVFTINTRNE_L_D", "XVFTINTRZ_L_D", "XVFTINTRP_L_D", "XVFTINTRM_L_D", ++ "XVFTINT_L_D", "XVFTINTRZ_LU_D", "XVFTINT_LU_D"] in ++ def : Pat<(deriveLASXIntrinsic.ret (v4f64 LASX256:$xj)), ++ (!cast(Inst) LASX256:$xj)>; ++ ++def : Pat<(int_loongarch_lasx_xvpickve_w_f v8f32:$xj, timm:$imm), ++ (XVPICKVE_W v8f32:$xj, (to_valide_timm timm:$imm))>; ++def : Pat<(int_loongarch_lasx_xvpickve_d_f v4f64:$xj, timm:$imm), ++ (XVPICKVE_D v4f64:$xj, (to_valide_timm timm:$imm))>; ++ ++// load ++def : Pat<(int_loongarch_lasx_xvld GPR:$rj, timm:$imm), ++ (XVLD GPR:$rj, (to_valide_timm timm:$imm))>; ++def : Pat<(int_loongarch_lasx_xvldx GPR:$rj, GPR:$rk), ++ (XVLDX GPR:$rj, GPR:$rk)>; ++ ++def : Pat<(int_loongarch_lasx_xvldrepl_b GPR:$rj, timm:$imm), ++ (XVLDREPL_B GPR:$rj, (to_valide_timm timm:$imm))>; ++def : Pat<(int_loongarch_lasx_xvldrepl_h GPR:$rj, timm:$imm), ++ (XVLDREPL_H GPR:$rj, (to_valide_timm timm:$imm))>; ++def : Pat<(int_loongarch_lasx_xvldrepl_w GPR:$rj, timm:$imm), ++ (XVLDREPL_W GPR:$rj, (to_valide_timm timm:$imm))>; ++def : Pat<(int_loongarch_lasx_xvldrepl_d GPR:$rj, timm:$imm), ++ (XVLDREPL_D GPR:$rj, (to_valide_timm timm:$imm))>; ++ ++// store ++def : Pat<(int_loongarch_lasx_xvst LASX256:$xd, GPR:$rj, timm:$imm), ++ (XVST LASX256:$xd, GPR:$rj, (to_valide_timm timm:$imm))>; ++def : Pat<(int_loongarch_lasx_xvstx LASX256:$xd, GPR:$rj, GPR:$rk), ++ (XVSTX LASX256:$xd, GPR:$rj, GPR:$rk)>; ++ ++def : Pat<(int_loongarch_lasx_xvstelm_b v32i8:$xd, GPR:$rj, timm:$imm, timm:$idx), ++ (XVSTELM_B v32i8:$xd, GPR:$rj, (to_valide_timm timm:$imm), ++ (to_valide_timm timm:$idx))>; ++def : Pat<(int_loongarch_lasx_xvstelm_h v16i16:$xd, GPR:$rj, timm:$imm, timm:$idx), ++ (XVSTELM_H v16i16:$xd, GPR:$rj, (to_valide_timm 
timm:$imm), ++ (to_valide_timm timm:$idx))>; ++def : Pat<(int_loongarch_lasx_xvstelm_w v8i32:$xd, GPR:$rj, timm:$imm, timm:$idx), ++ (XVSTELM_W v8i32:$xd, GPR:$rj, (to_valide_timm timm:$imm), ++ (to_valide_timm timm:$idx))>; ++def : Pat<(int_loongarch_lasx_xvstelm_d v4i64:$xd, GPR:$rj, timm:$imm, timm:$idx), ++ (XVSTELM_D v4i64:$xd, GPR:$rj, (to_valide_timm timm:$imm), ++ (to_valide_timm timm:$idx))>; ++ + } // Predicates = [HasExtLASX] +-- +2.20.1 + diff --git a/0014-LoongArch-Add-LSX-intrinsic-testcases.patch b/0014-LoongArch-Add-LSX-intrinsic-testcases.patch new file mode 100644 index 0000000000000000000000000000000000000000..e9b6a424a2abfc5f5e679bbafe1c23587d8c2ad6 --- /dev/null +++ b/0014-LoongArch-Add-LSX-intrinsic-testcases.patch @@ -0,0 +1,9906 @@ +From a818acf6c9a103bbc0af472b54b1d78330e36f79 Mon Sep 17 00:00:00 2001 +From: chenli +Date: Sat, 19 Aug 2023 17:10:41 +0800 +Subject: [PATCH 14/66] [LoongArch] Add LSX intrinsic testcases + +Depends on D155829 + +Reviewed By: SixWeining + +Differential Revision: https://reviews.llvm.org/D155834 + +(cherry picked from commit f3aa4416319aed198841401c6c9dc2e49afe2507) + +Change-Id: I6be1451ab80af72359bd8bca14dca0a36d7c0b62 +--- + .../CodeGen/LoongArch/lsx/intrinsic-absd.ll | 98 ++++ + .../CodeGen/LoongArch/lsx/intrinsic-add.ll | 62 ++ + .../CodeGen/LoongArch/lsx/intrinsic-adda.ll | 50 ++ + .../CodeGen/LoongArch/lsx/intrinsic-addi.ll | 50 ++ + .../CodeGen/LoongArch/lsx/intrinsic-addw.ll | 290 ++++++++++ + .../CodeGen/LoongArch/lsx/intrinsic-and.ll | 14 + + .../CodeGen/LoongArch/lsx/intrinsic-andi.ll | 14 + + .../CodeGen/LoongArch/lsx/intrinsic-andn.ll | 14 + + .../CodeGen/LoongArch/lsx/intrinsic-avg.ll | 98 ++++ + .../CodeGen/LoongArch/lsx/intrinsic-avgr.ll | 98 ++++ + .../CodeGen/LoongArch/lsx/intrinsic-bitclr.ll | 98 ++++ + .../CodeGen/LoongArch/lsx/intrinsic-bitrev.ll | 98 ++++ + .../CodeGen/LoongArch/lsx/intrinsic-bitsel.ll | 14 + + .../LoongArch/lsx/intrinsic-bitseli.ll | 14 + + .../CodeGen/LoongArch/lsx/intrinsic-bitset.ll | 98 ++++ + .../CodeGen/LoongArch/lsx/intrinsic-bsll.ll | 14 + + .../CodeGen/LoongArch/lsx/intrinsic-bsrl.ll | 14 + + .../CodeGen/LoongArch/lsx/intrinsic-clo.ll | 50 ++ + .../CodeGen/LoongArch/lsx/intrinsic-clz.ll | 50 ++ + .../CodeGen/LoongArch/lsx/intrinsic-div.ll | 98 ++++ + .../CodeGen/LoongArch/lsx/intrinsic-exth.ll | 98 ++++ + .../CodeGen/LoongArch/lsx/intrinsic-extl.ll | 26 + + .../LoongArch/lsx/intrinsic-extrins.ll | 50 ++ + .../CodeGen/LoongArch/lsx/intrinsic-fadd.ll | 26 + + .../CodeGen/LoongArch/lsx/intrinsic-fclass.ll | 26 + + .../CodeGen/LoongArch/lsx/intrinsic-fcmp.ll | 530 ++++++++++++++++++ + .../CodeGen/LoongArch/lsx/intrinsic-fcvt.ll | 26 + + .../CodeGen/LoongArch/lsx/intrinsic-fcvth.ll | 26 + + .../CodeGen/LoongArch/lsx/intrinsic-fcvtl.ll | 26 + + .../CodeGen/LoongArch/lsx/intrinsic-fdiv.ll | 26 + + .../CodeGen/LoongArch/lsx/intrinsic-ffint.ll | 86 +++ + .../CodeGen/LoongArch/lsx/intrinsic-flogb.ll | 26 + + .../CodeGen/LoongArch/lsx/intrinsic-fmadd.ll | 26 + + .../CodeGen/LoongArch/lsx/intrinsic-fmax.ll | 26 + + .../CodeGen/LoongArch/lsx/intrinsic-fmaxa.ll | 26 + + .../CodeGen/LoongArch/lsx/intrinsic-fmin.ll | 26 + + .../CodeGen/LoongArch/lsx/intrinsic-fmina.ll | 26 + + .../CodeGen/LoongArch/lsx/intrinsic-fmsub.ll | 26 + + .../CodeGen/LoongArch/lsx/intrinsic-fmul.ll | 26 + + .../CodeGen/LoongArch/lsx/intrinsic-fnmadd.ll | 26 + + .../CodeGen/LoongArch/lsx/intrinsic-fnmsub.ll | 26 + + .../CodeGen/LoongArch/lsx/intrinsic-frecip.ll | 26 + + .../CodeGen/LoongArch/lsx/intrinsic-frint.ll | 122 ++++ + 
.../CodeGen/LoongArch/lsx/intrinsic-frsqrt.ll | 26 + + .../CodeGen/LoongArch/lsx/intrinsic-frstp.ll | 50 ++ + .../CodeGen/LoongArch/lsx/intrinsic-fsqrt.ll | 26 + + .../CodeGen/LoongArch/lsx/intrinsic-fsub.ll | 26 + + .../CodeGen/LoongArch/lsx/intrinsic-ftint.ll | 350 ++++++++++++ + .../CodeGen/LoongArch/lsx/intrinsic-haddw.ll | 98 ++++ + .../CodeGen/LoongArch/lsx/intrinsic-hsubw.ll | 98 ++++ + .../CodeGen/LoongArch/lsx/intrinsic-ilv.ll | 98 ++++ + .../LoongArch/lsx/intrinsic-insgr2vr.ll | 54 ++ + .../CodeGen/LoongArch/lsx/intrinsic-ld.ll | 26 + + .../CodeGen/LoongArch/lsx/intrinsic-ldi.ll | 62 ++ + .../CodeGen/LoongArch/lsx/intrinsic-ldrepl.ll | 50 ++ + .../CodeGen/LoongArch/lsx/intrinsic-madd.ll | 50 ++ + .../CodeGen/LoongArch/lsx/intrinsic-maddw.ll | 290 ++++++++++ + .../CodeGen/LoongArch/lsx/intrinsic-max.ll | 194 +++++++ + .../CodeGen/LoongArch/lsx/intrinsic-min.ll | 194 +++++++ + .../CodeGen/LoongArch/lsx/intrinsic-mod.ll | 98 ++++ + .../CodeGen/LoongArch/lsx/intrinsic-mskgez.ll | 14 + + .../CodeGen/LoongArch/lsx/intrinsic-mskltz.ll | 50 ++ + .../CodeGen/LoongArch/lsx/intrinsic-msknz.ll | 14 + + .../CodeGen/LoongArch/lsx/intrinsic-msub.ll | 50 ++ + .../CodeGen/LoongArch/lsx/intrinsic-muh.ll | 98 ++++ + .../CodeGen/LoongArch/lsx/intrinsic-mul.ll | 50 ++ + .../CodeGen/LoongArch/lsx/intrinsic-mulw.ll | 290 ++++++++++ + .../CodeGen/LoongArch/lsx/intrinsic-neg.ll | 50 ++ + .../CodeGen/LoongArch/lsx/intrinsic-nor.ll | 14 + + .../CodeGen/LoongArch/lsx/intrinsic-nori.ll | 14 + + .../CodeGen/LoongArch/lsx/intrinsic-or.ll | 14 + + .../CodeGen/LoongArch/lsx/intrinsic-ori.ll | 14 + + .../CodeGen/LoongArch/lsx/intrinsic-orn.ll | 14 + + .../CodeGen/LoongArch/lsx/intrinsic-pack.ll | 98 ++++ + .../CodeGen/LoongArch/lsx/intrinsic-pcnt.ll | 50 ++ + .../CodeGen/LoongArch/lsx/intrinsic-permi.ll | 14 + + .../CodeGen/LoongArch/lsx/intrinsic-pick.ll | 98 ++++ + .../LoongArch/lsx/intrinsic-pickve2gr.ll | 98 ++++ + .../LoongArch/lsx/intrinsic-replgr2vr.ll | 50 ++ + .../CodeGen/LoongArch/lsx/intrinsic-replve.ll | 50 ++ + .../LoongArch/lsx/intrinsic-replvei.ll | 50 ++ + .../CodeGen/LoongArch/lsx/intrinsic-rotr.ll | 98 ++++ + .../CodeGen/LoongArch/lsx/intrinsic-sadd.ll | 98 ++++ + .../CodeGen/LoongArch/lsx/intrinsic-sat.ll | 98 ++++ + .../CodeGen/LoongArch/lsx/intrinsic-seq.ll | 98 ++++ + .../CodeGen/LoongArch/lsx/intrinsic-set.ll | 38 ++ + .../LoongArch/lsx/intrinsic-setallnez.ll | 74 +++ + .../LoongArch/lsx/intrinsic-setanyeqz.ll | 74 +++ + .../CodeGen/LoongArch/lsx/intrinsic-shuf.ll | 50 ++ + .../CodeGen/LoongArch/lsx/intrinsic-shuf4i.ll | 50 ++ + .../LoongArch/lsx/intrinsic-signcov.ll | 50 ++ + .../CodeGen/LoongArch/lsx/intrinsic-sle.ll | 194 +++++++ + .../CodeGen/LoongArch/lsx/intrinsic-sll.ll | 98 ++++ + .../CodeGen/LoongArch/lsx/intrinsic-sllwil.ll | 74 +++ + .../CodeGen/LoongArch/lsx/intrinsic-slt.ll | 194 +++++++ + .../CodeGen/LoongArch/lsx/intrinsic-sra.ll | 98 ++++ + .../CodeGen/LoongArch/lsx/intrinsic-sran.ll | 38 ++ + .../CodeGen/LoongArch/lsx/intrinsic-srani.ll | 50 ++ + .../CodeGen/LoongArch/lsx/intrinsic-srar.ll | 98 ++++ + .../CodeGen/LoongArch/lsx/intrinsic-srarn.ll | 38 ++ + .../CodeGen/LoongArch/lsx/intrinsic-srarni.ll | 50 ++ + .../CodeGen/LoongArch/lsx/intrinsic-srl.ll | 98 ++++ + .../CodeGen/LoongArch/lsx/intrinsic-srln.ll | 38 ++ + .../CodeGen/LoongArch/lsx/intrinsic-srlni.ll | 50 ++ + .../CodeGen/LoongArch/lsx/intrinsic-srlr.ll | 98 ++++ + .../CodeGen/LoongArch/lsx/intrinsic-srlrn.ll | 38 ++ + .../CodeGen/LoongArch/lsx/intrinsic-srlrni.ll | 50 ++ + 
.../CodeGen/LoongArch/lsx/intrinsic-ssran.ll | 74 +++ + .../CodeGen/LoongArch/lsx/intrinsic-ssrani.ll | 98 ++++ + .../CodeGen/LoongArch/lsx/intrinsic-ssrarn.ll | 74 +++ + .../LoongArch/lsx/intrinsic-ssrarni.ll | 98 ++++ + .../CodeGen/LoongArch/lsx/intrinsic-ssrln.ll | 74 +++ + .../CodeGen/LoongArch/lsx/intrinsic-ssrlni.ll | 98 ++++ + .../CodeGen/LoongArch/lsx/intrinsic-ssrlrn.ll | 74 +++ + .../LoongArch/lsx/intrinsic-ssrlrni.ll | 98 ++++ + .../CodeGen/LoongArch/lsx/intrinsic-ssub.ll | 98 ++++ + .../CodeGen/LoongArch/lsx/intrinsic-st.ll | 26 + + .../CodeGen/LoongArch/lsx/intrinsic-stelm.ll | 50 ++ + .../CodeGen/LoongArch/lsx/intrinsic-sub.ll | 62 ++ + .../CodeGen/LoongArch/lsx/intrinsic-subi.ll | 50 ++ + .../CodeGen/LoongArch/lsx/intrinsic-subw.ll | 194 +++++++ + .../CodeGen/LoongArch/lsx/intrinsic-xor.ll | 14 + + .../CodeGen/LoongArch/lsx/intrinsic-xori.ll | 14 + + 123 files changed, 8902 insertions(+) + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-absd.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-add.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-adda.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-addw.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-and.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-andn.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-avg.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-avgr.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitsel.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-clo.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-clz.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-div.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-exth.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-extl.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fadd.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fclass.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcmp.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvt.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvth.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvtl.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fdiv.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ffint.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-flogb.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmadd.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmax.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmaxa.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmin.ll + create mode 100644 
llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmina.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmsub.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmul.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fnmadd.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fnmsub.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-frecip.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-frint.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-frsqrt.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fsqrt.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fsub.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ftint.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-haddw.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-hsubw.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ilv.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-madd.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-maddw.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-max.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-min.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-mod.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-mskgez.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-mskltz.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-msknz.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-msub.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-muh.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-mul.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-mulw.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-neg.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-nor.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-or.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-orn.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-pack.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-pcnt.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-pick.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-replgr2vr.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-replve.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sadd.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-set.ll 
+ create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-setallnez.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-setanyeqz.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-signcov.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sran.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarn.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srln.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrn.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssran.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarn.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrln.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrn.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssub.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-st.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sub.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-subw.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-xor.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori.ll + +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-absd.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-absd.ll +new file mode 100644 +index 000000000000..811d9d712de4 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-absd.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vabsd.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vabsd_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vabsd_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vabsd.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vabsd.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vabsd.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> 
@lsx_vabsd_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vabsd_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vabsd.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vabsd.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vabsd.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vabsd_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vabsd_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vabsd.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vabsd.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vabsd.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vabsd_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vabsd_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vabsd.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vabsd.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vabsd.bu(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vabsd_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vabsd_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vabsd.bu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vabsd.bu(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vabsd.hu(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vabsd_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vabsd_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vabsd.hu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vabsd.hu(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vabsd.wu(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vabsd_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vabsd_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vabsd.wu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vabsd.wu(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vabsd.du(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vabsd_du(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vabsd_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vabsd.du $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vabsd.du(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-add.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-add.ll +new file mode 100644 +index 000000000000..fac16c8308da +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-add.ll +@@ -0,0 +1,62 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vadd.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vadd_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vadd_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vadd.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vadd.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vadd.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vadd_h(<8 x i16> %va, <8 x i16> 
%vb) nounwind { ++; CHECK-LABEL: lsx_vadd_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vadd.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vadd.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vadd.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vadd_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vadd_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vadd.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vadd.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vadd.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vadd_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vadd_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vadd.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vadd.q(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vadd_q(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vadd_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vadd.q $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vadd.q(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-adda.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-adda.ll +new file mode 100644 +index 000000000000..79be0a184bfb +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-adda.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vadda.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vadda_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vadda_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vadda.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vadda.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vadda.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vadda_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vadda_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vadda.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vadda.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vadda.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vadda_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vadda_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vadda.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vadda.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vadda.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vadda_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vadda_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vadda.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vadda.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi.ll +new file mode 100644 +index 000000000000..b9134e0724fe +--- /dev/null ++++ 
b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vaddi_bu(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vaddi_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vaddi.bu $vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> %va, i32 31) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vaddi_hu(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vaddi_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vaddi.hu $vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> %va, i32 31) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vaddi_wu(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vaddi_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vaddi.wu $vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32> %va, i32 31) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vaddi_du(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vaddi_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vaddi.du $vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> %va, i32 31) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addw.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addw.ll +new file mode 100644 +index 000000000000..086e3bec12d2 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addw.ll +@@ -0,0 +1,290 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <8 x i16> @llvm.loongarch.lsx.vaddwev.h.b(<16 x i8>, <16 x i8>) ++ ++define <8 x i16> @lsx_vaddwev_h_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vaddwev_h_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vaddwev.h.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vaddwev.w.h(<8 x i16>, <8 x i16>) ++ ++define <4 x i32> @lsx_vaddwev_w_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vaddwev_w_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vaddwev.w.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vaddwev.d.w(<4 x i32>, <4 x i32>) ++ ++define <2 x i64> @lsx_vaddwev_d_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vaddwev_d_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vaddwev.d.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vaddwev.q.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vaddwev_q_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vaddwev_q_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vaddwev.q.d $vr0, 
$vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu(<16 x i8>, <16 x i8>) ++ ++define <8 x i16> @lsx_vaddwev_h_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vaddwev_h_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vaddwev.h.bu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu(<16 x i8> %va, <16 x i8> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu(<8 x i16>, <8 x i16>) ++ ++define <4 x i32> @lsx_vaddwev_w_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vaddwev_w_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vaddwev.w.hu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu(<8 x i16> %va, <8 x i16> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu(<4 x i32>, <4 x i32>) ++ ++define <2 x i64> @lsx_vaddwev_d_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vaddwev_d_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vaddwev.d.wu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu(<4 x i32> %va, <4 x i32> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vaddwev_q_du(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vaddwev_q_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vaddwev.q.du $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu.b(<16 x i8>, <16 x i8>) ++ ++define <8 x i16> @lsx_vaddwev_h_bu_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vaddwev_h_bu_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vaddwev.h.bu.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu.h(<8 x i16>, <8 x i16>) ++ ++define <4 x i32> @lsx_vaddwev_w_hu_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vaddwev_w_hu_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vaddwev.w.hu.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu.w(<4 x i32>, <4 x i32>) ++ ++define <2 x i64> @lsx_vaddwev_d_wu_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vaddwev_d_wu_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vaddwev.d.wu.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vaddwev_q_du_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vaddwev_q_du_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vaddwev.q.du.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vaddwod.h.b(<16 x i8>, 
<16 x i8>) ++ ++define <8 x i16> @lsx_vaddwod_h_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vaddwod_h_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vaddwod.h.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vaddwod.w.h(<8 x i16>, <8 x i16>) ++ ++define <4 x i32> @lsx_vaddwod_w_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vaddwod_w_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vaddwod.w.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vaddwod.d.w(<4 x i32>, <4 x i32>) ++ ++define <2 x i64> @lsx_vaddwod_d_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vaddwod_d_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vaddwod.d.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vaddwod.q.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vaddwod_q_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vaddwod_q_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vaddwod.q.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu(<16 x i8>, <16 x i8>) ++ ++define <8 x i16> @lsx_vaddwod_h_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vaddwod_h_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vaddwod.h.bu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu(<16 x i8> %va, <16 x i8> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu(<8 x i16>, <8 x i16>) ++ ++define <4 x i32> @lsx_vaddwod_w_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vaddwod_w_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vaddwod.w.hu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu(<8 x i16> %va, <8 x i16> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu(<4 x i32>, <4 x i32>) ++ ++define <2 x i64> @lsx_vaddwod_d_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vaddwod_d_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vaddwod.d.wu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu(<4 x i32> %va, <4 x i32> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vaddwod_q_du(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vaddwod_q_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vaddwod.q.du $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu.b(<16 x i8>, <16 x i8>) ++ ++define <8 x i16> @lsx_vaddwod_h_bu_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vaddwod_h_bu_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vaddwod.h.bu.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> 
@llvm.loongarch.lsx.vaddwod.h.bu.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu.h(<8 x i16>, <8 x i16>) ++ ++define <4 x i32> @lsx_vaddwod_w_hu_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vaddwod_w_hu_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vaddwod.w.hu.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu.w(<4 x i32>, <4 x i32>) ++ ++define <2 x i64> @lsx_vaddwod_d_wu_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vaddwod_d_wu_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vaddwod.d.wu.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vaddwod_q_du_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vaddwod_q_du_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vaddwod.q.du.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-and.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-and.ll +new file mode 100644 +index 000000000000..77496239c3a9 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-and.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vand.v(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vand_v(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vand_v: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vand.v $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vand.v(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi.ll +new file mode 100644 +index 000000000000..9a1c38a641d0 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vandi_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vandi_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vandi.b $vr0, $vr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> %va, i32 1) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andn.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andn.ll +new file mode 100644 +index 000000000000..b08c759ecc32 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andn.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vandn.v(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vandn_v(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vandn_v: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vandn.v 
$vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vandn.v(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-avg.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-avg.ll +new file mode 100644 +index 000000000000..fb0861f4cd5e +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-avg.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vavg.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vavg_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vavg_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vavg.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vavg.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vavg.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vavg_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vavg_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vavg.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vavg.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vavg.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vavg_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vavg_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vavg.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vavg.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vavg.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vavg_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vavg_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vavg.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vavg.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vavg.bu(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vavg_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vavg_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vavg.bu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vavg.bu(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vavg.hu(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vavg_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vavg_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vavg.hu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vavg.hu(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vavg.wu(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vavg_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vavg_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vavg.wu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vavg.wu(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vavg.du(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vavg_du(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vavg_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vavg.du $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> 
@llvm.loongarch.lsx.vavg.du(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-avgr.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-avgr.ll +new file mode 100644 +index 000000000000..8bf7d0ed8817 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-avgr.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vavgr.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vavgr_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vavgr_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vavgr.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vavgr.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vavgr.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vavgr_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vavgr_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vavgr.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vavgr.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vavgr.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vavgr_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vavgr_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vavgr.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vavgr.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vavgr.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vavgr_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vavgr_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vavgr.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vavgr.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vavgr.bu(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vavgr_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vavgr_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vavgr.bu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vavgr.bu(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vavgr.hu(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vavgr_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vavgr_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vavgr.hu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vavgr.hu(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vavgr.wu(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vavgr_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vavgr_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vavgr.wu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vavgr.wu(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vavgr.du(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vavgr_du(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vavgr_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vavgr.du $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> 
@llvm.loongarch.lsx.vavgr.du(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr.ll +new file mode 100644 +index 000000000000..f5fba6dbb141 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vbitclr.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vbitclr_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vbitclr_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vbitclr.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vbitclr.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vbitclr.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vbitclr_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vbitclr_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vbitclr.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vbitclr.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vbitclr.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vbitclr_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vbitclr_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vbitclr.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vbitclr.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vbitclr.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vbitclr_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vbitclr_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vbitclr.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vbitclr.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vbitclri_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vbitclri_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vbitclri.b $vr0, $vr0, 7 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8> %va, i32 7) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vbitclri_h(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vbitclri_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vbitclri.h $vr0, $vr0, 15 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> %va, i32 15) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vbitclri_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vbitclri_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vbitclri.w $vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> %va, i32 31) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vbitclri_d(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vbitclri_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vbitclri.d $vr0, $vr0, 63 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> %va, i32 63) ++ 
ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev.ll +new file mode 100644 +index 000000000000..ad56e88fdb88 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vbitrev.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vbitrev_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vbitrev_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vbitrev.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vbitrev.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vbitrev.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vbitrev_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vbitrev_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vbitrev.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vbitrev.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vbitrev.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vbitrev_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vbitrev_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vbitrev.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vbitrev.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vbitrev.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vbitrev_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vbitrev_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vbitrev.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vbitrev.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vbitrevi_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vbitrevi_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vbitrevi.b $vr0, $vr0, 7 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8> %va, i32 7) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vbitrevi_h(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vbitrevi_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vbitrevi.h $vr0, $vr0, 15 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> %va, i32 15) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vbitrevi_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vbitrevi_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vbitrevi.w $vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> %va, i32 31) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vbitrevi_d(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vbitrevi_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vbitrevi.d $vr0, $vr0, 63 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> %va, i32 63) ++ ret <2 x i64> %res ++} +diff --git 
a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitsel.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitsel.ll +new file mode 100644 +index 000000000000..4b4b5ff1fc8c +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitsel.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vbitsel.v(<16 x i8>, <16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vbitsel_v(<16 x i8> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind { ++; CHECK-LABEL: lsx_vbitsel_v: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vbitsel.v $vr0, $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vbitsel.v(<16 x i8> %va, <16 x i8> %vb, <16 x i8> %vc) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli.ll +new file mode 100644 +index 000000000000..28d342b5c378 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vbitseli_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vbitseli_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vbitseli.b $vr0, $vr1, 255 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> %va, <16 x i8> %vb, i32 255) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset.ll +new file mode 100644 +index 000000000000..75d98e6f8bce +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vbitset.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vbitset_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vbitset_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vbitset.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vbitset.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vbitset.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vbitset_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vbitset_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vbitset.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vbitset.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vbitset.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vbitset_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vbitset_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vbitset.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vbitset.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vbitset.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vbitset_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vbitset_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vbitset.d $vr0, $vr0, $vr1 ++; 
CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vbitset.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vbitseti_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vbitseti_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vbitseti.b $vr0, $vr0, 7 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> %va, i32 7) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vbitseti_h(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vbitseti_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vbitseti.h $vr0, $vr0, 15 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> %va, i32 15) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vbitseti_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vbitseti_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vbitseti.w $vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32> %va, i32 31) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vbitseti_d(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vbitseti_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vbitseti.d $vr0, $vr0, 63 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> %va, i32 63) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll.ll +new file mode 100644 +index 000000000000..e7eb1cfcb407 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vbsll_v(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vbsll_v: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vbsll.v $vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> %va, i32 31) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl.ll +new file mode 100644 +index 000000000000..fe0565297641 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vbsrl_v(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vbsrl_v: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vbsrl.v $vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> %va, i32 31) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-clo.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-clo.ll +new file mode 100644 +index 000000000000..c581109f3fd0 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-clo.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | 
FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vclo.b(<16 x i8>) ++ ++define <16 x i8> @lsx_vclo_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vclo_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vclo.b $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vclo.b(<16 x i8> %va) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vclo.h(<8 x i16>) ++ ++define <8 x i16> @lsx_vclo_h(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vclo_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vclo.h $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vclo.h(<8 x i16> %va) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vclo.w(<4 x i32>) ++ ++define <4 x i32> @lsx_vclo_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vclo_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vclo.w $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vclo.w(<4 x i32> %va) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vclo.d(<2 x i64>) ++ ++define <2 x i64> @lsx_vclo_d(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vclo_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vclo.d $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vclo.d(<2 x i64> %va) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-clz.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-clz.ll +new file mode 100644 +index 000000000000..25c37b64349b +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-clz.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vclz.b(<16 x i8>) ++ ++define <16 x i8> @lsx_vclz_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vclz_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vclz.b $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vclz.b(<16 x i8> %va) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vclz.h(<8 x i16>) ++ ++define <8 x i16> @lsx_vclz_h(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vclz_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vclz.h $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vclz.h(<8 x i16> %va) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vclz.w(<4 x i32>) ++ ++define <4 x i32> @lsx_vclz_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vclz_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vclz.w $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vclz.w(<4 x i32> %va) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vclz.d(<2 x i64>) ++ ++define <2 x i64> @lsx_vclz_d(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vclz_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vclz.d $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vclz.d(<2 x i64> %va) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-div.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-div.ll +new file mode 100644 +index 000000000000..53166e84d269 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-div.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> 
@llvm.loongarch.lsx.vdiv.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vdiv_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vdiv_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vdiv.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vdiv.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vdiv.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vdiv_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vdiv_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vdiv.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vdiv.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vdiv.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vdiv_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vdiv_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vdiv.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vdiv.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vdiv.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vdiv_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vdiv_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vdiv.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vdiv.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vdiv.bu(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vdiv_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vdiv_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vdiv.bu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vdiv.bu(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vdiv.hu(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vdiv_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vdiv_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vdiv.hu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vdiv.hu(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vdiv.wu(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vdiv_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vdiv_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vdiv.wu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vdiv.wu(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vdiv.du(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vdiv_du(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vdiv_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vdiv.du $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vdiv.du(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-exth.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-exth.ll +new file mode 100644 +index 000000000000..2f3e891a9eef +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-exth.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <8 x i16> @llvm.loongarch.lsx.vexth.h.b(<16 x i8>) ++ ++define <8 x i16> 
@lsx_vexth_h_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vexth_h_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vexth.h.b $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vexth.h.b(<16 x i8> %va) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vexth.w.h(<8 x i16>) ++ ++define <4 x i32> @lsx_vexth_w_h(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vexth_w_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vexth.w.h $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vexth.w.h(<8 x i16> %va) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vexth.d.w(<4 x i32>) ++ ++define <2 x i64> @lsx_vexth_d_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vexth_d_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vexth.d.w $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vexth.d.w(<4 x i32> %va) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vexth.q.d(<2 x i64>) ++ ++define <2 x i64> @lsx_vexth_q_d(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vexth_q_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vexth.q.d $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vexth.q.d(<2 x i64> %va) ++ ret <2 x i64> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vexth.hu.bu(<16 x i8>) ++ ++define <8 x i16> @lsx_vexth_hu_bu(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vexth_hu_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vexth.hu.bu $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vexth.hu.bu(<16 x i8> %va) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vexth.wu.hu(<8 x i16>) ++ ++define <4 x i32> @lsx_vexth_wu_hu(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vexth_wu_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vexth.wu.hu $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vexth.wu.hu(<8 x i16> %va) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vexth.du.wu(<4 x i32>) ++ ++define <2 x i64> @lsx_vexth_du_wu(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vexth_du_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vexth.du.wu $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vexth.du.wu(<4 x i32> %va) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vexth.qu.du(<2 x i64>) ++ ++define <2 x i64> @lsx_vexth_qu_du(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vexth_qu_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vexth.qu.du $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vexth.qu.du(<2 x i64> %va) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extl.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extl.ll +new file mode 100644 +index 000000000000..cbf19e2a3919 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extl.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <2 x i64> @llvm.loongarch.lsx.vextl.q.d(<2 x i64>) ++ ++define <2 x i64> @lsx_vextl_q_d(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vextl_q_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vextl.q.d $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vextl.q.d(<2 x i64> %va) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> 
@llvm.loongarch.lsx.vextl.qu.du(<2 x i64>) ++ ++define <2 x i64> @lsx_vextl_qu_du(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vextl_qu_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vextl.qu.du $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vextl.qu.du(<2 x i64> %va) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins.ll +new file mode 100644 +index 000000000000..8f03a2b81291 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vextrins_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vextrins_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vextrins.b $vr0, $vr1, 255 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8> %va, <16 x i8> %vb, i32 255) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vextrins_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vextrins_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vextrins.h $vr0, $vr1, 255 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> %va, <8 x i16> %vb, i32 255) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vextrins_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vextrins_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vextrins.w $vr0, $vr1, 255 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> %va, <4 x i32> %vb, i32 255) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vextrins_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vextrins_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vextrins.d $vr0, $vr1, 255 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64> %va, <2 x i64> %vb, i32 255) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fadd.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fadd.ll +new file mode 100644 +index 000000000000..569002314c92 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fadd.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <4 x float> @llvm.loongarch.lsx.vfadd.s(<4 x float>, <4 x float>) ++ ++define <4 x float> @lsx_vfadd_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfadd_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfadd.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x float> @llvm.loongarch.lsx.vfadd.s(<4 x float> %va, <4 x float> %vb) ++ ret <4 x float> %res ++} ++ ++declare <2 x double> @llvm.loongarch.lsx.vfadd.d(<2 x double>, <2 x double>) ++ ++define <2 x double> @lsx_vfadd_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfadd_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfadd.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x 
double> @llvm.loongarch.lsx.vfadd.d(<2 x double> %va, <2 x double> %vb) ++ ret <2 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fclass.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fclass.ll +new file mode 100644 +index 000000000000..0c6682187101 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fclass.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <4 x i32> @llvm.loongarch.lsx.vfclass.s(<4 x float>) ++ ++define <4 x i32> @lsx_vfclass_s(<4 x float> %va) nounwind { ++; CHECK-LABEL: lsx_vfclass_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfclass.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vfclass.s(<4 x float> %va) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vfclass.d(<2 x double>) ++ ++define <2 x i64> @lsx_vfclass_d(<2 x double> %va) nounwind { ++; CHECK-LABEL: lsx_vfclass_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfclass.d $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vfclass.d(<2 x double> %va) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcmp.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcmp.ll +new file mode 100644 +index 000000000000..669c53b73b16 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcmp.ll +@@ -0,0 +1,530 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <4 x i32> @llvm.loongarch.lsx.vfcmp.caf.s(<4 x float>, <4 x float>) ++ ++define <4 x i32> @lsx_vfcmp_caf_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_caf_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.caf.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.caf.s(<4 x float> %va, <4 x float> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vfcmp.caf.d(<2 x double>, <2 x double>) ++ ++define <2 x i64> @lsx_vfcmp_caf_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_caf_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.caf.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.caf.d(<2 x double> %va, <2 x double> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vfcmp.cun.s(<4 x float>, <4 x float>) ++ ++define <4 x i32> @lsx_vfcmp_cun_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_cun_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.cun.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.cun.s(<4 x float> %va, <4 x float> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vfcmp.cun.d(<2 x double>, <2 x double>) ++ ++define <2 x i64> @lsx_vfcmp_cun_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_cun_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.cun.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.cun.d(<2 x double> %va, <2 x double> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vfcmp.ceq.s(<4 x float>, <4 x float>) ++ ++define <4 x i32> @lsx_vfcmp_ceq_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_ceq_s: ++; CHECK: # %bb.0: # 
%entry ++; CHECK-NEXT: vfcmp.ceq.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.ceq.s(<4 x float> %va, <4 x float> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vfcmp.ceq.d(<2 x double>, <2 x double>) ++ ++define <2 x i64> @lsx_vfcmp_ceq_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_ceq_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.ceq.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.ceq.d(<2 x double> %va, <2 x double> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vfcmp.cueq.s(<4 x float>, <4 x float>) ++ ++define <4 x i32> @lsx_vfcmp_cueq_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_cueq_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.cueq.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.cueq.s(<4 x float> %va, <4 x float> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vfcmp.cueq.d(<2 x double>, <2 x double>) ++ ++define <2 x i64> @lsx_vfcmp_cueq_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_cueq_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.cueq.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.cueq.d(<2 x double> %va, <2 x double> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vfcmp.clt.s(<4 x float>, <4 x float>) ++ ++define <4 x i32> @lsx_vfcmp_clt_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_clt_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.clt.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.clt.s(<4 x float> %va, <4 x float> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vfcmp.clt.d(<2 x double>, <2 x double>) ++ ++define <2 x i64> @lsx_vfcmp_clt_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_clt_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.clt.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.clt.d(<2 x double> %va, <2 x double> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vfcmp.cult.s(<4 x float>, <4 x float>) ++ ++define <4 x i32> @lsx_vfcmp_cult_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_cult_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.cult.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.cult.s(<4 x float> %va, <4 x float> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vfcmp.cult.d(<2 x double>, <2 x double>) ++ ++define <2 x i64> @lsx_vfcmp_cult_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_cult_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.cult.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.cult.d(<2 x double> %va, <2 x double> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vfcmp.cle.s(<4 x float>, <4 x float>) ++ ++define <4 x i32> @lsx_vfcmp_cle_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_cle_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.cle.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.cle.s(<4 x float> %va, <4 
x float> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vfcmp.cle.d(<2 x double>, <2 x double>) ++ ++define <2 x i64> @lsx_vfcmp_cle_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_cle_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.cle.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.cle.d(<2 x double> %va, <2 x double> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vfcmp.cule.s(<4 x float>, <4 x float>) ++ ++define <4 x i32> @lsx_vfcmp_cule_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_cule_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.cule.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.cule.s(<4 x float> %va, <4 x float> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vfcmp.cule.d(<2 x double>, <2 x double>) ++ ++define <2 x i64> @lsx_vfcmp_cule_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_cule_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.cule.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.cule.d(<2 x double> %va, <2 x double> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vfcmp.cne.s(<4 x float>, <4 x float>) ++ ++define <4 x i32> @lsx_vfcmp_cne_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_cne_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.cne.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.cne.s(<4 x float> %va, <4 x float> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vfcmp.cne.d(<2 x double>, <2 x double>) ++ ++define <2 x i64> @lsx_vfcmp_cne_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_cne_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.cne.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.cne.d(<2 x double> %va, <2 x double> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vfcmp.cor.s(<4 x float>, <4 x float>) ++ ++define <4 x i32> @lsx_vfcmp_cor_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_cor_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.cor.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.cor.s(<4 x float> %va, <4 x float> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vfcmp.cor.d(<2 x double>, <2 x double>) ++ ++define <2 x i64> @lsx_vfcmp_cor_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_cor_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.cor.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.cor.d(<2 x double> %va, <2 x double> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vfcmp.cune.s(<4 x float>, <4 x float>) ++ ++define <4 x i32> @lsx_vfcmp_cune_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_cune_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.cune.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.cune.s(<4 x float> %va, <4 x float> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vfcmp.cune.d(<2 x double>, <2 x double>) ++ ++define <2 x i64> 
@lsx_vfcmp_cune_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_cune_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.cune.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.cune.d(<2 x double> %va, <2 x double> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vfcmp.saf.s(<4 x float>, <4 x float>) ++ ++define <4 x i32> @lsx_vfcmp_saf_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_saf_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.saf.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.saf.s(<4 x float> %va, <4 x float> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vfcmp.saf.d(<2 x double>, <2 x double>) ++ ++define <2 x i64> @lsx_vfcmp_saf_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_saf_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.saf.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.saf.d(<2 x double> %va, <2 x double> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vfcmp.sun.s(<4 x float>, <4 x float>) ++ ++define <4 x i32> @lsx_vfcmp_sun_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_sun_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.sun.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.sun.s(<4 x float> %va, <4 x float> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vfcmp.sun.d(<2 x double>, <2 x double>) ++ ++define <2 x i64> @lsx_vfcmp_sun_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_sun_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.sun.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.sun.d(<2 x double> %va, <2 x double> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vfcmp.seq.s(<4 x float>, <4 x float>) ++ ++define <4 x i32> @lsx_vfcmp_seq_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_seq_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.seq.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.seq.s(<4 x float> %va, <4 x float> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vfcmp.seq.d(<2 x double>, <2 x double>) ++ ++define <2 x i64> @lsx_vfcmp_seq_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_seq_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.seq.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.seq.d(<2 x double> %va, <2 x double> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vfcmp.sueq.s(<4 x float>, <4 x float>) ++ ++define <4 x i32> @lsx_vfcmp_sueq_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_sueq_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.sueq.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.sueq.s(<4 x float> %va, <4 x float> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vfcmp.sueq.d(<2 x double>, <2 x double>) ++ ++define <2 x i64> @lsx_vfcmp_sueq_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_sueq_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.sueq.d $vr0, 
$vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.sueq.d(<2 x double> %va, <2 x double> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vfcmp.slt.s(<4 x float>, <4 x float>) ++ ++define <4 x i32> @lsx_vfcmp_slt_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_slt_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.slt.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.slt.s(<4 x float> %va, <4 x float> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vfcmp.slt.d(<2 x double>, <2 x double>) ++ ++define <2 x i64> @lsx_vfcmp_slt_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_slt_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.slt.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.slt.d(<2 x double> %va, <2 x double> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vfcmp.sult.s(<4 x float>, <4 x float>) ++ ++define <4 x i32> @lsx_vfcmp_sult_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_sult_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.sult.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.sult.s(<4 x float> %va, <4 x float> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vfcmp.sult.d(<2 x double>, <2 x double>) ++ ++define <2 x i64> @lsx_vfcmp_sult_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_sult_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.sult.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.sult.d(<2 x double> %va, <2 x double> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vfcmp.sle.s(<4 x float>, <4 x float>) ++ ++define <4 x i32> @lsx_vfcmp_sle_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_sle_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.sle.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.sle.s(<4 x float> %va, <4 x float> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vfcmp.sle.d(<2 x double>, <2 x double>) ++ ++define <2 x i64> @lsx_vfcmp_sle_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_sle_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.sle.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.sle.d(<2 x double> %va, <2 x double> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vfcmp.sule.s(<4 x float>, <4 x float>) ++ ++define <4 x i32> @lsx_vfcmp_sule_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_sule_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.sule.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.sule.s(<4 x float> %va, <4 x float> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vfcmp.sule.d(<2 x double>, <2 x double>) ++ ++define <2 x i64> @lsx_vfcmp_sule_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_sule_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.sule.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.sule.d(<2 x double> %va, <2 x double> %vb) ++ ret <2 x i64> %res 
++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vfcmp.sne.s(<4 x float>, <4 x float>) ++ ++define <4 x i32> @lsx_vfcmp_sne_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_sne_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.sne.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.sne.s(<4 x float> %va, <4 x float> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vfcmp.sne.d(<2 x double>, <2 x double>) ++ ++define <2 x i64> @lsx_vfcmp_sne_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_sne_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.sne.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.sne.d(<2 x double> %va, <2 x double> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vfcmp.sor.s(<4 x float>, <4 x float>) ++ ++define <4 x i32> @lsx_vfcmp_sor_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_sor_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.sor.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.sor.s(<4 x float> %va, <4 x float> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vfcmp.sor.d(<2 x double>, <2 x double>) ++ ++define <2 x i64> @lsx_vfcmp_sor_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_sor_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.sor.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.sor.d(<2 x double> %va, <2 x double> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vfcmp.sune.s(<4 x float>, <4 x float>) ++ ++define <4 x i32> @lsx_vfcmp_sune_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_sune_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.sune.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.sune.s(<4 x float> %va, <4 x float> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vfcmp.sune.d(<2 x double>, <2 x double>) ++ ++define <2 x i64> @lsx_vfcmp_sune_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcmp_sune_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcmp.sune.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.sune.d(<2 x double> %va, <2 x double> %vb) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvt.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvt.ll +new file mode 100644 +index 000000000000..a6a151a96d84 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvt.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <8 x i16> @llvm.loongarch.lsx.vfcvt.h.s(<4 x float>, <4 x float>) ++ ++define <8 x i16> @lsx_vfcvt_h_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfcvt_h_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcvt.h.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vfcvt.h.s(<4 x float> %va, <4 x float> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x float> @llvm.loongarch.lsx.vfcvt.s.d(<2 x double>, <2 x double>) ++ ++define <4 x float> @lsx_vfcvt_s_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: 
lsx_vfcvt_s_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcvt.s.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x float> @llvm.loongarch.lsx.vfcvt.s.d(<2 x double> %va, <2 x double> %vb) ++ ret <4 x float> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvth.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvth.ll +new file mode 100644 +index 000000000000..a9e4328bd011 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvth.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <4 x float> @llvm.loongarch.lsx.vfcvth.s.h(<8 x i16>) ++ ++define <4 x float> @lsx_vfcvth_s_h(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vfcvth_s_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcvth.s.h $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x float> @llvm.loongarch.lsx.vfcvth.s.h(<8 x i16> %va) ++ ret <4 x float> %res ++} ++ ++declare <2 x double> @llvm.loongarch.lsx.vfcvth.d.s(<4 x float>) ++ ++define <2 x double> @lsx_vfcvth_d_s(<4 x float> %va) nounwind { ++; CHECK-LABEL: lsx_vfcvth_d_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcvth.d.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x double> @llvm.loongarch.lsx.vfcvth.d.s(<4 x float> %va) ++ ret <2 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvtl.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvtl.ll +new file mode 100644 +index 000000000000..9a69964bb227 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvtl.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <4 x float> @llvm.loongarch.lsx.vfcvtl.s.h(<8 x i16>) ++ ++define <4 x float> @lsx_vfcvtl_s_h(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vfcvtl_s_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcvtl.s.h $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x float> @llvm.loongarch.lsx.vfcvtl.s.h(<8 x i16> %va) ++ ret <4 x float> %res ++} ++ ++declare <2 x double> @llvm.loongarch.lsx.vfcvtl.d.s(<4 x float>) ++ ++define <2 x double> @lsx_vfcvtl_d_s(<4 x float> %va) nounwind { ++; CHECK-LABEL: lsx_vfcvtl_d_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfcvtl.d.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x double> @llvm.loongarch.lsx.vfcvtl.d.s(<4 x float> %va) ++ ret <2 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fdiv.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fdiv.ll +new file mode 100644 +index 000000000000..1ca8e5e2c0e9 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fdiv.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <4 x float> @llvm.loongarch.lsx.vfdiv.s(<4 x float>, <4 x float>) ++ ++define <4 x float> @lsx_vfdiv_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfdiv_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfdiv.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x float> @llvm.loongarch.lsx.vfdiv.s(<4 x float> %va, <4 x float> %vb) ++ ret <4 x float> %res ++} ++ ++declare <2 x double> @llvm.loongarch.lsx.vfdiv.d(<2 x double>, <2 x double>) ++ ++define <2 x double> @lsx_vfdiv_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfdiv_d: ++; 
CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfdiv.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x double> @llvm.loongarch.lsx.vfdiv.d(<2 x double> %va, <2 x double> %vb) ++ ret <2 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ffint.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ffint.ll +new file mode 100644 +index 000000000000..62fbcfa339cd +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ffint.ll +@@ -0,0 +1,86 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <4 x float> @llvm.loongarch.lsx.vffint.s.w(<4 x i32>) ++ ++define <4 x float> @lsx_vffint_s_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vffint_s_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vffint.s.w $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x float> @llvm.loongarch.lsx.vffint.s.w(<4 x i32> %va) ++ ret <4 x float> %res ++} ++ ++declare <2 x double> @llvm.loongarch.lsx.vffint.d.l(<2 x i64>) ++ ++define <2 x double> @lsx_vffint_d_l(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vffint_d_l: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vffint.d.l $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x double> @llvm.loongarch.lsx.vffint.d.l(<2 x i64> %va) ++ ret <2 x double> %res ++} ++ ++declare <4 x float> @llvm.loongarch.lsx.vffint.s.wu(<4 x i32>) ++ ++define <4 x float> @lsx_vffint_s_wu(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vffint_s_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vffint.s.wu $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x float> @llvm.loongarch.lsx.vffint.s.wu(<4 x i32> %va) ++ ret <4 x float> %res ++} ++ ++declare <2 x double> @llvm.loongarch.lsx.vffint.d.lu(<2 x i64>) ++ ++define <2 x double> @lsx_vffint_d_lu(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vffint_d_lu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vffint.d.lu $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x double> @llvm.loongarch.lsx.vffint.d.lu(<2 x i64> %va) ++ ret <2 x double> %res ++} ++ ++declare <2 x double> @llvm.loongarch.lsx.vffintl.d.w(<4 x i32>) ++ ++define <2 x double> @lsx_vffintl_d_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vffintl_d_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vffintl.d.w $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x double> @llvm.loongarch.lsx.vffintl.d.w(<4 x i32> %va) ++ ret <2 x double> %res ++} ++ ++declare <2 x double> @llvm.loongarch.lsx.vffinth.d.w(<4 x i32>) ++ ++define <2 x double> @lsx_vffinth_d_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vffinth_d_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vffinth.d.w $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x double> @llvm.loongarch.lsx.vffinth.d.w(<4 x i32> %va) ++ ret <2 x double> %res ++} ++ ++declare <4 x float> @llvm.loongarch.lsx.vffint.s.l(<2 x i64>, <2 x i64>) ++ ++define <4 x float> @lsx_vffint_s_l(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vffint_s_l: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vffint.s.l $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x float> @llvm.loongarch.lsx.vffint.s.l(<2 x i64> %va, <2 x i64> %vb) ++ ret <4 x float> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-flogb.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-flogb.ll +new file mode 100644 +index 000000000000..d8382acc70ed +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-flogb.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been 
autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <4 x float> @llvm.loongarch.lsx.vflogb.s(<4 x float>) ++ ++define <4 x float> @lsx_vflogb_s(<4 x float> %va) nounwind { ++; CHECK-LABEL: lsx_vflogb_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vflogb.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x float> @llvm.loongarch.lsx.vflogb.s(<4 x float> %va) ++ ret <4 x float> %res ++} ++ ++declare <2 x double> @llvm.loongarch.lsx.vflogb.d(<2 x double>) ++ ++define <2 x double> @lsx_vflogb_d(<2 x double> %va) nounwind { ++; CHECK-LABEL: lsx_vflogb_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vflogb.d $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x double> @llvm.loongarch.lsx.vflogb.d(<2 x double> %va) ++ ret <2 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmadd.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmadd.ll +new file mode 100644 +index 000000000000..adbaf6c76b1b +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmadd.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <4 x float> @llvm.loongarch.lsx.vfmadd.s(<4 x float>, <4 x float>, <4 x float>) ++ ++define <4 x float> @lsx_vfmadd_s(<4 x float> %va, <4 x float> %vb, <4 x float> %vc) nounwind { ++; CHECK-LABEL: lsx_vfmadd_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfmadd.s $vr0, $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x float> @llvm.loongarch.lsx.vfmadd.s(<4 x float> %va, <4 x float> %vb, <4 x float> %vc) ++ ret <4 x float> %res ++} ++ ++declare <2 x double> @llvm.loongarch.lsx.vfmadd.d(<2 x double>, <2 x double>, <2 x double>) ++ ++define <2 x double> @lsx_vfmadd_d(<2 x double> %va, <2 x double> %vb, <2 x double> %vc) nounwind { ++; CHECK-LABEL: lsx_vfmadd_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfmadd.d $vr0, $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x double> @llvm.loongarch.lsx.vfmadd.d(<2 x double> %va, <2 x double> %vb, <2 x double> %vc) ++ ret <2 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmax.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmax.ll +new file mode 100644 +index 000000000000..89f757c4e456 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmax.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <4 x float> @llvm.loongarch.lsx.vfmax.s(<4 x float>, <4 x float>) ++ ++define <4 x float> @lsx_vfmax_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfmax_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfmax.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x float> @llvm.loongarch.lsx.vfmax.s(<4 x float> %va, <4 x float> %vb) ++ ret <4 x float> %res ++} ++ ++declare <2 x double> @llvm.loongarch.lsx.vfmax.d(<2 x double>, <2 x double>) ++ ++define <2 x double> @lsx_vfmax_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfmax_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfmax.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x double> @llvm.loongarch.lsx.vfmax.d(<2 x double> %va, <2 x double> %vb) ++ ret <2 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmaxa.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmaxa.ll +new 
file mode 100644 +index 000000000000..5662acc0b9a1 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmaxa.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <4 x float> @llvm.loongarch.lsx.vfmaxa.s(<4 x float>, <4 x float>) ++ ++define <4 x float> @lsx_vfmaxa_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfmaxa_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfmaxa.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x float> @llvm.loongarch.lsx.vfmaxa.s(<4 x float> %va, <4 x float> %vb) ++ ret <4 x float> %res ++} ++ ++declare <2 x double> @llvm.loongarch.lsx.vfmaxa.d(<2 x double>, <2 x double>) ++ ++define <2 x double> @lsx_vfmaxa_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfmaxa_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfmaxa.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x double> @llvm.loongarch.lsx.vfmaxa.d(<2 x double> %va, <2 x double> %vb) ++ ret <2 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmin.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmin.ll +new file mode 100644 +index 000000000000..0f844240277f +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmin.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <4 x float> @llvm.loongarch.lsx.vfmin.s(<4 x float>, <4 x float>) ++ ++define <4 x float> @lsx_vfmin_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfmin_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfmin.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x float> @llvm.loongarch.lsx.vfmin.s(<4 x float> %va, <4 x float> %vb) ++ ret <4 x float> %res ++} ++ ++declare <2 x double> @llvm.loongarch.lsx.vfmin.d(<2 x double>, <2 x double>) ++ ++define <2 x double> @lsx_vfmin_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfmin_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfmin.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x double> @llvm.loongarch.lsx.vfmin.d(<2 x double> %va, <2 x double> %vb) ++ ret <2 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmina.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmina.ll +new file mode 100644 +index 000000000000..27f70b5fba32 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmina.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <4 x float> @llvm.loongarch.lsx.vfmina.s(<4 x float>, <4 x float>) ++ ++define <4 x float> @lsx_vfmina_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfmina_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfmina.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x float> @llvm.loongarch.lsx.vfmina.s(<4 x float> %va, <4 x float> %vb) ++ ret <4 x float> %res ++} ++ ++declare <2 x double> @llvm.loongarch.lsx.vfmina.d(<2 x double>, <2 x double>) ++ ++define <2 x double> @lsx_vfmina_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfmina_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfmina.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x double> @llvm.loongarch.lsx.vfmina.d(<2 x double> 
%va, <2 x double> %vb) ++ ret <2 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmsub.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmsub.ll +new file mode 100644 +index 000000000000..856ca9cadbd9 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmsub.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <4 x float> @llvm.loongarch.lsx.vfmsub.s(<4 x float>, <4 x float>, <4 x float>) ++ ++define <4 x float> @lsx_vfmsub_s(<4 x float> %va, <4 x float> %vb, <4 x float> %vc) nounwind { ++; CHECK-LABEL: lsx_vfmsub_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfmsub.s $vr0, $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x float> @llvm.loongarch.lsx.vfmsub.s(<4 x float> %va, <4 x float> %vb, <4 x float> %vc) ++ ret <4 x float> %res ++} ++ ++declare <2 x double> @llvm.loongarch.lsx.vfmsub.d(<2 x double>, <2 x double>, <2 x double>) ++ ++define <2 x double> @lsx_vfmsub_d(<2 x double> %va, <2 x double> %vb, <2 x double> %vc) nounwind { ++; CHECK-LABEL: lsx_vfmsub_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfmsub.d $vr0, $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x double> @llvm.loongarch.lsx.vfmsub.d(<2 x double> %va, <2 x double> %vb, <2 x double> %vc) ++ ret <2 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmul.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmul.ll +new file mode 100644 +index 000000000000..1e6c4c77d536 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmul.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <4 x float> @llvm.loongarch.lsx.vfmul.s(<4 x float>, <4 x float>) ++ ++define <4 x float> @lsx_vfmul_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfmul_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfmul.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x float> @llvm.loongarch.lsx.vfmul.s(<4 x float> %va, <4 x float> %vb) ++ ret <4 x float> %res ++} ++ ++declare <2 x double> @llvm.loongarch.lsx.vfmul.d(<2 x double>, <2 x double>) ++ ++define <2 x double> @lsx_vfmul_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfmul_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfmul.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x double> @llvm.loongarch.lsx.vfmul.d(<2 x double> %va, <2 x double> %vb) ++ ret <2 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fnmadd.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fnmadd.ll +new file mode 100644 +index 000000000000..e1a9ea78ef9d +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fnmadd.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <4 x float> @llvm.loongarch.lsx.vfnmadd.s(<4 x float>, <4 x float>, <4 x float>) ++ ++define <4 x float> @lsx_vfnmadd_s(<4 x float> %va, <4 x float> %vb, <4 x float> %vc) nounwind { ++; CHECK-LABEL: lsx_vfnmadd_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfnmadd.s $vr0, $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x float> @llvm.loongarch.lsx.vfnmadd.s(<4 x float> %va, <4 x float> %vb, <4 x float> %vc) ++ ret <4 x float> %res ++} ++ ++declare <2 x double> 
@llvm.loongarch.lsx.vfnmadd.d(<2 x double>, <2 x double>, <2 x double>) ++ ++define <2 x double> @lsx_vfnmadd_d(<2 x double> %va, <2 x double> %vb, <2 x double> %vc) nounwind { ++; CHECK-LABEL: lsx_vfnmadd_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfnmadd.d $vr0, $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x double> @llvm.loongarch.lsx.vfnmadd.d(<2 x double> %va, <2 x double> %vb, <2 x double> %vc) ++ ret <2 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fnmsub.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fnmsub.ll +new file mode 100644 +index 000000000000..46db0f4a5061 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fnmsub.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <4 x float> @llvm.loongarch.lsx.vfnmsub.s(<4 x float>, <4 x float>, <4 x float>) ++ ++define <4 x float> @lsx_vfnmsub_s(<4 x float> %va, <4 x float> %vb, <4 x float> %vc) nounwind { ++; CHECK-LABEL: lsx_vfnmsub_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfnmsub.s $vr0, $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x float> @llvm.loongarch.lsx.vfnmsub.s(<4 x float> %va, <4 x float> %vb, <4 x float> %vc) ++ ret <4 x float> %res ++} ++ ++declare <2 x double> @llvm.loongarch.lsx.vfnmsub.d(<2 x double>, <2 x double>, <2 x double>) ++ ++define <2 x double> @lsx_vfnmsub_d(<2 x double> %va, <2 x double> %vb, <2 x double> %vc) nounwind { ++; CHECK-LABEL: lsx_vfnmsub_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfnmsub.d $vr0, $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x double> @llvm.loongarch.lsx.vfnmsub.d(<2 x double> %va, <2 x double> %vb, <2 x double> %vc) ++ ret <2 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frecip.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frecip.ll +new file mode 100644 +index 000000000000..669fde5912d4 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frecip.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <4 x float> @llvm.loongarch.lsx.vfrecip.s(<4 x float>) ++ ++define <4 x float> @lsx_vfrecip_s(<4 x float> %va) nounwind { ++; CHECK-LABEL: lsx_vfrecip_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfrecip.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x float> @llvm.loongarch.lsx.vfrecip.s(<4 x float> %va) ++ ret <4 x float> %res ++} ++ ++declare <2 x double> @llvm.loongarch.lsx.vfrecip.d(<2 x double>) ++ ++define <2 x double> @lsx_vfrecip_d(<2 x double> %va) nounwind { ++; CHECK-LABEL: lsx_vfrecip_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfrecip.d $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x double> @llvm.loongarch.lsx.vfrecip.d(<2 x double> %va) ++ ret <2 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frint.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frint.ll +new file mode 100644 +index 000000000000..8d872fc72962 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frint.ll +@@ -0,0 +1,122 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <4 x float> @llvm.loongarch.lsx.vfrintrne.s(<4 x float>) ++ ++define <4 x float> @lsx_vfrintrne_s(<4 x float> %va) nounwind { ++; CHECK-LABEL: 
lsx_vfrintrne_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfrintrne.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x float> @llvm.loongarch.lsx.vfrintrne.s(<4 x float> %va) ++ ret <4 x float> %res ++} ++ ++declare <2 x double> @llvm.loongarch.lsx.vfrintrne.d(<2 x double>) ++ ++define <2 x double> @lsx_vfrintrne_d(<2 x double> %va) nounwind { ++; CHECK-LABEL: lsx_vfrintrne_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfrintrne.d $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x double> @llvm.loongarch.lsx.vfrintrne.d(<2 x double> %va) ++ ret <2 x double> %res ++} ++ ++declare <4 x float> @llvm.loongarch.lsx.vfrintrz.s(<4 x float>) ++ ++define <4 x float> @lsx_vfrintrz_s(<4 x float> %va) nounwind { ++; CHECK-LABEL: lsx_vfrintrz_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfrintrz.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x float> @llvm.loongarch.lsx.vfrintrz.s(<4 x float> %va) ++ ret <4 x float> %res ++} ++ ++declare <2 x double> @llvm.loongarch.lsx.vfrintrz.d(<2 x double>) ++ ++define <2 x double> @lsx_vfrintrz_d(<2 x double> %va) nounwind { ++; CHECK-LABEL: lsx_vfrintrz_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfrintrz.d $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x double> @llvm.loongarch.lsx.vfrintrz.d(<2 x double> %va) ++ ret <2 x double> %res ++} ++ ++declare <4 x float> @llvm.loongarch.lsx.vfrintrp.s(<4 x float>) ++ ++define <4 x float> @lsx_vfrintrp_s(<4 x float> %va) nounwind { ++; CHECK-LABEL: lsx_vfrintrp_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfrintrp.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x float> @llvm.loongarch.lsx.vfrintrp.s(<4 x float> %va) ++ ret <4 x float> %res ++} ++ ++declare <2 x double> @llvm.loongarch.lsx.vfrintrp.d(<2 x double>) ++ ++define <2 x double> @lsx_vfrintrp_d(<2 x double> %va) nounwind { ++; CHECK-LABEL: lsx_vfrintrp_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfrintrp.d $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x double> @llvm.loongarch.lsx.vfrintrp.d(<2 x double> %va) ++ ret <2 x double> %res ++} ++ ++declare <4 x float> @llvm.loongarch.lsx.vfrintrm.s(<4 x float>) ++ ++define <4 x float> @lsx_vfrintrm_s(<4 x float> %va) nounwind { ++; CHECK-LABEL: lsx_vfrintrm_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfrintrm.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x float> @llvm.loongarch.lsx.vfrintrm.s(<4 x float> %va) ++ ret <4 x float> %res ++} ++ ++declare <2 x double> @llvm.loongarch.lsx.vfrintrm.d(<2 x double>) ++ ++define <2 x double> @lsx_vfrintrm_d(<2 x double> %va) nounwind { ++; CHECK-LABEL: lsx_vfrintrm_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfrintrm.d $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x double> @llvm.loongarch.lsx.vfrintrm.d(<2 x double> %va) ++ ret <2 x double> %res ++} ++ ++declare <4 x float> @llvm.loongarch.lsx.vfrint.s(<4 x float>) ++ ++define <4 x float> @lsx_vfrint_s(<4 x float> %va) nounwind { ++; CHECK-LABEL: lsx_vfrint_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfrint.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x float> @llvm.loongarch.lsx.vfrint.s(<4 x float> %va) ++ ret <4 x float> %res ++} ++ ++declare <2 x double> @llvm.loongarch.lsx.vfrint.d(<2 x double>) ++ ++define <2 x double> @lsx_vfrint_d(<2 x double> %va) nounwind { ++; CHECK-LABEL: lsx_vfrint_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfrint.d $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x double> @llvm.loongarch.lsx.vfrint.d(<2 x double> %va) ++ ret <2 x double> %res 
++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frsqrt.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frsqrt.ll +new file mode 100644 +index 000000000000..326d87308b0b +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frsqrt.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <4 x float> @llvm.loongarch.lsx.vfrsqrt.s(<4 x float>) ++ ++define <4 x float> @lsx_vfrsqrt_s(<4 x float> %va) nounwind { ++; CHECK-LABEL: lsx_vfrsqrt_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfrsqrt.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x float> @llvm.loongarch.lsx.vfrsqrt.s(<4 x float> %va) ++ ret <4 x float> %res ++} ++ ++declare <2 x double> @llvm.loongarch.lsx.vfrsqrt.d(<2 x double>) ++ ++define <2 x double> @lsx_vfrsqrt_d(<2 x double> %va) nounwind { ++; CHECK-LABEL: lsx_vfrsqrt_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfrsqrt.d $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x double> @llvm.loongarch.lsx.vfrsqrt.d(<2 x double> %va) ++ ret <2 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp.ll +new file mode 100644 +index 000000000000..5c072b194d4f +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vfrstp.b(<16 x i8>, <16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vfrstp_b(<16 x i8> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind { ++; CHECK-LABEL: lsx_vfrstp_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfrstp.b $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vfrstp.b(<16 x i8> %va, <16 x i8> %vb, <16 x i8> %vc) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vfrstp.h(<8 x i16>, <8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vfrstp_h(<8 x i16> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind { ++; CHECK-LABEL: lsx_vfrstp_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfrstp.h $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vfrstp.h(<8 x i16> %va, <8 x i16> %vb, <8 x i16> %vc) ++ ret <8 x i16> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vfrstpi_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vfrstpi_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfrstpi.b $vr0, $vr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> %va, <16 x i8> %vb, i32 1) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vfrstpi_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vfrstpi_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfrstpi.h $vr0, $vr1, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> %va, <8 x i16> %vb, i32 31) ++ ret <8 x i16> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fsqrt.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fsqrt.ll +new file mode 100644 +index 000000000000..55bffba9e99e +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fsqrt.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been 
autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <4 x float> @llvm.loongarch.lsx.vfsqrt.s(<4 x float>) ++ ++define <4 x float> @lsx_vfsqrt_s(<4 x float> %va) nounwind { ++; CHECK-LABEL: lsx_vfsqrt_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfsqrt.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x float> @llvm.loongarch.lsx.vfsqrt.s(<4 x float> %va) ++ ret <4 x float> %res ++} ++ ++declare <2 x double> @llvm.loongarch.lsx.vfsqrt.d(<2 x double>) ++ ++define <2 x double> @lsx_vfsqrt_d(<2 x double> %va) nounwind { ++; CHECK-LABEL: lsx_vfsqrt_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfsqrt.d $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x double> @llvm.loongarch.lsx.vfsqrt.d(<2 x double> %va) ++ ret <2 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fsub.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fsub.ll +new file mode 100644 +index 000000000000..2beba4a70dc9 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fsub.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <4 x float> @llvm.loongarch.lsx.vfsub.s(<4 x float>, <4 x float>) ++ ++define <4 x float> @lsx_vfsub_s(<4 x float> %va, <4 x float> %vb) nounwind { ++; CHECK-LABEL: lsx_vfsub_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfsub.s $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x float> @llvm.loongarch.lsx.vfsub.s(<4 x float> %va, <4 x float> %vb) ++ ret <4 x float> %res ++} ++ ++declare <2 x double> @llvm.loongarch.lsx.vfsub.d(<2 x double>, <2 x double>) ++ ++define <2 x double> @lsx_vfsub_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vfsub_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vfsub.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x double> @llvm.loongarch.lsx.vfsub.d(<2 x double> %va, <2 x double> %vb) ++ ret <2 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ftint.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ftint.ll +new file mode 100644 +index 000000000000..2a494cd7fa87 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ftint.ll +@@ -0,0 +1,350 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <4 x i32> @llvm.loongarch.lsx.vftintrne.w.s(<4 x float>) ++ ++define <4 x i32> @lsx_vftintrne_w_s(<4 x float> %va) nounwind { ++; CHECK-LABEL: lsx_vftintrne_w_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vftintrne.w.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.s(<4 x float> %va) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vftintrne.l.d(<2 x double>) ++ ++define <2 x i64> @lsx_vftintrne_l_d(<2 x double> %va) nounwind { ++; CHECK-LABEL: lsx_vftintrne_l_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vftintrne.l.d $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrne.l.d(<2 x double> %va) ++ ret <2 x i64> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vftintrz.w.s(<4 x float>) ++ ++define <4 x i32> @lsx_vftintrz_w_s(<4 x float> %va) nounwind { ++; CHECK-LABEL: lsx_vftintrz_w_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vftintrz.w.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> 
@llvm.loongarch.lsx.vftintrz.w.s(<4 x float> %va) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vftintrz.l.d(<2 x double>) ++ ++define <2 x i64> @lsx_vftintrz_l_d(<2 x double> %va) nounwind { ++; CHECK-LABEL: lsx_vftintrz_l_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vftintrz.l.d $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrz.l.d(<2 x double> %va) ++ ret <2 x i64> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vftintrp.w.s(<4 x float>) ++ ++define <4 x i32> @lsx_vftintrp_w_s(<4 x float> %va) nounwind { ++; CHECK-LABEL: lsx_vftintrp_w_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vftintrp.w.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.s(<4 x float> %va) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vftintrp.l.d(<2 x double>) ++ ++define <2 x i64> @lsx_vftintrp_l_d(<2 x double> %va) nounwind { ++; CHECK-LABEL: lsx_vftintrp_l_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vftintrp.l.d $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrp.l.d(<2 x double> %va) ++ ret <2 x i64> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vftintrm.w.s(<4 x float>) ++ ++define <4 x i32> @lsx_vftintrm_w_s(<4 x float> %va) nounwind { ++; CHECK-LABEL: lsx_vftintrm_w_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vftintrm.w.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.s(<4 x float> %va) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vftintrm.l.d(<2 x double>) ++ ++define <2 x i64> @lsx_vftintrm_l_d(<2 x double> %va) nounwind { ++; CHECK-LABEL: lsx_vftintrm_l_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vftintrm.l.d $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrm.l.d(<2 x double> %va) ++ ret <2 x i64> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vftint.w.s(<4 x float>) ++ ++define <4 x i32> @lsx_vftint_w_s(<4 x float> %va) nounwind { ++; CHECK-LABEL: lsx_vftint_w_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vftint.w.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vftint.w.s(<4 x float> %va) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vftint.l.d(<2 x double>) ++ ++define <2 x i64> @lsx_vftint_l_d(<2 x double> %va) nounwind { ++; CHECK-LABEL: lsx_vftint_l_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vftint.l.d $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vftint.l.d(<2 x double> %va) ++ ret <2 x i64> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vftintrz.wu.s(<4 x float>) ++ ++define <4 x i32> @lsx_vftintrz_wu_s(<4 x float> %va) nounwind { ++; CHECK-LABEL: lsx_vftintrz_wu_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vftintrz.wu.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vftintrz.wu.s(<4 x float> %va) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vftintrz.lu.d(<2 x double>) ++ ++define <2 x i64> @lsx_vftintrz_lu_d(<2 x double> %va) nounwind { ++; CHECK-LABEL: lsx_vftintrz_lu_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vftintrz.lu.d $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrz.lu.d(<2 x double> %va) ++ ret <2 x i64> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vftint.wu.s(<4 x float>) ++ ++define <4 x i32> @lsx_vftint_wu_s(<4 x float> %va) nounwind { 
++; CHECK-LABEL: lsx_vftint_wu_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vftint.wu.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vftint.wu.s(<4 x float> %va) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vftint.lu.d(<2 x double>) ++ ++define <2 x i64> @lsx_vftint_lu_d(<2 x double> %va) nounwind { ++; CHECK-LABEL: lsx_vftint_lu_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vftint.lu.d $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vftint.lu.d(<2 x double> %va) ++ ret <2 x i64> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vftintrne.w.d(<2 x double>, <2 x double>) ++ ++define <4 x i32> @lsx_vftintrne_w_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vftintrne_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vftintrne.w.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.d(<2 x double> %va, <2 x double> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vftintrz.w.d(<2 x double>, <2 x double>) ++ ++define <4 x i32> @lsx_vftintrz_w_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vftintrz_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vftintrz.w.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.d(<2 x double> %va, <2 x double> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vftintrp.w.d(<2 x double>, <2 x double>) ++ ++define <4 x i32> @lsx_vftintrp_w_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vftintrp_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vftintrp.w.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.d(<2 x double> %va, <2 x double> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vftintrm.w.d(<2 x double>, <2 x double>) ++ ++define <4 x i32> @lsx_vftintrm_w_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vftintrm_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vftintrm.w.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.d(<2 x double> %va, <2 x double> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vftint.w.d(<2 x double>, <2 x double>) ++ ++define <4 x i32> @lsx_vftint_w_d(<2 x double> %va, <2 x double> %vb) nounwind { ++; CHECK-LABEL: lsx_vftint_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vftint.w.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vftint.w.d(<2 x double> %va, <2 x double> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vftintrnel.l.s(<4 x float>) ++ ++define <2 x i64> @lsx_vftintrnel_l_s(<4 x float> %va) nounwind { ++; CHECK-LABEL: lsx_vftintrnel_l_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vftintrnel.l.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrnel.l.s(<4 x float> %va) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vftintrneh.l.s(<4 x float>) ++ ++define <2 x i64> @lsx_vftintrneh_l_s(<4 x float> %va) nounwind { ++; CHECK-LABEL: lsx_vftintrneh_l_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vftintrneh.l.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrneh.l.s(<4 x float> %va) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> 
@llvm.loongarch.lsx.vftintrzl.l.s(<4 x float>) ++ ++define <2 x i64> @lsx_vftintrzl_l_s(<4 x float> %va) nounwind { ++; CHECK-LABEL: lsx_vftintrzl_l_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vftintrzl.l.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrzl.l.s(<4 x float> %va) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vftintrzh.l.s(<4 x float>) ++ ++define <2 x i64> @lsx_vftintrzh_l_s(<4 x float> %va) nounwind { ++; CHECK-LABEL: lsx_vftintrzh_l_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vftintrzh.l.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrzh.l.s(<4 x float> %va) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vftintrpl.l.s(<4 x float>) ++ ++define <2 x i64> @lsx_vftintrpl_l_s(<4 x float> %va) nounwind { ++; CHECK-LABEL: lsx_vftintrpl_l_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vftintrpl.l.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrpl.l.s(<4 x float> %va) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vftintrph.l.s(<4 x float>) ++ ++define <2 x i64> @lsx_vftintrph_l_s(<4 x float> %va) nounwind { ++; CHECK-LABEL: lsx_vftintrph_l_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vftintrph.l.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrph.l.s(<4 x float> %va) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vftintrml.l.s(<4 x float>) ++ ++define <2 x i64> @lsx_vftintrml_l_s(<4 x float> %va) nounwind { ++; CHECK-LABEL: lsx_vftintrml_l_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vftintrml.l.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrml.l.s(<4 x float> %va) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vftintrmh.l.s(<4 x float>) ++ ++define <2 x i64> @lsx_vftintrmh_l_s(<4 x float> %va) nounwind { ++; CHECK-LABEL: lsx_vftintrmh_l_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vftintrmh.l.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrmh.l.s(<4 x float> %va) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vftintl.l.s(<4 x float>) ++ ++define <2 x i64> @lsx_vftintl_l_s(<4 x float> %va) nounwind { ++; CHECK-LABEL: lsx_vftintl_l_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vftintl.l.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vftintl.l.s(<4 x float> %va) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vftinth.l.s(<4 x float>) ++ ++define <2 x i64> @lsx_vftinth_l_s(<4 x float> %va) nounwind { ++; CHECK-LABEL: lsx_vftinth_l_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vftinth.l.s $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vftinth.l.s(<4 x float> %va) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-haddw.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-haddw.ll +new file mode 100644 +index 000000000000..05725582334a +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-haddw.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <8 x i16> @llvm.loongarch.lsx.vhaddw.h.b(<16 x i8>, <16 x i8>) ++ ++define <8 x i16> @lsx_vhaddw_h_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: 
lsx_vhaddw_h_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vhaddw.h.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vhaddw.h.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vhaddw.w.h(<8 x i16>, <8 x i16>) ++ ++define <4 x i32> @lsx_vhaddw_w_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vhaddw_w_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vhaddw.w.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vhaddw.w.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vhaddw.d.w(<4 x i32>, <4 x i32>) ++ ++define <2 x i64> @lsx_vhaddw_d_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vhaddw_d_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vhaddw.d.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vhaddw.d.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vhaddw.q.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vhaddw_q_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vhaddw_q_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vhaddw.q.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vhaddw.q.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vhaddw.hu.bu(<16 x i8>, <16 x i8>) ++ ++define <8 x i16> @lsx_vhaddw_hu_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vhaddw_hu_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vhaddw.hu.bu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vhaddw.hu.bu(<16 x i8> %va, <16 x i8> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vhaddw.wu.hu(<8 x i16>, <8 x i16>) ++ ++define <4 x i32> @lsx_vhaddw_wu_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vhaddw_wu_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vhaddw.wu.hu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vhaddw.wu.hu(<8 x i16> %va, <8 x i16> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vhaddw.du.wu(<4 x i32>, <4 x i32>) ++ ++define <2 x i64> @lsx_vhaddw_du_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vhaddw_du_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vhaddw.du.wu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vhaddw.du.wu(<4 x i32> %va, <4 x i32> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vhaddw.qu.du(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vhaddw_qu_du(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vhaddw_qu_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vhaddw.qu.du $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vhaddw.qu.du(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-hsubw.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-hsubw.ll +new file mode 100644 +index 000000000000..dd5815b2ea85 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-hsubw.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <8 x i16> @llvm.loongarch.lsx.vhsubw.h.b(<16 
x i8>, <16 x i8>) ++ ++define <8 x i16> @lsx_vhsubw_h_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vhsubw_h_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vhsubw.h.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vhsubw.h.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vhsubw.w.h(<8 x i16>, <8 x i16>) ++ ++define <4 x i32> @lsx_vhsubw_w_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vhsubw_w_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vhsubw.w.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vhsubw.w.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vhsubw.d.w(<4 x i32>, <4 x i32>) ++ ++define <2 x i64> @lsx_vhsubw_d_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vhsubw_d_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vhsubw.d.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vhsubw.d.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vhsubw.q.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vhsubw_q_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vhsubw_q_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vhsubw.q.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vhsubw.q.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vhsubw.hu.bu(<16 x i8>, <16 x i8>) ++ ++define <8 x i16> @lsx_vhsubw_hu_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vhsubw_hu_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vhsubw.hu.bu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vhsubw.hu.bu(<16 x i8> %va, <16 x i8> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vhsubw.wu.hu(<8 x i16>, <8 x i16>) ++ ++define <4 x i32> @lsx_vhsubw_wu_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vhsubw_wu_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vhsubw.wu.hu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vhsubw.wu.hu(<8 x i16> %va, <8 x i16> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vhsubw.du.wu(<4 x i32>, <4 x i32>) ++ ++define <2 x i64> @lsx_vhsubw_du_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vhsubw_du_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vhsubw.du.wu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vhsubw.du.wu(<4 x i32> %va, <4 x i32> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vhsubw.qu.du(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vhsubw_qu_du(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vhsubw_qu_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vhsubw.qu.du $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vhsubw.qu.du(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ilv.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ilv.ll +new file mode 100644 +index 000000000000..77b0b3484df8 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ilv.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc 
--mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vilvl.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vilvl_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vilvl_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vilvl.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vilvl.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vilvl.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vilvl_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vilvl_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vilvl.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vilvl.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vilvl_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vilvl_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vilvl.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vilvl.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vilvl.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vilvl_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vilvl_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vilvl.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vilvl.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vilvh.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vilvh_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vilvh_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vilvh.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vilvh.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vilvh.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vilvh_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vilvh_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vilvh.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vilvh.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vilvh.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vilvh_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vilvh_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vilvh.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vilvh.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vilvh.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vilvh_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vilvh_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vilvh.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vilvh.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr.ll +new file mode 100644 +index 000000000000..61d2cbd28066 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr.ll +@@ -0,0 +1,54 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < 
%s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8>, i32, i32) ++ ++define <16 x i8> @lsx_vinsgr2vr_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vinsgr2vr_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: ori $a0, $zero, 1 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 15 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> %va, i32 1, i32 15) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16>, i32, i32) ++ ++define <8 x i16> @lsx_vinsgr2vr_h(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vinsgr2vr_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: ori $a0, $zero, 1 ++; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 7 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> %va, i32 1, i32 7) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32>, i32, i32) ++ ++define <4 x i32> @lsx_vinsgr2vr_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vinsgr2vr_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: ori $a0, $zero, 1 ++; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 3 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> %va, i32 1, i32 3) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64>, i64, i32) ++ ++define <2 x i64> @lsx_vinsgr2vr_d(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vinsgr2vr_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: ori $a0, $zero, 1 ++; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> %va, i64 1, i32 1) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld.ll +new file mode 100644 +index 000000000000..b9e2ff8088d8 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vld(i8*, i32) ++ ++define <16 x i8> @lsx_vld(i8* %p) nounwind { ++; CHECK-LABEL: lsx_vld: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vld(i8* %p, i32 1) ++ ret <16 x i8> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vldx(i8*, i64) ++ ++define <16 x i8> @lsx_vldx(i8* %p, i64 %b) nounwind { ++; CHECK-LABEL: lsx_vldx: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vldx $vr0, $a0, $a1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vldx(i8* %p, i64 %b) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi.ll +new file mode 100644 +index 000000000000..ace910b54d9a +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi.ll +@@ -0,0 +1,62 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <2 x i64> @llvm.loongarch.lsx.vldi(i32) ++ ++define <2 x i64> @lsx_vldi() nounwind { ++; CHECK-LABEL: lsx_vldi: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vldi $vr0, 4095 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vldi(i32 4095) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32) ++ ++define <16 x i8> @lsx_vrepli_b() 
nounwind { ++; CHECK-LABEL: lsx_vrepli_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vrepli.b $vr0, 511 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32 511) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32) ++ ++define <8 x i16> @lsx_vrepli_h() nounwind { ++; CHECK-LABEL: lsx_vrepli_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vrepli.h $vr0, 511 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32 511) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32) ++ ++define <4 x i32> @lsx_vrepli_w() nounwind { ++; CHECK-LABEL: lsx_vrepli_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vrepli.w $vr0, 511 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32 511) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32) ++ ++define <2 x i64> @lsx_vrepli_d() nounwind { ++; CHECK-LABEL: lsx_vrepli_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vrepli.d $vr0, 511 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32 511) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl.ll +new file mode 100644 +index 000000000000..1a9cf3d3a766 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vldrepl.b(i8*, i32) ++ ++define <16 x i8> @lsx_vldrepl_b(i8* %p, i32 %b) nounwind { ++; CHECK-LABEL: lsx_vldrepl_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vldrepl.b $vr0, $a0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(i8* %p, i32 1) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vldrepl.h(i8*, i32) ++ ++define <8 x i16> @lsx_vldrepl_h(i8* %p, i32 %b) nounwind { ++; CHECK-LABEL: lsx_vldrepl_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vldrepl.h $vr0, $a0, 2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vldrepl.h(i8* %p, i32 2) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vldrepl.w(i8*, i32) ++ ++define <4 x i32> @lsx_vldrepl_w(i8* %p, i32 %b) nounwind { ++; CHECK-LABEL: lsx_vldrepl_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vldrepl.w $vr0, $a0, 4 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vldrepl.w(i8* %p, i32 4) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vldrepl.d(i8*, i32) ++ ++define <2 x i64> @lsx_vldrepl_d(i8* %p, i32 %b) nounwind { ++; CHECK-LABEL: lsx_vldrepl_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vldrepl.d $vr0, $a0, 8 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vldrepl.d(i8* %p, i32 8) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-madd.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-madd.ll +new file mode 100644 +index 000000000000..89503724fd73 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-madd.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vmadd.b(<16 x i8>, <16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vmadd_b(<16 x i8> %va, <16 x 
i8> %vb, <16 x i8> %vc) nounwind { ++; CHECK-LABEL: lsx_vmadd_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmadd.b $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmadd.b(<16 x i8> %va, <16 x i8> %vb, <16 x i8> %vc) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmadd.h(<8 x i16>, <8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vmadd_h(<8 x i16> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind { ++; CHECK-LABEL: lsx_vmadd_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmadd.h $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmadd.h(<8 x i16> %va, <8 x i16> %vb, <8 x i16> %vc) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmadd.w(<4 x i32>, <4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vmadd_w(<4 x i32> %va, <4 x i32> %vb, <4 x i32> %vc) nounwind { ++; CHECK-LABEL: lsx_vmadd_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmadd.w $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmadd.w(<4 x i32> %va, <4 x i32> %vb, <4 x i32> %vc) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmadd.d(<2 x i64>, <2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vmadd_d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) nounwind { ++; CHECK-LABEL: lsx_vmadd_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmadd.d $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmadd.d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-maddw.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-maddw.ll +new file mode 100644 +index 000000000000..1e3ab25a5fcf +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-maddw.ll +@@ -0,0 +1,290 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.b(<8 x i16>, <16 x i8>, <16 x i8>) ++ ++define <8 x i16> @lsx_vmaddwev_h_b(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind { ++; CHECK-LABEL: lsx_vmaddwev_h_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaddwev.h.b $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.b(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.h(<4 x i32>, <8 x i16>, <8 x i16>) ++ ++define <4 x i32> @lsx_vmaddwev_w_h(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind { ++; CHECK-LABEL: lsx_vmaddwev_w_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaddwev.w.h $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.h(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.w(<2 x i64>, <4 x i32>, <4 x i32>) ++ ++define <2 x i64> @lsx_vmaddwev_d_w(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) nounwind { ++; CHECK-LABEL: lsx_vmaddwev_d_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaddwev.d.w $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.w(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.d(<2 x i64>, <2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vmaddwev_q_d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) nounwind { ++; CHECK-LABEL: 
lsx_vmaddwev_q_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaddwev.q.d $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) ++ ret <2 x i64> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu(<8 x i16>, <16 x i8>, <16 x i8>) ++ ++define <8 x i16> @lsx_vmaddwev_h_bu(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind { ++; CHECK-LABEL: lsx_vmaddwev_h_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaddwev.h.bu $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu(<4 x i32>, <8 x i16>, <8 x i16>) ++ ++define <4 x i32> @lsx_vmaddwev_w_hu(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind { ++; CHECK-LABEL: lsx_vmaddwev_w_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaddwev.w.hu $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu(<2 x i64>, <4 x i32>, <4 x i32>) ++ ++define <2 x i64> @lsx_vmaddwev_d_wu(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) nounwind { ++; CHECK-LABEL: lsx_vmaddwev_d_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaddwev.d.wu $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du(<2 x i64>, <2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vmaddwev_q_du(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) nounwind { ++; CHECK-LABEL: lsx_vmaddwev_q_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaddwev.q.du $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) ++ ret <2 x i64> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu.b(<8 x i16>, <16 x i8>, <16 x i8>) ++ ++define <8 x i16> @lsx_vmaddwev_h_bu_b(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind { ++; CHECK-LABEL: lsx_vmaddwev_h_bu_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaddwev.h.bu.b $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu.b(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu.h(<4 x i32>, <8 x i16>, <8 x i16>) ++ ++define <4 x i32> @lsx_vmaddwev_w_hu_h(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind { ++; CHECK-LABEL: lsx_vmaddwev_w_hu_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaddwev.w.hu.h $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu.h(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu.w(<2 x i64>, <4 x i32>, <4 x i32>) ++ ++define <2 x i64> @lsx_vmaddwev_d_wu_w(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) nounwind { ++; CHECK-LABEL: lsx_vmaddwev_d_wu_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaddwev.d.wu.w $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu.w(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du.d(<2 x 
i64>, <2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vmaddwev_q_du_d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) nounwind { ++; CHECK-LABEL: lsx_vmaddwev_q_du_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaddwev.q.du.d $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du.d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) ++ ret <2 x i64> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.b(<8 x i16>, <16 x i8>, <16 x i8>) ++ ++define <8 x i16> @lsx_vmaddwod_h_b(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind { ++; CHECK-LABEL: lsx_vmaddwod_h_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaddwod.h.b $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.b(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.h(<4 x i32>, <8 x i16>, <8 x i16>) ++ ++define <4 x i32> @lsx_vmaddwod_w_h(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind { ++; CHECK-LABEL: lsx_vmaddwod_w_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaddwod.w.h $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.h(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.w(<2 x i64>, <4 x i32>, <4 x i32>) ++ ++define <2 x i64> @lsx_vmaddwod_d_w(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) nounwind { ++; CHECK-LABEL: lsx_vmaddwod_d_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaddwod.d.w $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.w(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.d(<2 x i64>, <2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vmaddwod_q_d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) nounwind { ++; CHECK-LABEL: lsx_vmaddwod_q_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaddwod.q.d $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) ++ ret <2 x i64> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu(<8 x i16>, <16 x i8>, <16 x i8>) ++ ++define <8 x i16> @lsx_vmaddwod_h_bu(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind { ++; CHECK-LABEL: lsx_vmaddwod_h_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaddwod.h.bu $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu(<4 x i32>, <8 x i16>, <8 x i16>) ++ ++define <4 x i32> @lsx_vmaddwod_w_hu(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind { ++; CHECK-LABEL: lsx_vmaddwod_w_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaddwod.w.hu $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu(<2 x i64>, <4 x i32>, <4 x i32>) ++ ++define <2 x i64> @lsx_vmaddwod_d_wu(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) nounwind { ++; CHECK-LABEL: lsx_vmaddwod_d_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaddwod.d.wu $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu(<2 x i64> %va, <4 x i32> %vb, <4 x 
i32> %vc) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du(<2 x i64>, <2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vmaddwod_q_du(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) nounwind { ++; CHECK-LABEL: lsx_vmaddwod_q_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaddwod.q.du $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) ++ ret <2 x i64> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu.b(<8 x i16>, <16 x i8>, <16 x i8>) ++ ++define <8 x i16> @lsx_vmaddwod_h_bu_b(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind { ++; CHECK-LABEL: lsx_vmaddwod_h_bu_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaddwod.h.bu.b $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu.b(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu.h(<4 x i32>, <8 x i16>, <8 x i16>) ++ ++define <4 x i32> @lsx_vmaddwod_w_hu_h(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind { ++; CHECK-LABEL: lsx_vmaddwod_w_hu_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaddwod.w.hu.h $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu.h(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu.w(<2 x i64>, <4 x i32>, <4 x i32>) ++ ++define <2 x i64> @lsx_vmaddwod_d_wu_w(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) nounwind { ++; CHECK-LABEL: lsx_vmaddwod_d_wu_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaddwod.d.wu.w $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu.w(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du.d(<2 x i64>, <2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vmaddwod_q_du_d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) nounwind { ++; CHECK-LABEL: lsx_vmaddwod_q_du_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaddwod.q.du.d $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du.d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max.ll +new file mode 100644 +index 000000000000..4dd289cf6ed7 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max.ll +@@ -0,0 +1,194 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vmax.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vmax_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vmax_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmax.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmax.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmax.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vmax_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vmax_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmax.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmax.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 
x i32> @llvm.loongarch.lsx.vmax.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vmax_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vmax_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmax.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmax.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmax.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vmax_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vmax_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmax.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmax.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vmaxi_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vmaxi_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaxi.b $vr0, $vr0, -16 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> %va, i32 -16) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vmaxi_h(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vmaxi_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaxi.h $vr0, $vr0, -16 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> %va, i32 -16) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vmaxi_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vmaxi_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaxi.w $vr0, $vr0, 15 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32> %va, i32 15) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vmaxi_d(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vmaxi_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaxi.d $vr0, $vr0, 15 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64> %va, i32 15) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vmax.bu(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vmax_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vmax_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmax.bu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmax.bu(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmax.hu(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vmax_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vmax_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmax.hu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmax.hu(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmax.wu(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vmax_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vmax_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmax.wu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmax.wu(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmax.du(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vmax_du(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vmax_du: ++; CHECK: # %bb.0: # %entry ++; 
CHECK-NEXT: vmax.du $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmax.du(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vmaxi_bu(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vmaxi_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaxi.bu $vr0, $vr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> %va, i32 1) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vmaxi_hu(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vmaxi_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaxi.hu $vr0, $vr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16> %va, i32 1) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vmaxi_wu(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vmaxi_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaxi.wu $vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> %va, i32 31) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vmaxi_du(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vmaxi_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmaxi.du $vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64> %va, i32 31) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min.ll +new file mode 100644 +index 000000000000..aa12a5ead6a3 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min.ll +@@ -0,0 +1,194 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vmin.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vmin_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vmin_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmin.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmin.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmin.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vmin_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vmin_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmin.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmin.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmin.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vmin_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vmin_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmin.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmin.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmin.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vmin_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vmin_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmin.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmin.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} 
++ ++declare <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vmini_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vmini_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmini.b $vr0, $vr0, 15 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> %va, i32 15) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vmini_h(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vmini_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmini.h $vr0, $vr0, 15 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> %va, i32 15) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vmini_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vmini_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmini.w $vr0, $vr0, -16 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> %va, i32 -16) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vmini_d(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vmini_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmini.d $vr0, $vr0, -16 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> %va, i32 -16) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vmin.bu(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vmin_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vmin_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmin.bu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmin.bu(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmin.hu(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vmin_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vmin_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmin.hu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmin.hu(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmin.wu(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vmin_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vmin_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmin.wu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmin.wu(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmin.du(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vmin_du(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vmin_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmin.du $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmin.du(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vmini_bu(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vmini_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmini.bu $vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> %va, i32 31) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vmini_hu(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vmini_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: 
vmini.hu $vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> %va, i32 31) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vmini_wu(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vmini_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmini.wu $vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32> %va, i32 31) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vmini_du(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vmini_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmini.du $vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64> %va, i32 31) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mod.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mod.ll +new file mode 100644 +index 000000000000..6b3dc6865584 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mod.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vmod.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vmod_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vmod_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmod.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmod.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmod.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vmod_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vmod_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmod.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmod.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmod.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vmod_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vmod_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmod.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmod.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmod.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vmod_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vmod_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmod.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmod.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vmod.bu(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vmod_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vmod_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmod.bu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmod.bu(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmod.hu(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vmod_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vmod_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmod.hu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmod.hu(<8 x i16> %va, <8 x 
i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmod.wu(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vmod_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vmod_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmod.wu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmod.wu(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmod.du(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vmod_du(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vmod_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmod.du $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmod.du(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mskgez.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mskgez.ll +new file mode 100644 +index 000000000000..3ecd777aee67 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mskgez.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vmskgez.b(<16 x i8>) ++ ++define <16 x i8> @lsx_vmskgez_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vmskgez_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmskgez.b $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmskgez.b(<16 x i8> %va) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mskltz.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mskltz.ll +new file mode 100644 +index 000000000000..be00c76137c7 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mskltz.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vmskltz.b(<16 x i8>) ++ ++define <16 x i8> @lsx_vmskltz_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vmskltz_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmskltz.b $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmskltz.b(<16 x i8> %va) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmskltz.h(<8 x i16>) ++ ++define <8 x i16> @lsx_vmskltz_h(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vmskltz_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmskltz.h $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmskltz.h(<8 x i16> %va) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmskltz.w(<4 x i32>) ++ ++define <4 x i32> @lsx_vmskltz_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vmskltz_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmskltz.w $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmskltz.w(<4 x i32> %va) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmskltz.d(<2 x i64>) ++ ++define <2 x i64> @lsx_vmskltz_d(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vmskltz_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmskltz.d $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmskltz.d(<2 x i64> %va) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-msknz.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-msknz.ll +new file mode 100644 +index 
000000000000..02f1752f7190 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-msknz.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vmsknz.b(<16 x i8>) ++ ++define <16 x i8> @lsx_vmsknz_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vmsknz_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmsknz.b $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmsknz.b(<16 x i8> %va) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-msub.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-msub.ll +new file mode 100644 +index 000000000000..98684e10c78e +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-msub.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vmsub.b(<16 x i8>, <16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vmsub_b(<16 x i8> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind { ++; CHECK-LABEL: lsx_vmsub_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmsub.b $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmsub.b(<16 x i8> %va, <16 x i8> %vb, <16 x i8> %vc) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmsub.h(<8 x i16>, <8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vmsub_h(<8 x i16> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind { ++; CHECK-LABEL: lsx_vmsub_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmsub.h $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmsub.h(<8 x i16> %va, <8 x i16> %vb, <8 x i16> %vc) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmsub.w(<4 x i32>, <4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vmsub_w(<4 x i32> %va, <4 x i32> %vb, <4 x i32> %vc) nounwind { ++; CHECK-LABEL: lsx_vmsub_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmsub.w $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmsub.w(<4 x i32> %va, <4 x i32> %vb, <4 x i32> %vc) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmsub.d(<2 x i64>, <2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vmsub_d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) nounwind { ++; CHECK-LABEL: lsx_vmsub_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmsub.d $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmsub.d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-muh.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-muh.ll +new file mode 100644 +index 000000000000..a4deb8f8f823 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-muh.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vmuh.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vmuh_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vmuh_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmuh.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmuh.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> 
@llvm.loongarch.lsx.vmuh.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vmuh_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vmuh_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmuh.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmuh.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmuh.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vmuh_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vmuh_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmuh.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmuh.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmuh.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vmuh_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vmuh_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmuh.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmuh.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vmuh.bu(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vmuh_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vmuh_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmuh.bu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmuh.bu(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmuh.hu(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vmuh_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vmuh_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmuh.hu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmuh.hu(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmuh.wu(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vmuh_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vmuh_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmuh.wu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmuh.wu(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmuh.du(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vmuh_du(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vmuh_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmuh.du $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmuh.du(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mul.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mul.ll +new file mode 100644 +index 000000000000..aca60d1663b7 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mul.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vmul.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vmul_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vmul_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmul.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmul.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmul.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> 
@lsx_vmul_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vmul_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmul.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmul.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmul.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vmul_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vmul_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmul.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmul.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmul.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vmul_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vmul_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmul.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmul.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mulw.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mulw.ll +new file mode 100644 +index 000000000000..eb55c1f809e3 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mulw.ll +@@ -0,0 +1,290 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmulwev.h.b(<16 x i8>, <16 x i8>) ++ ++define <8 x i16> @lsx_vmulwev_h_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vmulwev_h_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmulwev.h.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmulwev.w.h(<8 x i16>, <8 x i16>) ++ ++define <4 x i32> @lsx_vmulwev_w_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vmulwev_w_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmulwev.w.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmulwev.d.w(<4 x i32>, <4 x i32>) ++ ++define <2 x i64> @lsx_vmulwev_d_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vmulwev_d_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmulwev.d.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmulwev.q.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vmulwev_q_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vmulwev_q_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmulwev.q.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu(<16 x i8>, <16 x i8>) ++ ++define <8 x i16> @lsx_vmulwev_h_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vmulwev_h_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmulwev.h.bu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu(<16 x i8> %va, <16 x i8> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> 
@llvm.loongarch.lsx.vmulwev.w.hu(<8 x i16>, <8 x i16>) ++ ++define <4 x i32> @lsx_vmulwev_w_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vmulwev_w_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmulwev.w.hu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu(<8 x i16> %va, <8 x i16> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu(<4 x i32>, <4 x i32>) ++ ++define <2 x i64> @lsx_vmulwev_d_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vmulwev_d_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmulwev.d.wu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu(<4 x i32> %va, <4 x i32> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vmulwev_q_du(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vmulwev_q_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmulwev.q.du $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu.b(<16 x i8>, <16 x i8>) ++ ++define <8 x i16> @lsx_vmulwev_h_bu_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vmulwev_h_bu_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmulwev.h.bu.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu.h(<8 x i16>, <8 x i16>) ++ ++define <4 x i32> @lsx_vmulwev_w_hu_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vmulwev_w_hu_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmulwev.w.hu.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu.w(<4 x i32>, <4 x i32>) ++ ++define <2 x i64> @lsx_vmulwev_d_wu_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vmulwev_d_wu_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmulwev.d.wu.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vmulwev_q_du_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vmulwev_q_du_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmulwev.q.du.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmulwod.h.b(<16 x i8>, <16 x i8>) ++ ++define <8 x i16> @lsx_vmulwod_h_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vmulwod_h_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmulwod.h.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmulwod.w.h(<8 x i16>, <8 x i16>) ++ ++define <4 x i32> @lsx_vmulwod_w_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vmulwod_w_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: 
vmulwod.w.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmulwod.d.w(<4 x i32>, <4 x i32>) ++ ++define <2 x i64> @lsx_vmulwod_d_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vmulwod_d_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmulwod.d.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmulwod.q.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vmulwod_q_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vmulwod_q_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmulwod.q.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu(<16 x i8>, <16 x i8>) ++ ++define <8 x i16> @lsx_vmulwod_h_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vmulwod_h_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmulwod.h.bu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu(<16 x i8> %va, <16 x i8> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu(<8 x i16>, <8 x i16>) ++ ++define <4 x i32> @lsx_vmulwod_w_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vmulwod_w_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmulwod.w.hu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu(<8 x i16> %va, <8 x i16> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu(<4 x i32>, <4 x i32>) ++ ++define <2 x i64> @lsx_vmulwod_d_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vmulwod_d_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmulwod.d.wu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu(<4 x i32> %va, <4 x i32> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vmulwod_q_du(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vmulwod_q_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmulwod.q.du $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu.b(<16 x i8>, <16 x i8>) ++ ++define <8 x i16> @lsx_vmulwod_h_bu_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vmulwod_h_bu_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmulwod.h.bu.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu.h(<8 x i16>, <8 x i16>) ++ ++define <4 x i32> @lsx_vmulwod_w_hu_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vmulwod_w_hu_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmulwod.w.hu.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu.w(<4 x i32>, <4 x 
i32>) ++ ++define <2 x i64> @lsx_vmulwod_d_wu_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vmulwod_d_wu_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmulwod.d.wu.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vmulwod_q_du_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vmulwod_q_du_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vmulwod.q.du.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-neg.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-neg.ll +new file mode 100644 +index 000000000000..43c6e9757614 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-neg.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vneg.b(<16 x i8>) ++ ++define <16 x i8> @lsx_vneg_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vneg_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vneg.b $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vneg.b(<16 x i8> %va) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vneg.h(<8 x i16>) ++ ++define <8 x i16> @lsx_vneg_h(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vneg_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vneg.h $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vneg.h(<8 x i16> %va) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vneg.w(<4 x i32>) ++ ++define <4 x i32> @lsx_vneg_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vneg_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vneg.w $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vneg.w(<4 x i32> %va) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vneg.d(<2 x i64>) ++ ++define <2 x i64> @lsx_vneg_d(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vneg_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vneg.d $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vneg.d(<2 x i64> %va) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nor.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nor.ll +new file mode 100644 +index 000000000000..16619225f2d1 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nor.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vnor.v(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vnor_v(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vnor_v: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vnor.v $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vnor.v(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori.ll +new file mode 100644 +index 000000000000..c2388a1e0da3 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori.ll +@@ 
-0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vnori_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vnori_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vnori.b $vr0, $vr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> %va, i32 1) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-or.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-or.ll +new file mode 100644 +index 000000000000..ab557003d150 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-or.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vor.v(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vor_v(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vor_v: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vor.v $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vor.v(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori.ll +new file mode 100644 +index 000000000000..85c0f432c54a +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vori_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vori_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vori.b $vr0, $vr0, 3 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> %va, i32 3) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-orn.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-orn.ll +new file mode 100644 +index 000000000000..4528628e02c3 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-orn.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vorn.v(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vorn_v(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vorn_v: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vorn.v $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vorn.v(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pack.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pack.ll +new file mode 100644 +index 000000000000..70a3620d1757 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pack.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vpackev.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vpackev_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vpackev_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vpackev.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call 
<16 x i8> @llvm.loongarch.lsx.vpackev.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vpackev.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vpackev_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vpackev_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vpackev.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vpackev.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vpackev.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vpackev_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vpackev_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vpackev.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vpackev.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vpackev.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vpackev_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vpackev_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vpackev.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vpackev.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vpackod.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vpackod_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vpackod_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vpackod.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vpackod.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vpackod.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vpackod_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vpackod_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vpackod.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vpackod.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vpackod.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vpackod_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vpackod_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vpackod.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vpackod.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vpackod.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vpackod_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vpackod_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vpackod.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vpackod.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pcnt.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pcnt.ll +new file mode 100644 +index 000000000000..431b270ab0a1 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pcnt.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vpcnt.b(<16 x i8>) ++ ++define <16 x i8> @lsx_vpcnt_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vpcnt_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vpcnt.b $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x 
i8> @llvm.loongarch.lsx.vpcnt.b(<16 x i8> %va) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vpcnt.h(<8 x i16>) ++ ++define <8 x i16> @lsx_vpcnt_h(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vpcnt_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vpcnt.h $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vpcnt.h(<8 x i16> %va) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vpcnt.w(<4 x i32>) ++ ++define <4 x i32> @lsx_vpcnt_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vpcnt_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vpcnt.w $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vpcnt.w(<4 x i32> %va) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vpcnt.d(<2 x i64>) ++ ++define <2 x i64> @lsx_vpcnt_d(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vpcnt_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vpcnt.d $vr0, $vr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vpcnt.d(<2 x i64> %va) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi.ll +new file mode 100644 +index 000000000000..b8367d98caf6 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vpermi_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vpermi_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vpermi.w $vr0, $vr1, 255 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> %va, <4 x i32> %vb, i32 255) ++ ret <4 x i32> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pick.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pick.ll +new file mode 100644 +index 000000000000..4ebf29e1409c +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pick.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vpickev.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vpickev_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vpickev_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vpickev.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vpickev.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vpickev.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vpickev_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vpickev_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vpickev.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vpickev.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vpickev.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vpickev_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vpickev_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vpickev.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vpickev.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> 
@llvm.loongarch.lsx.vpickev.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vpickev_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vpickev_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vpickev.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vpickev.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vpickod.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vpickod_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vpickod_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vpickod.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vpickod.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vpickod.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vpickod_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vpickod_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vpickod.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vpickod.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vpickod.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vpickod_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vpickod_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vpickod.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vpickod.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vpickod.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vpickod_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vpickod_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vpickod.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vpickod.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr.ll +new file mode 100644 +index 000000000000..ed56d30ce3c4 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8>, i32) ++ ++define i32 @lsx_vpickve2gr_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vpickve2gr_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 15 ++; CHECK-NEXT: ret ++entry: ++ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> %va, i32 15) ++ ret i32 %res ++} ++ ++declare i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16>, i32) ++ ++define i32 @lsx_vpickve2gr_h(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vpickve2gr_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 7 ++; CHECK-NEXT: ret ++entry: ++ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> %va, i32 7) ++ ret i32 %res ++} ++ ++declare i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32>, i32) ++ ++define i32 @lsx_vpickve2gr_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vpickve2gr_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 3 ++; CHECK-NEXT: ret ++entry: ++ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32> %va, i32 3) ++ ret i32 %res ++} ++ ++declare i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64>, i32) ++ ++define i64 
@lsx_vpickve2gr_d(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vpickve2gr_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> %va, i32 1) ++ ret i64 %res ++} ++ ++declare i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8>, i32) ++ ++define i32 @lsx_vpickve2gr_bu(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vpickve2gr_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vpickve2gr.bu $a0, $vr0, 15 ++; CHECK-NEXT: ret ++entry: ++ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> %va, i32 15) ++ ret i32 %res ++} ++ ++declare i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16>, i32) ++ ++define i32 @lsx_vpickve2gr_hu(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vpickve2gr_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 7 ++; CHECK-NEXT: ret ++entry: ++ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> %va, i32 7) ++ ret i32 %res ++} ++ ++declare i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32>, i32) ++ ++define i32 @lsx_vpickve2gr_wu(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vpickve2gr_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vpickve2gr.wu $a0, $vr0, 3 ++; CHECK-NEXT: ret ++entry: ++ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> %va, i32 3) ++ ret i32 %res ++} ++ ++declare i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64>, i32) ++ ++define i64 @lsx_vpickve2gr_du(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vpickve2gr_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vpickve2gr.du $a0, $vr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> %va, i32 1) ++ ret i64 %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replgr2vr.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replgr2vr.ll +new file mode 100644 +index 000000000000..091f1c98c228 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replgr2vr.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32) ++ ++define <16 x i8> @lsx_vreplgr2vr_b(i32 %a) nounwind { ++; CHECK-LABEL: lsx_vreplgr2vr_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vreplgr2vr.b $vr0, $a0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32 %a) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vreplgr2vr.h(i32) ++ ++define <8 x i16> @lsx_vreplgr2vr_h(i32 %a) nounwind { ++; CHECK-LABEL: lsx_vreplgr2vr_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vreplgr2vr.h $vr0, $a0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vreplgr2vr.h(i32 %a) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vreplgr2vr.w(i32) ++ ++define <4 x i32> @lsx_vreplgr2vr_w(i32 %a) nounwind { ++; CHECK-LABEL: lsx_vreplgr2vr_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vreplgr2vr.w $vr0, $a0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vreplgr2vr.w(i32 %a) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vreplgr2vr.d(i64) ++ ++define <2 x i64> @lsx_vreplgr2vr_d(i64 %a) nounwind { ++; CHECK-LABEL: lsx_vreplgr2vr_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vreplgr2vr.d $vr0, $a0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vreplgr2vr.d(i64 %a) ++ ret <2 x i64> %res ++} +diff --git 
a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replve.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replve.ll +new file mode 100644 +index 000000000000..3ba184dad052 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replve.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vreplve.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vreplve_b(<16 x i8> %va, i32 %b) nounwind { ++; CHECK-LABEL: lsx_vreplve_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vreplve.b $vr0, $vr0, $a0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vreplve.b(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vreplve.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vreplve_h(<8 x i16> %va, i32 %b) nounwind { ++; CHECK-LABEL: lsx_vreplve_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vreplve.h $vr0, $vr0, $a0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vreplve.h(<8 x i16> %va, i32 %b) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vreplve.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vreplve_w(<4 x i32> %va, i32 %b) nounwind { ++; CHECK-LABEL: lsx_vreplve_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vreplve.w $vr0, $vr0, $a0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vreplve.w(<4 x i32> %va, i32 %b) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vreplve.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vreplve_d(<2 x i64> %va, i32 %b) nounwind { ++; CHECK-LABEL: lsx_vreplve_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vreplve.d $vr0, $vr0, $a0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vreplve.d(<2 x i64> %va, i32 %b) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei.ll +new file mode 100644 +index 000000000000..9b8af1878cb8 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vreplvei_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vreplvei_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vreplvei.b $vr0, $vr0, 15 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> %va, i32 15) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vreplvei_h(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vreplvei_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vreplvei.h $vr0, $vr0, 7 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> %va, i32 7) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vreplvei_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vreplvei_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vreplvei.w $vr0, $vr0, 3 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> %va, i32 3) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vreplvei_d(<2 x 
i64> %va) nounwind { ++; CHECK-LABEL: lsx_vreplvei_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vreplvei.d $vr0, $vr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64> %va, i32 1) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr.ll +new file mode 100644 +index 000000000000..df8650677147 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vrotr.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vrotr_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vrotr_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vrotr.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vrotr.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vrotr.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vrotr_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vrotr_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vrotr.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vrotr.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vrotr.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vrotr_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vrotr_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vrotr.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vrotr.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vrotr.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vrotr_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vrotr_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vrotr.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vrotr.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vrotri_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vrotri_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vrotri.b $vr0, $vr0, 7 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> %va, i32 7) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vrotri_h(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vrotri_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vrotri.h $vr0, $vr0, 15 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> %va, i32 15) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vrotri_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vrotri_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vrotri.w $vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> %va, i32 31) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vrotri_d(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vrotri_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vrotri.d $vr0, $vr0, 63 ++; CHECK-NEXT: ret 
++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64> %va, i32 63) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sadd.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sadd.ll +new file mode 100644 +index 000000000000..a54f955766df +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sadd.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsadd.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vsadd_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vsadd_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsadd.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsadd.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsadd.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vsadd_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vsadd_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsadd.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsadd.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsadd.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vsadd_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vsadd_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsadd.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsadd.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsadd.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vsadd_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vsadd_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsadd.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsadd.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsadd.bu(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vsadd_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vsadd_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsadd.bu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsadd.bu(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsadd.hu(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vsadd_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vsadd_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsadd.hu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsadd.hu(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsadd.wu(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vsadd_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vsadd_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsadd.wu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsadd.wu(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsadd.du(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vsadd_du(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vsadd_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsadd.du $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> 
@llvm.loongarch.lsx.vsadd.du(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat.ll +new file mode 100644 +index 000000000000..4286842a63b9 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsat_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vsat_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsat.b $vr0, $vr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> %va, i32 1) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsat_h(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vsat_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsat.h $vr0, $vr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> %va, i32 1) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsat_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vsat_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsat.w $vr0, $vr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> %va, i32 1) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsat_d(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vsat_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsat.d $vr0, $vr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> %va, i32 1) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsat_bu(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vsat_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsat.bu $vr0, $vr0, 7 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> %va, i32 7) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsat_hu(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vsat_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsat.hu $vr0, $vr0, 15 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16> %va, i32 15) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsat_wu(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vsat_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsat.wu $vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> %va, i32 31) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsat_du(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vsat_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsat.du $vr0, $vr0, 63 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> %va, i32 63) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq.ll +new file mode 100644 +index 000000000000..3cb4acd82439 +--- /dev/null ++++ 
b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vseq.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vseq_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vseq_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vseq.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vseq.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vseq.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vseq_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vseq_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vseq.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vseq.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vseq.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vseq_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vseq_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vseq.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vseq.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vseq.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vseq_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vseq_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vseq.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vseq.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vseqi_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vseqi_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vseqi.b $vr0, $vr0, 15 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> %va, i32 15) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vseqi_h(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vseqi_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vseqi.h $vr0, $vr0, 15 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> %va, i32 15) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vseqi_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vseqi_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vseqi.w $vr0, $vr0, -16 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> %va, i32 -16) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vseqi_d(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vseqi_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vseqi.d $vr0, $vr0, -16 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> %va, i32 -16) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-set.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-set.ll +new file mode 100644 +index 000000000000..3188fb4e2c2e +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-set.ll +@@ -0,0 +1,38 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 
--mattr=+lsx < %s | FileCheck %s ++ ++declare i32 @llvm.loongarch.lsx.bz.v(<16 x i8>) ++ ++define i32 @lsx_bz_v(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_bz_v: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vseteqz.v $fcc0, $vr0 ++; CHECK-NEXT: bcnez $fcc0, .LBB0_2 ++; CHECK-NEXT: # %bb.1: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 0 ++; CHECK-NEXT: ret ++; CHECK-NEXT: .LBB0_2: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call i32 @llvm.loongarch.lsx.bz.v(<16 x i8> %va) ++ ret i32 %res ++} ++ ++declare i32 @llvm.loongarch.lsx.bnz.v(<16 x i8>) ++ ++define i32 @lsx_bnz_v(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_bnz_v: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsetnez.v $fcc0, $vr0 ++; CHECK-NEXT: bcnez $fcc0, .LBB1_2 ++; CHECK-NEXT: # %bb.1: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 0 ++; CHECK-NEXT: ret ++; CHECK-NEXT: .LBB1_2: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call i32 @llvm.loongarch.lsx.bnz.v(<16 x i8> %va) ++ ret i32 %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-setallnez.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-setallnez.ll +new file mode 100644 +index 000000000000..22e01922e87b +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-setallnez.ll +@@ -0,0 +1,74 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare i32 @llvm.loongarch.lsx.bnz.b(<16 x i8>) ++ ++define i32 @lsx_bnz_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_bnz_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsetallnez.b $fcc0, $vr0 ++; CHECK-NEXT: bcnez $fcc0, .LBB0_2 ++; CHECK-NEXT: # %bb.1: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 0 ++; CHECK-NEXT: ret ++; CHECK-NEXT: .LBB0_2: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call i32 @llvm.loongarch.lsx.bnz.b(<16 x i8> %va) ++ ret i32 %res ++} ++ ++declare i32 @llvm.loongarch.lsx.bnz.h(<8 x i16>) ++ ++define i32 @lsx_bnz_h(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_bnz_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsetallnez.h $fcc0, $vr0 ++; CHECK-NEXT: bcnez $fcc0, .LBB1_2 ++; CHECK-NEXT: # %bb.1: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 0 ++; CHECK-NEXT: ret ++; CHECK-NEXT: .LBB1_2: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call i32 @llvm.loongarch.lsx.bnz.h(<8 x i16> %va) ++ ret i32 %res ++} ++ ++declare i32 @llvm.loongarch.lsx.bnz.w(<4 x i32>) ++ ++define i32 @lsx_bnz_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_bnz_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsetallnez.w $fcc0, $vr0 ++; CHECK-NEXT: bcnez $fcc0, .LBB2_2 ++; CHECK-NEXT: # %bb.1: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 0 ++; CHECK-NEXT: ret ++; CHECK-NEXT: .LBB2_2: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call i32 @llvm.loongarch.lsx.bnz.w(<4 x i32> %va) ++ ret i32 %res ++} ++ ++declare i32 @llvm.loongarch.lsx.bnz.d(<2 x i64>) ++ ++define i32 @lsx_bnz_d(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_bnz_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsetallnez.d $fcc0, $vr0 ++; CHECK-NEXT: bcnez $fcc0, .LBB3_2 ++; CHECK-NEXT: # %bb.1: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 0 ++; CHECK-NEXT: ret ++; CHECK-NEXT: .LBB3_2: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call i32 @llvm.loongarch.lsx.bnz.d(<2 x i64> %va) ++ ret i32 %res ++} +diff --git 
a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-setanyeqz.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-setanyeqz.ll +new file mode 100644 +index 000000000000..96c79c10e468 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-setanyeqz.ll +@@ -0,0 +1,74 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare i32 @llvm.loongarch.lsx.bz.b(<16 x i8>) ++ ++define i32 @lsx_bz_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_bz_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsetanyeqz.b $fcc0, $vr0 ++; CHECK-NEXT: bcnez $fcc0, .LBB0_2 ++; CHECK-NEXT: # %bb.1: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 0 ++; CHECK-NEXT: ret ++; CHECK-NEXT: .LBB0_2: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call i32 @llvm.loongarch.lsx.bz.b(<16 x i8> %va) ++ ret i32 %res ++} ++ ++declare i32 @llvm.loongarch.lsx.bz.h(<8 x i16>) ++ ++define i32 @lsx_bz_h(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_bz_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsetanyeqz.h $fcc0, $vr0 ++; CHECK-NEXT: bcnez $fcc0, .LBB1_2 ++; CHECK-NEXT: # %bb.1: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 0 ++; CHECK-NEXT: ret ++; CHECK-NEXT: .LBB1_2: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call i32 @llvm.loongarch.lsx.bz.h(<8 x i16> %va) ++ ret i32 %res ++} ++ ++declare i32 @llvm.loongarch.lsx.bz.w(<4 x i32>) ++ ++define i32 @lsx_bz_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_bz_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsetanyeqz.w $fcc0, $vr0 ++; CHECK-NEXT: bcnez $fcc0, .LBB2_2 ++; CHECK-NEXT: # %bb.1: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 0 ++; CHECK-NEXT: ret ++; CHECK-NEXT: .LBB2_2: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call i32 @llvm.loongarch.lsx.bz.w(<4 x i32> %va) ++ ret i32 %res ++} ++ ++declare i32 @llvm.loongarch.lsx.bz.d(<2 x i64>) ++ ++define i32 @lsx_bz_d(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_bz_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsetanyeqz.d $fcc0, $vr0 ++; CHECK-NEXT: bcnez $fcc0, .LBB3_2 ++; CHECK-NEXT: # %bb.1: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 0 ++; CHECK-NEXT: ret ++; CHECK-NEXT: .LBB3_2: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call i32 @llvm.loongarch.lsx.bz.d(<2 x i64> %va) ++ ret i32 %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf.ll +new file mode 100644 +index 000000000000..f5d516521e45 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vshuf.b(<16 x i8>, <16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vshuf_b(<16 x i8> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind { ++; CHECK-LABEL: lsx_vshuf_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vshuf.b(<16 x i8> %va, <16 x i8> %vb, <16 x i8> %vc) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vshuf.h(<8 x i16>, <8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vshuf_h(<8 x i16> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind { ++; CHECK-LABEL: lsx_vshuf_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vshuf.h 
$vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vshuf.h(<8 x i16> %va, <8 x i16> %vb, <8 x i16> %vc) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vshuf.w(<4 x i32>, <4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vshuf_w(<4 x i32> %va, <4 x i32> %vb, <4 x i32> %vc) nounwind { ++; CHECK-LABEL: lsx_vshuf_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vshuf.w $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vshuf.w(<4 x i32> %va, <4 x i32> %vb, <4 x i32> %vc) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vshuf.d(<2 x i64>, <2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vshuf_d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) nounwind { ++; CHECK-LABEL: lsx_vshuf_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vshuf.d $vr0, $vr1, $vr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vshuf.d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i.ll +new file mode 100644 +index 000000000000..1ad5f2af5591 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vshuf4i_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vshuf4i_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vshuf4i.b $vr0, $vr0, 255 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> %va, i32 255) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vshuf4i_h(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vshuf4i_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vshuf4i.h $vr0, $vr0, 255 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16> %va, i32 255) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vshuf4i_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vshuf4i_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vshuf4i.w $vr0, $vr0, 255 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> %va, i32 255) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vshuf4i_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vshuf4i_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 255 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64> %va, <2 x i64> %vb, i32 255) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-signcov.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-signcov.ll +new file mode 100644 +index 000000000000..3997b0cc995c +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-signcov.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsigncov.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vsigncov_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: 
lsx_vsigncov_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsigncov.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsigncov.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsigncov.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vsigncov_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vsigncov_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsigncov.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsigncov.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsigncov.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vsigncov_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vsigncov_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsigncov.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsigncov.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsigncov.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vsigncov_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vsigncov_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsigncov.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsigncov.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle.ll +new file mode 100644 +index 000000000000..5a9d5f06e63f +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle.ll +@@ -0,0 +1,194 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsle.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vsle_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vsle_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsle.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsle.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsle.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vsle_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vsle_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsle.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsle.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsle.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vsle_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vsle_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsle.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsle.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsle.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vsle_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vsle_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsle.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsle.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vslei_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vslei_b: ++; CHECK: # %bb.0: # 
%entry ++; CHECK-NEXT: vslei.b $vr0, $vr0, 15 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> %va, i32 15) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vslei_h(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vslei_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vslei.h $vr0, $vr0, 15 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> %va, i32 15) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vslei_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vslei_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vslei.w $vr0, $vr0, -16 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> %va, i32 -16) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vslei_d(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vslei_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vslei.d $vr0, $vr0, -16 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64> %va, i32 -16) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsle.bu(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vsle_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vsle_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsle.bu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsle.bu(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsle.hu(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vsle_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vsle_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsle.hu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsle.hu(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsle.wu(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vsle_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vsle_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsle.wu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsle.wu(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsle.du(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vsle_du(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vsle_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsle.du $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsle.du(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vslei_bu(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vslei_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vslei.bu $vr0, $vr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> %va, i32 1) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vslei_hu(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vslei_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vslei.hu $vr0, $vr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> %va, i32 1) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> 
@llvm.loongarch.lsx.vslei.wu(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vslei_wu(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vslei_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vslei.wu $vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> %va, i32 31) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vslei_du(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vslei_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vslei.du $vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64> %va, i32 31) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll.ll +new file mode 100644 +index 000000000000..7bc20af41f17 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsll.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vsll_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vsll_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsll.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsll.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsll.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vsll_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vsll_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsll.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsll.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsll.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vsll_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vsll_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsll.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsll.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsll.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vsll_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vsll_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsll.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsll.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vslli_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vslli_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vslli.b $vr0, $vr0, 7 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> %va, i32 7) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vslli_h(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vslli_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vslli.h $vr0, $vr0, 15 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> %va, i32 15) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vslli_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vslli_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: 
vslli.w $vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> %va, i32 31) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vslli_d(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vslli_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vslli.d $vr0, $vr0, 63 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64> %va, i32 63) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil.ll +new file mode 100644 +index 000000000000..29ab70da1ced +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil.ll +@@ -0,0 +1,74 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8>, i32) ++ ++define <8 x i16> @lsx_vsllwil_h_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vsllwil_h_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsllwil.h.b $vr0, $vr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> %va, i32 1) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16>, i32) ++ ++define <4 x i32> @lsx_vsllwil_w_h(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vsllwil_w_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsllwil.w.h $vr0, $vr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> %va, i32 1) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32>, i32) ++ ++define <2 x i64> @lsx_vsllwil_d_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vsllwil_d_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsllwil.d.w $vr0, $vr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> %va, i32 1) ++ ret <2 x i64> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8>, i32) ++ ++define <8 x i16> @lsx_vsllwil_hu_bu(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vsllwil_hu_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsllwil.hu.bu $vr0, $vr0, 7 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> %va, i32 7) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16>, i32) ++ ++define <4 x i32> @lsx_vsllwil_wu_hu(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vsllwil_wu_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsllwil.wu.hu $vr0, $vr0, 15 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> %va, i32 15) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32>, i32) ++ ++define <2 x i64> @lsx_vsllwil_du_wu(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vsllwil_du_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsllwil.du.wu $vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> %va, i32 31) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt.ll +new file mode 100644 +index 000000000000..18683e9dc46f +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt.ll +@@ -0,0 +1,194 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; 
RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vslt.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vslt_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vslt_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vslt.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vslt.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vslt.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vslt_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vslt_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vslt.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vslt.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vslt.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vslt_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vslt_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vslt.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vslt.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vslt.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vslt_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vslt_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vslt.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vslt.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vslti_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vslti_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vslti.b $vr0, $vr0, 15 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8> %va, i32 15) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vslti_h(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vslti_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vslti.h $vr0, $vr0, 15 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> %va, i32 15) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vslti_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vslti_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vslti.w $vr0, $vr0, -16 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> %va, i32 -16) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vslti_d(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vslti_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vslti.d $vr0, $vr0, -16 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64> %va, i32 -16) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vslt.bu(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vslt_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vslt_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vslt.bu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vslt.bu(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vslt.hu(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vslt_hu(<8 x i16> %va, <8 x i16> %vb) 
nounwind { ++; CHECK-LABEL: lsx_vslt_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vslt.hu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vslt.hu(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vslt.wu(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vslt_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vslt_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vslt.wu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vslt.wu(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vslt.du(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vslt_du(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vslt_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vslt.du $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vslt.du(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vslti_bu(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vslti_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vslti.bu $vr0, $vr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> %va, i32 1) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vslti_hu(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vslti_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vslti.hu $vr0, $vr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> %va, i32 1) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vslti_wu(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vslti_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vslti.wu $vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32> %va, i32 31) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vslti_du(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vslti_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vslti.du $vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> %va, i32 31) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra.ll +new file mode 100644 +index 000000000000..e85c8464c18e +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsra.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vsra_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vsra_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsra.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsra.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsra.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vsra_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vsra_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsra.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> 
@llvm.loongarch.lsx.vsra.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsra.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vsra_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vsra_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsra.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsra.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsra.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vsra_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vsra_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsra.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsra.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsrai_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vsrai_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrai.b $vr0, $vr0, 7 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> %va, i32 7) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsrai_h(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vsrai_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrai.h $vr0, $vr0, 15 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> %va, i32 15) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsrai_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vsrai_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrai.w $vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> %va, i32 31) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsrai_d(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vsrai_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrai.d $vr0, $vr0, 63 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> %va, i32 63) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sran.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sran.ll +new file mode 100644 +index 000000000000..4ffe5a704c2c +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sran.ll +@@ -0,0 +1,38 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsran.b.h(<8 x i16>, <8 x i16>) ++ ++define <16 x i8> @lsx_vsran_b_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vsran_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsran.b.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsran.b.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsran.h.w(<4 x i32>, <4 x i32>) ++ ++define <8 x i16> @lsx_vsran_h_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vsran_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsran.h.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsran.h.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsran.w.d(<2 x i64>, <2 x i64>) ++ 
++define <4 x i32> @lsx_vsran_w_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vsran_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsran.w.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsran.w.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <4 x i32> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani.ll +new file mode 100644 +index 000000000000..717c641616c8 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsrani_b_h(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrani_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrani.b.h $vr0, $vr1, 15 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8> %va, <16 x i8> %vb, i32 15) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsrani_h_w(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrani_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrani.h.w $vr0, $vr1, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> %va, <8 x i16> %vb, i32 31) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsrani_w_d(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrani_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrani.w.d $vr0, $vr1, 63 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> %va, <4 x i32> %vb, i32 63) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsrani_d_q(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrani_d_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrani.d.q $vr0, $vr1, 127 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64> %va, <2 x i64> %vb, i32 127) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar.ll +new file mode 100644 +index 000000000000..8b52b7ac9631 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrar.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vsrar_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrar_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrar.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrar.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsrar.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vsrar_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrar_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrar.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrar.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 
x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsrar.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vsrar_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrar_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrar.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrar.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsrar.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vsrar_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrar_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrar.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrar.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsrari_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vsrari_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrari.b $vr0, $vr0, 7 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8> %va, i32 7) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsrari_h(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vsrari_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrari.h $vr0, $vr0, 15 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> %va, i32 15) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsrari_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vsrari_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrari.w $vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32> %va, i32 31) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsrari_d(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vsrari_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrari.d $vr0, $vr0, 63 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64> %va, i32 63) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarn.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarn.ll +new file mode 100644 +index 000000000000..d4cdfb5359ea +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarn.ll +@@ -0,0 +1,38 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrarn.b.h(<8 x i16>, <8 x i16>) ++ ++define <16 x i8> @lsx_vsrarn_b_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrarn_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrarn.b.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrarn.b.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsrarn.h.w(<4 x i32>, <4 x i32>) ++ ++define <8 x i16> @lsx_vsrarn_h_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrarn_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrarn.h.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrarn.h.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsrarn.w.d(<2 x i64>, <2 x i64>) ++ ++define <4 x i32> 
@lsx_vsrarn_w_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrarn_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrarn.w.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrarn.w.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <4 x i32> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni.ll +new file mode 100644 +index 000000000000..2253e88372fc +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsrarni_b_h(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrarni_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrarni.b.h $vr0, $vr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 1) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsrarni_h_w(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrarni_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrarni.h.w $vr0, $vr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 1) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsrarni_w_d(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrarni_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrarni.w.d $vr0, $vr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 1) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsrarni_d_q(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrarni_d_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrarni.d.q $vr0, $vr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 1) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl.ll +new file mode 100644 +index 000000000000..1cddd9622233 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrl.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vsrl_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrl_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrl.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrl.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsrl.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vsrl_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrl_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrl.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrl.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} 
++ ++declare <4 x i32> @llvm.loongarch.lsx.vsrl.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vsrl_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrl_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrl.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrl.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsrl.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vsrl_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrl_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrl.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrl.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsrli_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vsrli_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrli.b $vr0, $vr0, 7 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> %va, i32 7) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsrli_h(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vsrli_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrli.h $vr0, $vr0, 15 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> %va, i32 15) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsrli_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vsrli_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrli.w $vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32> %va, i32 31) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsrli_d(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vsrli_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrli.d $vr0, $vr0, 63 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> %va, i32 63) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srln.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srln.ll +new file mode 100644 +index 000000000000..1c9b23243ffb +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srln.ll +@@ -0,0 +1,38 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrln.b.h(<8 x i16>, <8 x i16>) ++ ++define <16 x i8> @lsx_vsrln_b_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrln_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrln.b.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrln.b.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsrln.h.w(<4 x i32>, <4 x i32>) ++ ++define <8 x i16> @lsx_vsrln_h_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrln_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrln.h.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrln.h.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsrln.w.d(<2 x i64>, <2 x i64>) ++ ++define <4 x i32> @lsx_vsrln_w_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; 
CHECK-LABEL: lsx_vsrln_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrln.w.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrln.w.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <4 x i32> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni.ll +new file mode 100644 +index 000000000000..6e523efa1824 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsrlni_b_h(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrlni_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrlni.b.h $vr0, $vr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 1) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsrlni_h_w(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrlni_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrlni.h.w $vr0, $vr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 1) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsrlni_w_d(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrlni_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrlni.w.d $vr0, $vr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 1) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsrlni_d_q(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrlni_d_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrlni.d.q $vr0, $vr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 1) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr.ll +new file mode 100644 +index 000000000000..51638fa1a47f +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrlr.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vsrlr_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrlr_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrlr.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrlr.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsrlr.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vsrlr_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrlr_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrlr.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrlr.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsrlr.w(<4 x i32>, <4 x i32>) 
++ ++define <4 x i32> @lsx_vsrlr_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrlr_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrlr.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrlr.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsrlr.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vsrlr_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrlr_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrlr.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrlr.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsrlri_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vsrlri_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrlri.b $vr0, $vr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8> %va, i32 1) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsrlri_h(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vsrlri_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrlri.h $vr0, $vr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16> %va, i32 1) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsrlri_w(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vsrlri_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrlri.w $vr0, $vr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> %va, i32 1) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsrlri_d(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vsrlri_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrlri.d $vr0, $vr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> %va, i32 1) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrn.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrn.ll +new file mode 100644 +index 000000000000..893e51396241 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrn.ll +@@ -0,0 +1,38 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrlrn.b.h(<8 x i16>, <8 x i16>) ++ ++define <16 x i8> @lsx_vsrlrn_b_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrlrn_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrlrn.b.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrlrn.b.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsrlrn.h.w(<4 x i32>, <4 x i32>) ++ ++define <8 x i16> @lsx_vsrlrn_h_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrlrn_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrlrn.h.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrlrn.h.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsrlrn.w.d(<2 x i64>, <2 x i64>) ++ ++define <4 x i32> @lsx_vsrlrn_w_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrlrn_w_d: ++; 
CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrlrn.w.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrlrn.w.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <4 x i32> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni.ll +new file mode 100644 +index 000000000000..d1ea450d2237 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsrlrni_b_h(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrlrni_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrlrni.b.h $vr0, $vr1, 15 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 15) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsrlrni_h_w(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrlrni_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrlrni.h.w $vr0, $vr1, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 31) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsrlrni_w_d(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrlrni_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrlrni.w.d $vr0, $vr1, 63 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 63) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsrlrni_d_q(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vsrlrni_d_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsrlrni.d.q $vr0, $vr1, 127 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 127) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssran.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssran.ll +new file mode 100644 +index 000000000000..cecccbb730c9 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssran.ll +@@ -0,0 +1,74 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssran.b.h(<8 x i16>, <8 x i16>) ++ ++define <16 x i8> @lsx_vssran_b_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vssran_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssran.b.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssran.b.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssran.h.w(<4 x i32>, <4 x i32>) ++ ++define <8 x i16> @lsx_vssran_h_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vssran_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssran.h.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssran.h.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> 
@llvm.loongarch.lsx.vssran.w.d(<2 x i64>, <2 x i64>) ++ ++define <4 x i32> @lsx_vssran_w_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vssran_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssran.w.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssran.w.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssran.bu.h(<8 x i16>, <8 x i16>) ++ ++define <16 x i8> @lsx_vssran_bu_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vssran_bu_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssran.bu.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssran.bu.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssran.hu.w(<4 x i32>, <4 x i32>) ++ ++define <8 x i16> @lsx_vssran_hu_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vssran_hu_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssran.hu.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssran.hu.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssran.wu.d(<2 x i64>, <2 x i64>) ++ ++define <4 x i32> @lsx_vssran_wu_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vssran_wu_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssran.wu.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssran.wu.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <4 x i32> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani.ll +new file mode 100644 +index 000000000000..57b8eb169866 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vssrani_b_h(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrani_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrani.b.h $vr0, $vr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8> %va, <16 x i8> %vb, i32 1) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vssrani_h_w(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrani_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrani.h.w $vr0, $vr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> %va, <8 x i16> %vb, i32 1) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vssrani_w_d(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrani_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrani.w.d $vr0, $vr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32> %va, <4 x i32> %vb, i32 1) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vssrani_d_q(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrani_d_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrani.d.q $vr0, $vr1, 1 ++; CHECK-NEXT: ret 
++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> %va, <2 x i64> %vb, i32 1) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vssrani_bu_h(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrani_bu_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrani.bu.h $vr0, $vr1, 15 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 15) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vssrani_hu_w(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrani_hu_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrani.hu.w $vr0, $vr1, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 31) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vssrani_wu_d(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrani_wu_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrani.wu.d $vr0, $vr1, 63 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 63) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vssrani_du_q(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrani_du_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrani.du.q $vr0, $vr1, 127 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> %va, <2 x i64> %vb, i32 127) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarn.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarn.ll +new file mode 100644 +index 000000000000..c6b7d9ec8e1d +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarn.ll +@@ -0,0 +1,74 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssrarn.b.h(<8 x i16>, <8 x i16>) ++ ++define <16 x i8> @lsx_vssrarn_b_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrarn_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrarn.b.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrarn.b.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssrarn.h.w(<4 x i32>, <4 x i32>) ++ ++define <8 x i16> @lsx_vssrarn_h_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrarn_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrarn.h.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrarn.h.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrarn.w.d(<2 x i64>, <2 x i64>) ++ ++define <4 x i32> @lsx_vssrarn_w_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrarn_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrarn.w.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrarn.w.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssrarn.bu.h(<8 x i16>, <8 x i16>) ++ ++define <16 
x i8> @lsx_vssrarn_bu_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrarn_bu_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrarn.bu.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrarn.bu.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssrarn.hu.w(<4 x i32>, <4 x i32>) ++ ++define <8 x i16> @lsx_vssrarn_hu_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrarn_hu_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrarn.hu.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrarn.hu.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrarn.wu.d(<2 x i64>, <2 x i64>) ++ ++define <4 x i32> @lsx_vssrarn_wu_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrarn_wu_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrarn.wu.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrarn.wu.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <4 x i32> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni.ll +new file mode 100644 +index 000000000000..1a2e91962ac3 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vssrarni_b_h(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrarni_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrarni.b.h $vr0, $vr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 1) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vssrarni_h_w(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrarni_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrarni.h.w $vr0, $vr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 1) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vssrarni_w_d(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrarni_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrarni.w.d $vr0, $vr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 1) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vssrarni_d_q(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrarni_d_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrarni.d.q $vr0, $vr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 1) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vssrarni_bu_h(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrarni_bu_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrarni.bu.h $vr0, $vr1, 15 ++; CHECK-NEXT: ret ++entry: ++ 
%res = call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 15) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vssrarni_hu_w(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrarni_hu_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrarni.hu.w $vr0, $vr1, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 31) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vssrarni_wu_d(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrarni_wu_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrarni.wu.d $vr0, $vr1, 63 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 63) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vssrarni_du_q(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrarni_du_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrarni.du.q $vr0, $vr1, 127 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 127) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrln.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrln.ll +new file mode 100644 +index 000000000000..697ccc3962a8 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrln.ll +@@ -0,0 +1,74 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssrln.b.h(<8 x i16>, <8 x i16>) ++ ++define <16 x i8> @lsx_vssrln_b_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrln_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrln.b.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrln.b.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssrln.h.w(<4 x i32>, <4 x i32>) ++ ++define <8 x i16> @lsx_vssrln_h_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrln_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrln.h.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrln.h.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrln.w.d(<2 x i64>, <2 x i64>) ++ ++define <4 x i32> @lsx_vssrln_w_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrln_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrln.w.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrln.w.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssrln.bu.h(<8 x i16>, <8 x i16>) ++ ++define <16 x i8> @lsx_vssrln_bu_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrln_bu_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrln.bu.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrln.bu.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssrln.hu.w(<4 x i32>, <4 x i32>) ++ ++define <8 x i16> @lsx_vssrln_hu_w(<4 x 
i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrln_hu_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrln.hu.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrln.hu.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrln.wu.d(<2 x i64>, <2 x i64>) ++ ++define <4 x i32> @lsx_vssrln_wu_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrln_wu_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrln.wu.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrln.wu.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <4 x i32> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni.ll +new file mode 100644 +index 000000000000..8dd41e7abe87 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vssrlni_b_h(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrlni_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrlni.b.h $vr0, $vr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 1) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vssrlni_h_w(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrlni_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrlni.h.w $vr0, $vr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 1) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vssrlni_w_d(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrlni_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrlni.w.d $vr0, $vr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 1) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vssrlni_d_q(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrlni_d_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrlni.d.q $vr0, $vr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 1) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vssrlni_bu_h(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrlni_bu_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrlni.bu.h $vr0, $vr1, 15 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 15) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vssrlni_hu_w(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrlni_hu_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrlni.hu.w $vr0, $vr1, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> 
@llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 31) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vssrlni_wu_d(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrlni_wu_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrlni.wu.d $vr0, $vr1, 63 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 63) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vssrlni_du_q(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrlni_du_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrlni.du.q $vr0, $vr1, 127 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 127) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrn.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrn.ll +new file mode 100644 +index 000000000000..a8e76cbaa7fd +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrn.ll +@@ -0,0 +1,74 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssrlrn.b.h(<8 x i16>, <8 x i16>) ++ ++define <16 x i8> @lsx_vssrlrn_b_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrlrn_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrlrn.b.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrn.b.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssrlrn.h.w(<4 x i32>, <4 x i32>) ++ ++define <8 x i16> @lsx_vssrlrn_h_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrlrn_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrlrn.h.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlrn.h.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrlrn.w.d(<2 x i64>, <2 x i64>) ++ ++define <4 x i32> @lsx_vssrlrn_w_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrlrn_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrlrn.w.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlrn.w.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssrlrn.bu.h(<8 x i16>, <8 x i16>) ++ ++define <16 x i8> @lsx_vssrlrn_bu_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrlrn_bu_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrlrn.bu.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrn.bu.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssrlrn.hu.w(<4 x i32>, <4 x i32>) ++ ++define <8 x i16> @lsx_vssrlrn_hu_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrlrn_hu_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrlrn.hu.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlrn.hu.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrlrn.wu.d(<2 x i64>, <2 x i64>) ++ ++define <4 x i32> @lsx_vssrlrn_wu_d(<2 x i64> %va, <2 x i64> %vb) 
nounwind { ++; CHECK-LABEL: lsx_vssrlrn_wu_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrlrn.wu.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlrn.wu.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <4 x i32> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni.ll +new file mode 100644 +index 000000000000..869e81b2b09d +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vssrlrni_b_h(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrlrni_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrlrni.b.h $vr0, $vr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 1) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vssrlrni_h_w(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrlrni_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrlrni.h.w $vr0, $vr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 1) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vssrlrni_w_d(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrlrni_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrlrni.w.d $vr0, $vr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 1) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vssrlrni_d_q(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrlrni_d_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrlrni.d.q $vr0, $vr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 1) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vssrlrni_bu_h(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrlrni_bu_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrlrni.bu.h $vr0, $vr1, 15 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 15) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vssrlrni_hu_w(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrlrni_hu_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrlrni.hu.w $vr0, $vr1, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 31) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vssrlrni_wu_d(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrlrni_wu_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrlrni.wu.d $vr0, $vr1, 63 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> 
@llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 63) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vssrlrni_du_q(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vssrlrni_du_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssrlrni.du.q $vr0, $vr1, 127 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 127) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssub.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssub.ll +new file mode 100644 +index 000000000000..c594b426d650 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssub.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssub.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vssub_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vssub_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssub.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssub.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssub.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vssub_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vssub_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssub.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssub.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssub.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vssub_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vssub_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssub.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssub.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vssub.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vssub_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vssub_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssub.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssub.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssub.bu(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vssub_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vssub_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssub.bu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssub.bu(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssub.hu(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vssub_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vssub_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssub.hu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssub.hu(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssub.wu(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vssub_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vssub_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssub.wu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ 
%res = call <4 x i32> @llvm.loongarch.lsx.vssub.wu(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vssub.du(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vssub_du(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vssub_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vssub.du $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssub.du(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st.ll +new file mode 100644 +index 000000000000..798f509f2318 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare void @llvm.loongarch.lsx.vst(<16 x i8>, i8*, i32) ++ ++define void @lsx_vst(<16 x i8> %va, i8* %p) nounwind { ++; CHECK-LABEL: lsx_vst: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vst $vr0, $a0, -2048 ++; CHECK-NEXT: ret ++entry: ++ call void @llvm.loongarch.lsx.vst(<16 x i8> %va, i8* %p, i32 -2048) ++ ret void ++} ++ ++declare void @llvm.loongarch.lsx.vstx(<16 x i8>, i8*, i64) ++ ++define void @lsx_vstx(<16 x i8> %va, i8* %p, i64 %c) nounwind { ++; CHECK-LABEL: lsx_vstx: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vstx $vr0, $a0, $a1 ++; CHECK-NEXT: ret ++entry: ++ call void @llvm.loongarch.lsx.vstx(<16 x i8> %va, i8* %p, i64 %c) ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm.ll +new file mode 100644 +index 000000000000..6b9e7a9d7462 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare void @llvm.loongarch.lsx.vstelm.b(<16 x i8>, i8*, i32, i32) ++ ++define void @lsx_vstelm_b(<16 x i8> %va, i8* %p) nounwind { ++; CHECK-LABEL: lsx_vstelm_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vstelm.b $vr0, $a0, 1, 15 ++; CHECK-NEXT: ret ++entry: ++ call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> %va, i8* %p, i32 1, i32 15) ++ ret void ++} ++ ++declare void @llvm.loongarch.lsx.vstelm.h(<8 x i16>, i8*, i32, i32) ++ ++define void @lsx_vstelm_h(<8 x i16> %va, i8* %p) nounwind { ++; CHECK-LABEL: lsx_vstelm_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vstelm.h $vr0, $a0, 2, 7 ++; CHECK-NEXT: ret ++entry: ++ call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> %va, i8* %p, i32 2, i32 7) ++ ret void ++} ++ ++declare void @llvm.loongarch.lsx.vstelm.w(<4 x i32>, i8*, i32, i32) ++ ++define void @lsx_vstelm_w(<4 x i32> %va, i8* %p) nounwind { ++; CHECK-LABEL: lsx_vstelm_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vstelm.w $vr0, $a0, 4, 3 ++; CHECK-NEXT: ret ++entry: ++ call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> %va, i8* %p, i32 4, i32 3) ++ ret void ++} ++ ++declare void @llvm.loongarch.lsx.vstelm.d(<2 x i64>, i8*, i32, i32) ++ ++define void @lsx_vstelm_d(<2 x i64> %va, i8* %p) nounwind { ++; CHECK-LABEL: lsx_vstelm_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vstelm.d $vr0, $a0, 8, 1 ++; CHECK-NEXT: ret ++entry: ++ call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> %va, i8* %p, i32 8, i32 1) ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sub.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sub.ll 
+new file mode 100644 +index 000000000000..5c04a3d8de0d +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sub.ll +@@ -0,0 +1,62 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsub.b(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vsub_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vsub_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsub.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsub.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsub.h(<8 x i16>, <8 x i16>) ++ ++define <8 x i16> @lsx_vsub_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vsub_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsub.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsub.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsub.w(<4 x i32>, <4 x i32>) ++ ++define <4 x i32> @lsx_vsub_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vsub_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsub.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsub.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsub.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vsub_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vsub_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsub.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsub.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsub.q(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vsub_q(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vsub_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsub.q $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsub.q(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi.ll +new file mode 100644 +index 000000000000..304a4e4a78cc +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsubi_bu(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vsubi_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsubi.bu $vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8> %va, i32 31) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsubi_hu(<8 x i16> %va) nounwind { ++; CHECK-LABEL: lsx_vsubi_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsubi.hu $vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> %va, i32 31) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsubi_wu(<4 x i32> %va) nounwind { ++; CHECK-LABEL: lsx_vsubi_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsubi.wu 
$vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> %va, i32 31) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsubi_du(<2 x i64> %va) nounwind { ++; CHECK-LABEL: lsx_vsubi_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsubi.du $vr0, $vr0, 31 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> %va, i32 31) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subw.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subw.ll +new file mode 100644 +index 000000000000..48100db74334 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subw.ll +@@ -0,0 +1,194 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsubwev.h.b(<16 x i8>, <16 x i8>) ++ ++define <8 x i16> @lsx_vsubwev_h_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vsubwev_h_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsubwev.h.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsubwev.w.h(<8 x i16>, <8 x i16>) ++ ++define <4 x i32> @lsx_vsubwev_w_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vsubwev_w_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsubwev.w.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsubwev.d.w(<4 x i32>, <4 x i32>) ++ ++define <2 x i64> @lsx_vsubwev_d_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vsubwev_d_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsubwev.d.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsubwev.q.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vsubwev_q_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vsubwev_q_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsubwev.q.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsubwev.h.bu(<16 x i8>, <16 x i8>) ++ ++define <8 x i16> @lsx_vsubwev_h_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vsubwev_h_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsubwev.h.bu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.bu(<16 x i8> %va, <16 x i8> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsubwev.w.hu(<8 x i16>, <8 x i16>) ++ ++define <4 x i32> @lsx_vsubwev_w_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vsubwev_w_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsubwev.w.hu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.hu(<8 x i16> %va, <8 x i16> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsubwev.d.wu(<4 x i32>, <4 x i32>) ++ ++define <2 x i64> @lsx_vsubwev_d_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: 
lsx_vsubwev_d_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsubwev.d.wu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.wu(<4 x i32> %va, <4 x i32> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsubwev.q.du(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vsubwev_q_du(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vsubwev_q_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsubwev.q.du $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.du(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsubwod.h.b(<16 x i8>, <16 x i8>) ++ ++define <8 x i16> @lsx_vsubwod_h_b(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vsubwod_h_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsubwod.h.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.b(<16 x i8> %va, <16 x i8> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsubwod.w.h(<8 x i16>, <8 x i16>) ++ ++define <4 x i32> @lsx_vsubwod_w_h(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vsubwod_w_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsubwod.w.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.h(<8 x i16> %va, <8 x i16> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsubwod.d.w(<4 x i32>, <4 x i32>) ++ ++define <2 x i64> @lsx_vsubwod_d_w(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vsubwod_d_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsubwod.d.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.w(<4 x i32> %va, <4 x i32> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsubwod.q.d(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vsubwod_q_d(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vsubwod_q_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsubwod.q.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.d(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsubwod.h.bu(<16 x i8>, <16 x i8>) ++ ++define <8 x i16> @lsx_vsubwod_h_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vsubwod_h_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsubwod.h.bu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.bu(<16 x i8> %va, <16 x i8> %vb) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsubwod.w.hu(<8 x i16>, <8 x i16>) ++ ++define <4 x i32> @lsx_vsubwod_w_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK-LABEL: lsx_vsubwod_w_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsubwod.w.hu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.hu(<8 x i16> %va, <8 x i16> %vb) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsubwod.d.wu(<4 x i32>, <4 x i32>) ++ ++define <2 x i64> @lsx_vsubwod_d_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK-LABEL: lsx_vsubwod_d_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsubwod.d.wu $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.wu(<4 x i32> %va, <4 x i32> %vb) ++ ret <2 x i64> %res ++} ++ ++declare <2 x i64> 
@llvm.loongarch.lsx.vsubwod.q.du(<2 x i64>, <2 x i64>) ++ ++define <2 x i64> @lsx_vsubwod_q_du(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK-LABEL: lsx_vsubwod_q_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vsubwod.q.du $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.du(<2 x i64> %va, <2 x i64> %vb) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xor.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xor.ll +new file mode 100644 +index 000000000000..72a1fe93c2c0 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xor.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vxor.v(<16 x i8>, <16 x i8>) ++ ++define <16 x i8> @lsx_vxor_v(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK-LABEL: lsx_vxor_v: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vxor.v(<16 x i8> %va, <16 x i8> %vb) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori.ll +new file mode 100644 +index 000000000000..09669cd5ac14 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vxori_b(<16 x i8> %va) nounwind { ++; CHECK-LABEL: lsx_vxori_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vxori.b $vr0, $vr0, 3 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> %va, i32 3) ++ ret <16 x i8> %res ++} +-- +2.20.1 + diff --git a/0015-LoongArch-Add-LASX-intrinsic-testcases.patch b/0015-LoongArch-Add-LASX-intrinsic-testcases.patch new file mode 100644 index 0000000000000000000000000000000000000000..6fe4afedb0392d515267cd04e1983e8039bd6b95 --- /dev/null +++ b/0015-LoongArch-Add-LASX-intrinsic-testcases.patch @@ -0,0 +1,10198 @@ +From 9c263a60aeeefe92b71e566b482e833e2b6d8f4f Mon Sep 17 00:00:00 2001 +From: chenli +Date: Sat, 19 Aug 2023 17:12:27 +0800 +Subject: [PATCH 15/66] [LoongArch] Add LASX intrinsic testcases + +Depends on D155830 + +Reviewed By: SixWeining + +Differential Revision: https://reviews.llvm.org/D155835 + +(cherry picked from commit 83311b2b5d1b9869f9a7b265994394ea898448a2) + +Change-Id: I44c29158a47583668a19f829ed7f7fc36cd8d573 +--- + .../CodeGen/LoongArch/lasx/intrinsic-absd.ll | 98 ++++ + .../CodeGen/LoongArch/lasx/intrinsic-add.ll | 62 ++ + .../CodeGen/LoongArch/lasx/intrinsic-adda.ll | 50 ++ + .../CodeGen/LoongArch/lasx/intrinsic-addi.ll | 50 ++ + .../CodeGen/LoongArch/lasx/intrinsic-addw.ll | 290 ++++++++++ + .../CodeGen/LoongArch/lasx/intrinsic-and.ll | 14 + + .../CodeGen/LoongArch/lasx/intrinsic-andi.ll | 14 + + .../CodeGen/LoongArch/lasx/intrinsic-andn.ll | 14 + + .../CodeGen/LoongArch/lasx/intrinsic-avg.ll | 98 ++++ + .../CodeGen/LoongArch/lasx/intrinsic-avgr.ll | 98 ++++ + .../LoongArch/lasx/intrinsic-bitclr.ll | 98 ++++ + .../LoongArch/lasx/intrinsic-bitrev.ll | 98 ++++ + .../LoongArch/lasx/intrinsic-bitsel.ll | 14 + + .../LoongArch/lasx/intrinsic-bitseli.ll | 14 + + .../LoongArch/lasx/intrinsic-bitset.ll | 98 ++++ + 
.../CodeGen/LoongArch/lasx/intrinsic-bsll.ll | 14 + + .../CodeGen/LoongArch/lasx/intrinsic-bsrl.ll | 14 + + .../CodeGen/LoongArch/lasx/intrinsic-clo.ll | 50 ++ + .../CodeGen/LoongArch/lasx/intrinsic-clz.ll | 50 ++ + .../CodeGen/LoongArch/lasx/intrinsic-div.ll | 98 ++++ + .../LoongArch/lasx/intrinsic-ext2xv.ll | 146 +++++ + .../CodeGen/LoongArch/lasx/intrinsic-exth.ll | 98 ++++ + .../CodeGen/LoongArch/lasx/intrinsic-extl.ll | 26 + + .../LoongArch/lasx/intrinsic-extrins.ll | 50 ++ + .../CodeGen/LoongArch/lasx/intrinsic-fadd.ll | 26 + + .../LoongArch/lasx/intrinsic-fclass.ll | 26 + + .../CodeGen/LoongArch/lasx/intrinsic-fcmp.ll | 530 ++++++++++++++++++ + .../CodeGen/LoongArch/lasx/intrinsic-fcvt.ll | 26 + + .../CodeGen/LoongArch/lasx/intrinsic-fcvth.ll | 26 + + .../CodeGen/LoongArch/lasx/intrinsic-fcvtl.ll | 26 + + .../CodeGen/LoongArch/lasx/intrinsic-fdiv.ll | 26 + + .../CodeGen/LoongArch/lasx/intrinsic-ffint.ll | 86 +++ + .../CodeGen/LoongArch/lasx/intrinsic-flogb.ll | 26 + + .../CodeGen/LoongArch/lasx/intrinsic-fmadd.ll | 26 + + .../CodeGen/LoongArch/lasx/intrinsic-fmax.ll | 26 + + .../CodeGen/LoongArch/lasx/intrinsic-fmaxa.ll | 26 + + .../CodeGen/LoongArch/lasx/intrinsic-fmin.ll | 26 + + .../CodeGen/LoongArch/lasx/intrinsic-fmina.ll | 26 + + .../CodeGen/LoongArch/lasx/intrinsic-fmsub.ll | 26 + + .../CodeGen/LoongArch/lasx/intrinsic-fmul.ll | 26 + + .../LoongArch/lasx/intrinsic-fnmadd.ll | 26 + + .../LoongArch/lasx/intrinsic-fnmsub.ll | 26 + + .../LoongArch/lasx/intrinsic-frecip.ll | 26 + + .../CodeGen/LoongArch/lasx/intrinsic-frint.ll | 122 ++++ + .../LoongArch/lasx/intrinsic-frsqrt.ll | 26 + + .../CodeGen/LoongArch/lasx/intrinsic-frstp.ll | 50 ++ + .../CodeGen/LoongArch/lasx/intrinsic-fsqrt.ll | 26 + + .../CodeGen/LoongArch/lasx/intrinsic-fsub.ll | 26 + + .../CodeGen/LoongArch/lasx/intrinsic-ftint.ll | 350 ++++++++++++ + .../CodeGen/LoongArch/lasx/intrinsic-haddw.ll | 98 ++++ + .../CodeGen/LoongArch/lasx/intrinsic-hsubw.ll | 98 ++++ + .../CodeGen/LoongArch/lasx/intrinsic-ilv.ll | 98 ++++ + .../LoongArch/lasx/intrinsic-insgr2vr.ll | 28 + + .../LoongArch/lasx/intrinsic-insve0.ll | 26 + + .../CodeGen/LoongArch/lasx/intrinsic-ld.ll | 26 + + .../CodeGen/LoongArch/lasx/intrinsic-ldi.ll | 62 ++ + .../LoongArch/lasx/intrinsic-ldrepl.ll | 50 ++ + .../CodeGen/LoongArch/lasx/intrinsic-madd.ll | 50 ++ + .../CodeGen/LoongArch/lasx/intrinsic-maddw.ll | 290 ++++++++++ + .../CodeGen/LoongArch/lasx/intrinsic-max.ll | 194 +++++++ + .../CodeGen/LoongArch/lasx/intrinsic-min.ll | 194 +++++++ + .../CodeGen/LoongArch/lasx/intrinsic-mod.ll | 98 ++++ + .../LoongArch/lasx/intrinsic-mskgez.ll | 14 + + .../LoongArch/lasx/intrinsic-mskltz.ll | 50 ++ + .../CodeGen/LoongArch/lasx/intrinsic-msknz.ll | 14 + + .../CodeGen/LoongArch/lasx/intrinsic-msub.ll | 50 ++ + .../CodeGen/LoongArch/lasx/intrinsic-muh.ll | 98 ++++ + .../CodeGen/LoongArch/lasx/intrinsic-mul.ll | 50 ++ + .../CodeGen/LoongArch/lasx/intrinsic-mulw.ll | 290 ++++++++++ + .../CodeGen/LoongArch/lasx/intrinsic-neg.ll | 50 ++ + .../CodeGen/LoongArch/lasx/intrinsic-nor.ll | 14 + + .../CodeGen/LoongArch/lasx/intrinsic-nori.ll | 14 + + .../CodeGen/LoongArch/lasx/intrinsic-or.ll | 14 + + .../CodeGen/LoongArch/lasx/intrinsic-ori.ll | 14 + + .../CodeGen/LoongArch/lasx/intrinsic-orn.ll | 14 + + .../CodeGen/LoongArch/lasx/intrinsic-pack.ll | 98 ++++ + .../CodeGen/LoongArch/lasx/intrinsic-pcnt.ll | 50 ++ + .../CodeGen/LoongArch/lasx/intrinsic-perm.ll | 14 + + .../CodeGen/LoongArch/lasx/intrinsic-permi.ll | 38 ++ + .../CodeGen/LoongArch/lasx/intrinsic-pick.ll | 98 ++++ + 
.../LoongArch/lasx/intrinsic-pickve.ll | 50 ++ + .../LoongArch/lasx/intrinsic-pickve2gr.ll | 53 ++ + .../LoongArch/lasx/intrinsic-repl128vei.ll | 50 ++ + .../LoongArch/lasx/intrinsic-replgr2vr.ll | 50 ++ + .../LoongArch/lasx/intrinsic-replve.ll | 50 ++ + .../LoongArch/lasx/intrinsic-replve0.ll | 62 ++ + .../CodeGen/LoongArch/lasx/intrinsic-rotr.ll | 98 ++++ + .../CodeGen/LoongArch/lasx/intrinsic-sadd.ll | 98 ++++ + .../CodeGen/LoongArch/lasx/intrinsic-sat.ll | 98 ++++ + .../CodeGen/LoongArch/lasx/intrinsic-seq.ll | 98 ++++ + .../CodeGen/LoongArch/lasx/intrinsic-set.ll | 38 ++ + .../LoongArch/lasx/intrinsic-setallnez.ll | 74 +++ + .../LoongArch/lasx/intrinsic-setanyeqz.ll | 74 +++ + .../CodeGen/LoongArch/lasx/intrinsic-shuf.ll | 50 ++ + .../LoongArch/lasx/intrinsic-shuf4i.ll | 50 ++ + .../LoongArch/lasx/intrinsic-signcov.ll | 50 ++ + .../CodeGen/LoongArch/lasx/intrinsic-sle.ll | 194 +++++++ + .../CodeGen/LoongArch/lasx/intrinsic-sll.ll | 98 ++++ + .../LoongArch/lasx/intrinsic-sllwil.ll | 74 +++ + .../CodeGen/LoongArch/lasx/intrinsic-slt.ll | 194 +++++++ + .../CodeGen/LoongArch/lasx/intrinsic-sra.ll | 98 ++++ + .../CodeGen/LoongArch/lasx/intrinsic-sran.ll | 38 ++ + .../CodeGen/LoongArch/lasx/intrinsic-srani.ll | 50 ++ + .../CodeGen/LoongArch/lasx/intrinsic-srar.ll | 98 ++++ + .../CodeGen/LoongArch/lasx/intrinsic-srarn.ll | 38 ++ + .../LoongArch/lasx/intrinsic-srarni.ll | 50 ++ + .../CodeGen/LoongArch/lasx/intrinsic-srl.ll | 98 ++++ + .../CodeGen/LoongArch/lasx/intrinsic-srln.ll | 38 ++ + .../CodeGen/LoongArch/lasx/intrinsic-srlni.ll | 50 ++ + .../CodeGen/LoongArch/lasx/intrinsic-srlr.ll | 98 ++++ + .../CodeGen/LoongArch/lasx/intrinsic-srlrn.ll | 38 ++ + .../LoongArch/lasx/intrinsic-srlrni.ll | 50 ++ + .../CodeGen/LoongArch/lasx/intrinsic-ssran.ll | 74 +++ + .../LoongArch/lasx/intrinsic-ssrani.ll | 98 ++++ + .../LoongArch/lasx/intrinsic-ssrarn.ll | 74 +++ + .../LoongArch/lasx/intrinsic-ssrarni.ll | 98 ++++ + .../CodeGen/LoongArch/lasx/intrinsic-ssrln.ll | 74 +++ + .../LoongArch/lasx/intrinsic-ssrlni.ll | 98 ++++ + .../LoongArch/lasx/intrinsic-ssrlrn.ll | 74 +++ + .../LoongArch/lasx/intrinsic-ssrlrni.ll | 98 ++++ + .../CodeGen/LoongArch/lasx/intrinsic-ssub.ll | 98 ++++ + .../CodeGen/LoongArch/lasx/intrinsic-st.ll | 27 + + .../CodeGen/LoongArch/lasx/intrinsic-stelm.ll | 50 ++ + .../CodeGen/LoongArch/lasx/intrinsic-sub.ll | 62 ++ + .../CodeGen/LoongArch/lasx/intrinsic-subi.ll | 50 ++ + .../CodeGen/LoongArch/lasx/intrinsic-subw.ll | 194 +++++++ + .../CodeGen/LoongArch/lasx/intrinsic-xor.ll | 14 + + .../CodeGen/LoongArch/lasx/intrinsic-xori.ll | 14 + + 128 files changed, 9154 insertions(+) + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-absd.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-add.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-adda.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-addw.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-and.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-andn.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-avg.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-avgr.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev.ll + create mode 100644 
llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitsel.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-clo.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-clz.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-div.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ext2xv.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-exth.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-extl.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fadd.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fclass.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcmp.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvt.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvth.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvtl.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fdiv.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ffint.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-flogb.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmadd.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmax.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmaxa.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmin.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmina.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmsub.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmul.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fnmadd.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fnmsub.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-frecip.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-frint.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-frsqrt.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fsqrt.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fsub.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ftint.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-haddw.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-hsubw.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ilv.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-madd.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-maddw.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-max.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-min.ll + create 
mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-mod.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-mskgez.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-mskltz.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-msknz.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-msub.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-muh.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-mul.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-mulw.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-neg.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-nor.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-or.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-orn.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-pack.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-pcnt.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-perm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-pick.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-replgr2vr.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-replve.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-replve0.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sadd.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-set.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-setallnez.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-setanyeqz.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-signcov.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sran.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarn.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srln.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr.ll + 
create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrn.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssran.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarn.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrln.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrn.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssub.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-st.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sub.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-subw.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-xor.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori.ll + +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-absd.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-absd.ll +new file mode 100644 +index 000000000000..bf54f44357b0 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-absd.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvabsd.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvabsd_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvabsd_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvabsd.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvabsd.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvabsd.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvabsd_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvabsd_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvabsd.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvabsd.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvabsd.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvabsd_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvabsd_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvabsd.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvabsd.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvabsd.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvabsd_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvabsd_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvabsd.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvabsd.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvabsd.bu(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvabsd_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvabsd_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: 
xvabsd.bu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvabsd.bu(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvabsd.hu(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvabsd_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvabsd_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvabsd.hu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvabsd.hu(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvabsd.wu(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvabsd_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvabsd_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvabsd.wu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvabsd.wu(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvabsd.du(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvabsd_du(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvabsd_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvabsd.du $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvabsd.du(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-add.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-add.ll +new file mode 100644 +index 000000000000..0c2f2ace29fc +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-add.ll +@@ -0,0 +1,62 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvadd.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvadd_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvadd_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvadd.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvadd.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvadd_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvadd_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvadd.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvadd.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvadd_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvadd_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvadd.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvadd.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvadd.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvadd_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvadd_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvadd.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvadd.q(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvadd_q(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvadd_q: ++; 
CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvadd.q $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvadd.q(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-adda.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-adda.ll +new file mode 100644 +index 000000000000..c1258d53e913 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-adda.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvadda.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvadda_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvadda_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvadda.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvadda.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvadda.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvadda_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvadda_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvadda.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvadda.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvadda.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvadda_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvadda_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvadda.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvadda.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvadda.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvadda_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvadda_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvadda.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvadda.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi.ll +new file mode 100644 +index 000000000000..09b5d07a0151 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvaddi_bu(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvaddi_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvaddi.bu $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> %va, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvaddi_hu(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvaddi_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvaddi.hu $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> %va, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvaddi_wu(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvaddi_wu: ++; CHECK: # 
%bb.0: # %entry ++; CHECK-NEXT: xvaddi.wu $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> %va, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvaddi_du(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvaddi_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvaddi.du $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> %va, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addw.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addw.ll +new file mode 100644 +index 000000000000..ef7a1b5a50ef +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addw.ll +@@ -0,0 +1,290 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b(<32 x i8>, <32 x i8>) ++ ++define <16 x i16> @lasx_xvaddwev_h_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvaddwev_h_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvaddwev.h.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.h(<16 x i16>, <16 x i16>) ++ ++define <8 x i32> @lasx_xvaddwev_w_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvaddwev_w_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvaddwev.w.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.w(<8 x i32>, <8 x i32>) ++ ++define <4 x i64> @lasx_xvaddwev_d_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvaddwev_d_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvaddwev.d.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvaddwev_q_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvaddwev_q_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvaddwev.q.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu(<32 x i8>, <32 x i8>) ++ ++define <16 x i16> @lasx_xvaddwev_h_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvaddwev_h_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvaddwev.h.bu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu(<32 x i8> %va, <32 x i8> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu(<16 x i16>, <16 x i16>) ++ ++define <8 x i32> @lasx_xvaddwev_w_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvaddwev_w_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvaddwev.w.hu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu(<16 x i16> %va, <16 x i16> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> 
@llvm.loongarch.lasx.xvaddwev.d.wu(<8 x i32>, <8 x i32>) ++ ++define <4 x i64> @lasx_xvaddwev_d_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvaddwev_d_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvaddwev.d.wu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu(<8 x i32> %va, <8 x i32> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvaddwev_q_du(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvaddwev_q_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvaddwev.q.du $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu.b(<32 x i8>, <32 x i8>) ++ ++define <16 x i16> @lasx_xvaddwev_h_bu_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvaddwev_h_bu_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvaddwev.h.bu.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu.h(<16 x i16>, <16 x i16>) ++ ++define <8 x i32> @lasx_xvaddwev_w_hu_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvaddwev_w_hu_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvaddwev.w.hu.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu.w(<8 x i32>, <8 x i32>) ++ ++define <4 x i64> @lasx_xvaddwev_d_wu_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvaddwev_d_wu_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvaddwev.d.wu.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvaddwev_q_du_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvaddwev_q_du_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvaddwev.q.du.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.b(<32 x i8>, <32 x i8>) ++ ++define <16 x i16> @lasx_xvaddwod_h_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvaddwod_h_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvaddwod.h.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.h(<16 x i16>, <16 x i16>) ++ ++define <8 x i32> @lasx_xvaddwod_w_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvaddwod_w_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvaddwod.w.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.w(<8 x i32>, <8 x i32>) ++ ++define <4 x i64> @lasx_xvaddwod_d_w(<8 x i32> %va, <8 x i32> %vb) nounwind 
{ ++; CHECK-LABEL: lasx_xvaddwod_d_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvaddwod.d.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvaddwod_q_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvaddwod_q_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvaddwod.q.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu(<32 x i8>, <32 x i8>) ++ ++define <16 x i16> @lasx_xvaddwod_h_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvaddwod_h_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvaddwod.h.bu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu(<32 x i8> %va, <32 x i8> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu(<16 x i16>, <16 x i16>) ++ ++define <8 x i32> @lasx_xvaddwod_w_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvaddwod_w_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvaddwod.w.hu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu(<16 x i16> %va, <16 x i16> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu(<8 x i32>, <8 x i32>) ++ ++define <4 x i64> @lasx_xvaddwod_d_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvaddwod_d_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvaddwod.d.wu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu(<8 x i32> %va, <8 x i32> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvaddwod_q_du(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvaddwod_q_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvaddwod.q.du $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu.b(<32 x i8>, <32 x i8>) ++ ++define <16 x i16> @lasx_xvaddwod_h_bu_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvaddwod_h_bu_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvaddwod.h.bu.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu.h(<16 x i16>, <16 x i16>) ++ ++define <8 x i32> @lasx_xvaddwod_w_hu_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvaddwod_w_hu_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvaddwod.w.hu.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu.w(<8 x i32>, <8 x i32>) ++ ++define <4 x i64> @lasx_xvaddwod_d_wu_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvaddwod_d_wu_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvaddwod.d.wu.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ 
%res = call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvaddwod_q_du_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvaddwod_q_du_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvaddwod.q.du.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-and.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-and.ll +new file mode 100644 +index 000000000000..15f3a8094770 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-and.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvand.v(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvand_v(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvand_v: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvand.v(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi.ll +new file mode 100644 +index 000000000000..88cf142d6968 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvandi_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvandi_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvandi.b $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> %va, i32 1) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andn.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andn.ll +new file mode 100644 +index 000000000000..f385ef3661cb +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andn.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvandn.v(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvandn_v(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvandn_v: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvandn.v $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvandn.v(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-avg.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-avg.ll +new file mode 100644 +index 000000000000..488d3b96b003 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-avg.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvavg.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvavg_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvavg_b: ++; CHECK: # %bb.0: # 
%entry ++; CHECK-NEXT: xvavg.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvavg.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvavg.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvavg_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvavg_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvavg.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvavg.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvavg.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvavg_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvavg_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvavg.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvavg.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvavg.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvavg_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvavg_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvavg.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvavg.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvavg.bu(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvavg_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvavg_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvavg.bu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvavg.bu(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvavg.hu(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvavg_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvavg_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvavg.hu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvavg.hu(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvavg.wu(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvavg_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvavg_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvavg.wu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvavg.wu(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvavg.du(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvavg_du(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvavg_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvavg.du $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvavg.du(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-avgr.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-avgr.ll +new file mode 100644 +index 000000000000..b5ab5a5366aa +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-avgr.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvavgr.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvavgr_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: 
lasx_xvavgr_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvavgr.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvavgr.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvavgr.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvavgr_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvavgr_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvavgr.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvavgr.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvavgr.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvavgr_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvavgr_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvavgr.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvavgr.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvavgr.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvavgr_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvavgr_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvavgr.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvavgr.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvavgr.bu(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvavgr_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvavgr_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvavgr.bu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvavgr.bu(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvavgr.hu(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvavgr_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvavgr_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvavgr.hu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvavgr.hu(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvavgr.wu(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvavgr_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvavgr_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvavgr.wu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvavgr.wu(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvavgr.du(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvavgr_du(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvavgr_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvavgr.du $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvavgr.du(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr.ll +new file mode 100644 +index 000000000000..cec71bab2fe8 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvbitclr.b(<32 x i8>, <32 x i8>) ++ ++define <32 x 
i8> @lasx_xvbitclr_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvbitclr_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvbitclr.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitclr.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvbitclr.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvbitclr_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvbitclr_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvbitclr.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitclr.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvbitclr.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvbitclr_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvbitclr_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvbitclr.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvbitclr.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvbitclr.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvbitclr_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvbitclr_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvbitclr.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvbitclr.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvbitclri_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvbitclri_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvbitclri.b $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> %va, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvbitclri_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvbitclri_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvbitclri.h $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> %va, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvbitclri_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvbitclri_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvbitclri.w $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> %va, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvbitclri_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvbitclri_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvbitclri.d $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> %va, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev.ll +new file mode 100644 +index 000000000000..fb4f9fbc2e4b +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvbitrev.b(<32 x i8>, <32 x 
i8>) ++ ++define <32 x i8> @lasx_xvbitrev_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvbitrev_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvbitrev.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitrev.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvbitrev.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvbitrev_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvbitrev_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvbitrev.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitrev.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvbitrev.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvbitrev_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvbitrev_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvbitrev.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvbitrev.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvbitrev.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvbitrev_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvbitrev_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvbitrev.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvbitrev.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvbitrevi_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvbitrevi_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvbitrevi.b $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> %va, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvbitrevi_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvbitrevi_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvbitrevi.h $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> %va, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvbitrevi_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvbitrevi_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvbitrevi.w $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> %va, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvbitrevi_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvbitrevi_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvbitrevi.d $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> %va, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitsel.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitsel.ll +new file mode 100644 +index 000000000000..2e91407590ac +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitsel.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> 
@llvm.loongarch.lasx.xvbitsel.v(<32 x i8>, <32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvbitsel_v(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind { ++; CHECK-LABEL: lasx_xvbitsel_v: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvbitsel.v $xr0, $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitsel.v(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli.ll +new file mode 100644 +index 000000000000..79dd55cbfef9 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvbitseli_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvbitseli_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvbitseli.b $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> %va, <32 x i8> %vb, i32 1) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset.ll +new file mode 100644 +index 000000000000..83d1f0ef60c6 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvbitset.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvbitset_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvbitset_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvbitset.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitset.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvbitset.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvbitset_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvbitset_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvbitset.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitset.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvbitset.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvbitset_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvbitset_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvbitset.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvbitset.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvbitset.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvbitset_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvbitset_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvbitset.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvbitset.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvbitseti_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvbitseti_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: 
xvbitseti.b $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> %va, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvbitseti_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvbitseti_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvbitseti.h $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> %va, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvbitseti_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvbitseti_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvbitseti.w $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> %va, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvbitseti_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvbitseti_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvbitseti.d $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> %va, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll.ll +new file mode 100644 +index 000000000000..cbb63ced5cc0 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvbsll_v(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvbsll_v: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvbsll.v $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> %va, i32 1) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl.ll +new file mode 100644 +index 000000000000..b0c26cbe3e35 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvbsrl_v(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvbsrl_v: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvbsrl.v $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> %va, i32 1) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-clo.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-clo.ll +new file mode 100644 +index 000000000000..29b2be03d54e +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-clo.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvclo.b(<32 x i8>) ++ ++define <32 x i8> @lasx_xvclo_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvclo_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvclo.b $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> 
@llvm.loongarch.lasx.xvclo.b(<32 x i8> %va) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvclo.h(<16 x i16>) ++ ++define <16 x i16> @lasx_xvclo_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvclo_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvclo.h $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvclo.h(<16 x i16> %va) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvclo.w(<8 x i32>) ++ ++define <8 x i32> @lasx_xvclo_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvclo_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvclo.w $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvclo.w(<8 x i32> %va) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvclo.d(<4 x i64>) ++ ++define <4 x i64> @lasx_xvclo_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvclo_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvclo.d $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvclo.d(<4 x i64> %va) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-clz.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-clz.ll +new file mode 100644 +index 000000000000..5247ceedbd14 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-clz.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvclz.b(<32 x i8>) ++ ++define <32 x i8> @lasx_xvclz_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvclz_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvclz.b $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvclz.b(<32 x i8> %va) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvclz.h(<16 x i16>) ++ ++define <16 x i16> @lasx_xvclz_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvclz_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvclz.h $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvclz.h(<16 x i16> %va) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvclz.w(<8 x i32>) ++ ++define <8 x i32> @lasx_xvclz_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvclz_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvclz.w $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvclz.w(<8 x i32> %va) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvclz.d(<4 x i64>) ++ ++define <4 x i64> @lasx_xvclz_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvclz_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvclz.d $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvclz.d(<4 x i64> %va) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-div.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-div.ll +new file mode 100644 +index 000000000000..813204092e94 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-div.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvdiv.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvdiv_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvdiv_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvdiv.b $xr0, 
$xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvdiv.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvdiv.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvdiv_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvdiv_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvdiv.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvdiv.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvdiv.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvdiv_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvdiv_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvdiv.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvdiv.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvdiv.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvdiv_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvdiv_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvdiv.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvdiv.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvdiv.bu(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvdiv_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvdiv_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvdiv.bu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvdiv.bu(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvdiv.hu(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvdiv_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvdiv_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvdiv.hu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvdiv.hu(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvdiv.wu(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvdiv_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvdiv_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvdiv.wu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvdiv.wu(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvdiv.du(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvdiv_du(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvdiv_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvdiv.du $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvdiv.du(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ext2xv.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ext2xv.ll +new file mode 100644 +index 000000000000..48721b52af00 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ext2xv.ll +@@ -0,0 +1,146 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <16 x i16> @llvm.loongarch.lasx.vext2xv.h.b(<32 x i8>) ++ ++define <16 x i16> @lasx_vext2xv_h_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_vext2xv_h_b: ++; CHECK: # %bb.0: # %entry ++; 
CHECK-NEXT: vext2xv.h.b $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.vext2xv.h.b(<32 x i8> %va) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.vext2xv.w.b(<32 x i8>) ++ ++define <8 x i32> @lasx_vext2xv_w_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_vext2xv_w_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vext2xv.w.b $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.b(<32 x i8> %va) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.vext2xv.d.b(<32 x i8>) ++ ++define <4 x i64> @lasx_vext2xv_d_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_vext2xv_d_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vext2xv.d.b $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.b(<32 x i8> %va) ++ ret <4 x i64> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.vext2xv.w.h(<16 x i16>) ++ ++define <8 x i32> @lasx_vext2xv_w_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_vext2xv_w_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vext2xv.w.h $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.h(<16 x i16> %va) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.vext2xv.d.h(<16 x i16>) ++ ++define <4 x i64> @lasx_vext2xv_d_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_vext2xv_d_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vext2xv.d.h $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.h(<16 x i16> %va) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.vext2xv.d.w(<8 x i32>) ++ ++define <4 x i64> @lasx_vext2xv_d_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_vext2xv_d_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vext2xv.d.w $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.w(<8 x i32> %va) ++ ret <4 x i64> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu(<32 x i8>) ++ ++define <16 x i16> @lasx_vext2xv_hu_bu(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_vext2xv_hu_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vext2xv.hu.bu $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu(<32 x i8> %va) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.bu(<32 x i8>) ++ ++define <8 x i32> @lasx_vext2xv_wu_bu(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_vext2xv_wu_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vext2xv.wu.bu $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.bu(<32 x i8> %va) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.vext2xv.du.bu(<32 x i8>) ++ ++define <4 x i64> @lasx_vext2xv_du_bu(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_vext2xv_du_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vext2xv.du.bu $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.bu(<32 x i8> %va) ++ ret <4 x i64> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.hu(<16 x i16>) ++ ++define <8 x i32> @lasx_vext2xv_wu_hu(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_vext2xv_wu_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vext2xv.wu.hu $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.hu(<16 x i16> %va) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> 
@llvm.loongarch.lasx.vext2xv.du.hu(<16 x i16>) ++ ++define <4 x i64> @lasx_vext2xv_du_hu(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_vext2xv_du_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vext2xv.du.hu $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.hu(<16 x i16> %va) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.vext2xv.du.wu(<8 x i32>) ++ ++define <4 x i64> @lasx_vext2xv_du_wu(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_vext2xv_du_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vext2xv.du.wu $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.wu(<8 x i32> %va) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-exth.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-exth.ll +new file mode 100644 +index 000000000000..543589e61b12 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-exth.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvexth.h.b(<32 x i8>) ++ ++define <16 x i16> @lasx_xvexth_h_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvexth_h_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvexth.h.b $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvexth.h.b(<32 x i8> %va) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvexth.w.h(<16 x i16>) ++ ++define <8 x i32> @lasx_xvexth_w_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvexth_w_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvexth.w.h $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvexth.w.h(<16 x i16> %va) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvexth.d.w(<8 x i32>) ++ ++define <4 x i64> @lasx_xvexth_d_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvexth_d_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvexth.d.w $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvexth.d.w(<8 x i32> %va) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvexth.q.d(<4 x i64>) ++ ++define <4 x i64> @lasx_xvexth_q_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvexth_q_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvexth.q.d $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvexth.q.d(<4 x i64> %va) ++ ret <4 x i64> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvexth.hu.bu(<32 x i8>) ++ ++define <16 x i16> @lasx_xvexth_hu_bu(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvexth_hu_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvexth.hu.bu $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvexth.hu.bu(<32 x i8> %va) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvexth.wu.hu(<16 x i16>) ++ ++define <8 x i32> @lasx_xvexth_wu_hu(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvexth_wu_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvexth.wu.hu $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvexth.wu.hu(<16 x i16> %va) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvexth.du.wu(<8 x i32>) ++ ++define <4 x i64> @lasx_xvexth_du_wu(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvexth_du_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: 
xvexth.du.wu $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvexth.du.wu(<8 x i32> %va) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvexth.qu.du(<4 x i64>) ++ ++define <4 x i64> @lasx_xvexth_qu_du(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvexth_qu_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvexth.qu.du $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvexth.qu.du(<4 x i64> %va) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extl.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extl.ll +new file mode 100644 +index 000000000000..7040c8c784cd +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extl.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvextl.q.d(<4 x i64>) ++ ++define <4 x i64> @lasx_xvextl_q_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvextl_q_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvextl.q.d $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvextl.q.d(<4 x i64> %va) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvextl.qu.du(<4 x i64>) ++ ++define <4 x i64> @lasx_xvextl_qu_du(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvextl_qu_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvextl.qu.du $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvextl.qu.du(<4 x i64> %va) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins.ll +new file mode 100644 +index 000000000000..c8774a7b29c0 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvextrins_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvextrins_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvextrins.b $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> %va, <32 x i8> %vb, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvextrins_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvextrins_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvextrins.h $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> %va, <16 x i16> %vb, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvextrins_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvextrins_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvextrins.w $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> %va, <8 x i32> %vb, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvextrins_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: 
lasx_xvextrins_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvextrins.d $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> %va, <4 x i64> %vb, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fadd.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fadd.ll +new file mode 100644 +index 000000000000..563a0ce9e384 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fadd.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <8 x float> @llvm.loongarch.lasx.xvfadd.s(<8 x float>, <8 x float>) ++ ++define <8 x float> @lasx_xvfadd_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfadd_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfadd.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvfadd.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x float> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvfadd.d(<4 x double>, <4 x double>) ++ ++define <4 x double> @lasx_xvfadd_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfadd_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfadd.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvfadd.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fclass.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fclass.ll +new file mode 100644 +index 000000000000..901ca5bb0260 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fclass.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvfclass.s(<8 x float>) ++ ++define <8 x i32> @lasx_xvfclass_s(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvfclass_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfclass.s $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvfclass.s(<8 x float> %va) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvfclass.d(<4 x double>) ++ ++define <4 x i64> @lasx_xvfclass_d(<4 x double> %va) nounwind { ++; CHECK-LABEL: lasx_xvfclass_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfclass.d $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvfclass.d(<4 x double> %va) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcmp.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcmp.ll +new file mode 100644 +index 000000000000..b01f908e71af +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcmp.ll +@@ -0,0 +1,530 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s(<8 x float>, <8 x float>) ++ ++define <8 x i32> @lasx_xvfcmp_caf_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_caf_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.caf.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.caf.d(<4 x 
double>, <4 x double>) ++ ++define <4 x i64> @lasx_xvfcmp_caf_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_caf_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.caf.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.caf.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.cun.s(<8 x float>, <8 x float>) ++ ++define <8 x i32> @lasx_xvfcmp_cun_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_cun_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.cun.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cun.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.cun.d(<4 x double>, <4 x double>) ++ ++define <4 x i64> @lasx_xvfcmp_cun_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_cun_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.cun.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cun.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.ceq.s(<8 x float>, <8 x float>) ++ ++define <8 x i32> @lasx_xvfcmp_ceq_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_ceq_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.ceq.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.ceq.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.ceq.d(<4 x double>, <4 x double>) ++ ++define <4 x i64> @lasx_xvfcmp_ceq_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_ceq_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.ceq.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.ceq.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.cueq.s(<8 x float>, <8 x float>) ++ ++define <8 x i32> @lasx_xvfcmp_cueq_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_cueq_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.cueq.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cueq.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.cueq.d(<4 x double>, <4 x double>) ++ ++define <4 x i64> @lasx_xvfcmp_cueq_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_cueq_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.cueq.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cueq.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.clt.s(<8 x float>, <8 x float>) ++ ++define <8 x i32> @lasx_xvfcmp_clt_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_clt_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.clt.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.clt.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.clt.d(<4 x double>, <4 x double>) ++ ++define <4 x i64> @lasx_xvfcmp_clt_d(<4 x double> %va, <4 x double> 
%vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_clt_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.clt.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.clt.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.cult.s(<8 x float>, <8 x float>) ++ ++define <8 x i32> @lasx_xvfcmp_cult_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_cult_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.cult.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cult.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.cult.d(<4 x double>, <4 x double>) ++ ++define <4 x i64> @lasx_xvfcmp_cult_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_cult_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.cult.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cult.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.cle.s(<8 x float>, <8 x float>) ++ ++define <8 x i32> @lasx_xvfcmp_cle_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_cle_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.cle.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cle.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.cle.d(<4 x double>, <4 x double>) ++ ++define <4 x i64> @lasx_xvfcmp_cle_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_cle_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.cle.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cle.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.cule.s(<8 x float>, <8 x float>) ++ ++define <8 x i32> @lasx_xvfcmp_cule_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_cule_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.cule.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cule.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.cule.d(<4 x double>, <4 x double>) ++ ++define <4 x i64> @lasx_xvfcmp_cule_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_cule_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.cule.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cule.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.cne.s(<8 x float>, <8 x float>) ++ ++define <8 x i32> @lasx_xvfcmp_cne_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_cne_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.cne.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cne.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.cne.d(<4 x double>, <4 x double>) ++ ++define <4 x i64> @lasx_xvfcmp_cne_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_cne_d: ++; CHECK: # %bb.0: # %entry ++; 
CHECK-NEXT: xvfcmp.cne.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cne.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.cor.s(<8 x float>, <8 x float>) ++ ++define <8 x i32> @lasx_xvfcmp_cor_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_cor_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.cor.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cor.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.cor.d(<4 x double>, <4 x double>) ++ ++define <4 x i64> @lasx_xvfcmp_cor_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_cor_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.cor.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cor.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.cune.s(<8 x float>, <8 x float>) ++ ++define <8 x i32> @lasx_xvfcmp_cune_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_cune_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.cune.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cune.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.cune.d(<4 x double>, <4 x double>) ++ ++define <4 x i64> @lasx_xvfcmp_cune_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_cune_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.cune.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cune.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.saf.s(<8 x float>, <8 x float>) ++ ++define <8 x i32> @lasx_xvfcmp_saf_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_saf_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.saf.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.saf.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.saf.d(<4 x double>, <4 x double>) ++ ++define <4 x i64> @lasx_xvfcmp_saf_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_saf_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.saf.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.saf.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.sun.s(<8 x float>, <8 x float>) ++ ++define <8 x i32> @lasx_xvfcmp_sun_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_sun_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.sun.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sun.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.sun.d(<4 x double>, <4 x double>) ++ ++define <4 x i64> @lasx_xvfcmp_sun_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_sun_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.sun.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> 
@llvm.loongarch.lasx.xvfcmp.sun.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.seq.s(<8 x float>, <8 x float>) ++ ++define <8 x i32> @lasx_xvfcmp_seq_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_seq_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.seq.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.seq.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.seq.d(<4 x double>, <4 x double>) ++ ++define <4 x i64> @lasx_xvfcmp_seq_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_seq_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.seq.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.seq.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.sueq.s(<8 x float>, <8 x float>) ++ ++define <8 x i32> @lasx_xvfcmp_sueq_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_sueq_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.sueq.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sueq.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.sueq.d(<4 x double>, <4 x double>) ++ ++define <4 x i64> @lasx_xvfcmp_sueq_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_sueq_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.sueq.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sueq.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.slt.s(<8 x float>, <8 x float>) ++ ++define <8 x i32> @lasx_xvfcmp_slt_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_slt_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.slt.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.slt.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.slt.d(<4 x double>, <4 x double>) ++ ++define <4 x i64> @lasx_xvfcmp_slt_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_slt_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.slt.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.slt.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.sult.s(<8 x float>, <8 x float>) ++ ++define <8 x i32> @lasx_xvfcmp_sult_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_sult_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.sult.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sult.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.sult.d(<4 x double>, <4 x double>) ++ ++define <4 x i64> @lasx_xvfcmp_sult_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_sult_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.sult.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sult.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x i64> 
%res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.sle.s(<8 x float>, <8 x float>) ++ ++define <8 x i32> @lasx_xvfcmp_sle_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_sle_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.sle.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sle.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.sle.d(<4 x double>, <4 x double>) ++ ++define <4 x i64> @lasx_xvfcmp_sle_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_sle_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.sle.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sle.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.sule.s(<8 x float>, <8 x float>) ++ ++define <8 x i32> @lasx_xvfcmp_sule_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_sule_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.sule.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sule.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.sule.d(<4 x double>, <4 x double>) ++ ++define <4 x i64> @lasx_xvfcmp_sule_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_sule_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.sule.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sule.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.sne.s(<8 x float>, <8 x float>) ++ ++define <8 x i32> @lasx_xvfcmp_sne_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_sne_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.sne.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sne.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.sne.d(<4 x double>, <4 x double>) ++ ++define <4 x i64> @lasx_xvfcmp_sne_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_sne_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.sne.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sne.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.sor.s(<8 x float>, <8 x float>) ++ ++define <8 x i32> @lasx_xvfcmp_sor_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_sor_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.sor.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sor.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.sor.d(<4 x double>, <4 x double>) ++ ++define <4 x i64> @lasx_xvfcmp_sor_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_sor_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.sor.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sor.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.sune.s(<8 x float>, <8 x float>) ++ 
++define <8 x i32> @lasx_xvfcmp_sune_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_sune_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.sune.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sune.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.sune.d(<4 x double>, <4 x double>) ++ ++define <4 x i64> @lasx_xvfcmp_sune_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcmp_sune_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcmp.sune.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sune.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvt.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvt.ll +new file mode 100644 +index 000000000000..82bf1d3df72c +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvt.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvfcvt.h.s(<8 x float>, <8 x float>) ++ ++define <16 x i16> @lasx_xvfcvt_h_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcvt_h_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcvt.h.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvfcvt.h.s(<8 x float> %va, <8 x float> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d(<4 x double>, <4 x double>) ++ ++define <8 x float> @lasx_xvfcvt_s_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfcvt_s_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcvt.s.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d(<4 x double> %va, <4 x double> %vb) ++ ret <8 x float> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvth.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvth.ll +new file mode 100644 +index 000000000000..e1a6a2923e67 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvth.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <8 x float> @llvm.loongarch.lasx.xvfcvth.s.h(<16 x i16>) ++ ++define <8 x float> @lasx_xvfcvth_s_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvfcvth_s_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcvth.s.h $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvfcvth.s.h(<16 x i16> %va) ++ ret <8 x float> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvfcvth.d.s(<8 x float>) ++ ++define <4 x double> @lasx_xvfcvth_d_s(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvfcvth_d_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcvth.d.s $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvfcvth.d.s(<8 x float> %va) ++ ret <4 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvtl.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvtl.ll +new file mode 100644 +index 000000000000..0b3e693c7f51 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvtl.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h(<16 x i16>) ++ ++define <8 x float> @lasx_xvfcvtl_s_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvfcvtl_s_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcvtl.s.h $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h(<16 x i16> %va) ++ ret <8 x float> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvfcvtl.d.s(<8 x float>) ++ ++define <4 x double> @lasx_xvfcvtl_d_s(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvfcvtl_d_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfcvtl.d.s $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvfcvtl.d.s(<8 x float> %va) ++ ret <4 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fdiv.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fdiv.ll +new file mode 100644 +index 000000000000..49923ddd4e8d +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fdiv.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <8 x float> @llvm.loongarch.lasx.xvfdiv.s(<8 x float>, <8 x float>) ++ ++define <8 x float> @lasx_xvfdiv_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfdiv_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfdiv.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvfdiv.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x float> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvfdiv.d(<4 x double>, <4 x double>) ++ ++define <4 x double> @lasx_xvfdiv_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfdiv_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfdiv.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvfdiv.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ffint.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ffint.ll +new file mode 100644 +index 000000000000..24da0bd33838 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ffint.ll +@@ -0,0 +1,86 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <8 x float> @llvm.loongarch.lasx.xvffint.s.w(<8 x i32>) ++ ++define <8 x float> @lasx_xvffint_s_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvffint_s_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvffint.s.w $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvffint.s.w(<8 x i32> %va) ++ ret <8 x float> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvffint.d.l(<4 x i64>) ++ ++define <4 x double> @lasx_xvffint_d_l(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvffint_d_l: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvffint.d.l $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvffint.d.l(<4 x i64> %va) ++ ret <4 x double> %res ++} ++ ++declare <8 x float> @llvm.loongarch.lasx.xvffint.s.wu(<8 x i32>) ++ ++define <8 x float> @lasx_xvffint_s_wu(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvffint_s_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvffint.s.wu $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ 
%res = call <8 x float> @llvm.loongarch.lasx.xvffint.s.wu(<8 x i32> %va) ++ ret <8 x float> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvffint.d.lu(<4 x i64>) ++ ++define <4 x double> @lasx_xvffint_d_lu(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvffint_d_lu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvffint.d.lu $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvffint.d.lu(<4 x i64> %va) ++ ret <4 x double> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvffintl.d.w(<8 x i32>) ++ ++define <4 x double> @lasx_xvffintl_d_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvffintl_d_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvffintl.d.w $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvffintl.d.w(<8 x i32> %va) ++ ret <4 x double> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvffinth.d.w(<8 x i32>) ++ ++define <4 x double> @lasx_xvffinth_d_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvffinth_d_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvffinth.d.w $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvffinth.d.w(<8 x i32> %va) ++ ret <4 x double> %res ++} ++ ++declare <8 x float> @llvm.loongarch.lasx.xvffint.s.l(<4 x i64>, <4 x i64>) ++ ++define <8 x float> @lasx_xvffint_s_l(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvffint_s_l: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvffint.s.l $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvffint.s.l(<4 x i64> %va, <4 x i64> %vb) ++ ret <8 x float> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-flogb.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-flogb.ll +new file mode 100644 +index 000000000000..bccef4504d70 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-flogb.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <8 x float> @llvm.loongarch.lasx.xvflogb.s(<8 x float>) ++ ++define <8 x float> @lasx_xvflogb_s(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvflogb_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvflogb.s $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvflogb.s(<8 x float> %va) ++ ret <8 x float> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvflogb.d(<4 x double>) ++ ++define <4 x double> @lasx_xvflogb_d(<4 x double> %va) nounwind { ++; CHECK-LABEL: lasx_xvflogb_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvflogb.d $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvflogb.d(<4 x double> %va) ++ ret <4 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmadd.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmadd.ll +new file mode 100644 +index 000000000000..0fc06f971660 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmadd.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <8 x float> @llvm.loongarch.lasx.xvfmadd.s(<8 x float>, <8 x float>, <8 x float>) ++ ++define <8 x float> @lasx_xvfmadd_s(<8 x float> %va, <8 x float> %vb, <8 x float> %vc) nounwind { ++; CHECK-LABEL: lasx_xvfmadd_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfmadd.s $xr0, $xr0, $xr1, $xr2 ++; 
CHECK-NEXT: ret ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvfmadd.s(<8 x float> %va, <8 x float> %vb, <8 x float> %vc) ++ ret <8 x float> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvfmadd.d(<4 x double>, <4 x double>, <4 x double>) ++ ++define <4 x double> @lasx_xvfmadd_d(<4 x double> %va, <4 x double> %vb, <4 x double> %vc) nounwind { ++; CHECK-LABEL: lasx_xvfmadd_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfmadd.d $xr0, $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvfmadd.d(<4 x double> %va, <4 x double> %vb, <4 x double> %vc) ++ ret <4 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmax.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmax.ll +new file mode 100644 +index 000000000000..2422fa0c00d8 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmax.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <8 x float> @llvm.loongarch.lasx.xvfmax.s(<8 x float>, <8 x float>) ++ ++define <8 x float> @lasx_xvfmax_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfmax_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfmax.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvfmax.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x float> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvfmax.d(<4 x double>, <4 x double>) ++ ++define <4 x double> @lasx_xvfmax_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfmax_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfmax.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvfmax.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmaxa.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmaxa.ll +new file mode 100644 +index 000000000000..cd9ccc656aef +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmaxa.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <8 x float> @llvm.loongarch.lasx.xvfmaxa.s(<8 x float>, <8 x float>) ++ ++define <8 x float> @lasx_xvfmaxa_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfmaxa_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfmaxa.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvfmaxa.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x float> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvfmaxa.d(<4 x double>, <4 x double>) ++ ++define <4 x double> @lasx_xvfmaxa_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfmaxa_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfmaxa.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvfmaxa.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmin.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmin.ll +new file mode 100644 +index 000000000000..effb3f9e1d75 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmin.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 
--mattr=+lasx < %s | FileCheck %s ++ ++declare <8 x float> @llvm.loongarch.lasx.xvfmin.s(<8 x float>, <8 x float>) ++ ++define <8 x float> @lasx_xvfmin_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfmin_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfmin.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvfmin.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x float> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvfmin.d(<4 x double>, <4 x double>) ++ ++define <4 x double> @lasx_xvfmin_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfmin_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfmin.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvfmin.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmina.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmina.ll +new file mode 100644 +index 000000000000..753a6f31ba06 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmina.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <8 x float> @llvm.loongarch.lasx.xvfmina.s(<8 x float>, <8 x float>) ++ ++define <8 x float> @lasx_xvfmina_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfmina_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfmina.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvfmina.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x float> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvfmina.d(<4 x double>, <4 x double>) ++ ++define <4 x double> @lasx_xvfmina_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfmina_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfmina.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvfmina.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmsub.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmsub.ll +new file mode 100644 +index 000000000000..57909d0dd168 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmsub.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <8 x float> @llvm.loongarch.lasx.xvfmsub.s(<8 x float>, <8 x float>, <8 x float>) ++ ++define <8 x float> @lasx_xvfmsub_s(<8 x float> %va, <8 x float> %vb, <8 x float> %vc) nounwind { ++; CHECK-LABEL: lasx_xvfmsub_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfmsub.s $xr0, $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvfmsub.s(<8 x float> %va, <8 x float> %vb, <8 x float> %vc) ++ ret <8 x float> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvfmsub.d(<4 x double>, <4 x double>, <4 x double>) ++ ++define <4 x double> @lasx_xvfmsub_d(<4 x double> %va, <4 x double> %vb, <4 x double> %vc) nounwind { ++; CHECK-LABEL: lasx_xvfmsub_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfmsub.d $xr0, $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvfmsub.d(<4 x double> %va, <4 x double> %vb, <4 x double> %vc) ++ ret <4 x double> %res ++} +diff --git 
a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmul.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmul.ll +new file mode 100644 +index 000000000000..9cad6f383066 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmul.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <8 x float> @llvm.loongarch.lasx.xvfmul.s(<8 x float>, <8 x float>) ++ ++define <8 x float> @lasx_xvfmul_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfmul_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfmul.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvfmul.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x float> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvfmul.d(<4 x double>, <4 x double>) ++ ++define <4 x double> @lasx_xvfmul_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfmul_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfmul.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvfmul.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fnmadd.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fnmadd.ll +new file mode 100644 +index 000000000000..c30993590f98 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fnmadd.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <8 x float> @llvm.loongarch.lasx.xvfnmadd.s(<8 x float>, <8 x float>, <8 x float>) ++ ++define <8 x float> @lasx_xvfnmadd_s(<8 x float> %va, <8 x float> %vb, <8 x float> %vc) nounwind { ++; CHECK-LABEL: lasx_xvfnmadd_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfnmadd.s $xr0, $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvfnmadd.s(<8 x float> %va, <8 x float> %vb, <8 x float> %vc) ++ ret <8 x float> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvfnmadd.d(<4 x double>, <4 x double>, <4 x double>) ++ ++define <4 x double> @lasx_xvfnmadd_d(<4 x double> %va, <4 x double> %vb, <4 x double> %vc) nounwind { ++; CHECK-LABEL: lasx_xvfnmadd_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfnmadd.d $xr0, $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvfnmadd.d(<4 x double> %va, <4 x double> %vb, <4 x double> %vc) ++ ret <4 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fnmsub.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fnmsub.ll +new file mode 100644 +index 000000000000..2e7ca695be62 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fnmsub.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <8 x float> @llvm.loongarch.lasx.xvfnmsub.s(<8 x float>, <8 x float>, <8 x float>) ++ ++define <8 x float> @lasx_xvfnmsub_s(<8 x float> %va, <8 x float> %vb, <8 x float> %vc) nounwind { ++; CHECK-LABEL: lasx_xvfnmsub_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfnmsub.s $xr0, $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvfnmsub.s(<8 x float> %va, <8 x float> %vb, <8 x float> %vc) ++ ret <8 x float> %res ++} ++ ++declare <4 x 
double> @llvm.loongarch.lasx.xvfnmsub.d(<4 x double>, <4 x double>, <4 x double>) ++ ++define <4 x double> @lasx_xvfnmsub_d(<4 x double> %va, <4 x double> %vb, <4 x double> %vc) nounwind { ++; CHECK-LABEL: lasx_xvfnmsub_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfnmsub.d $xr0, $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvfnmsub.d(<4 x double> %va, <4 x double> %vb, <4 x double> %vc) ++ ret <4 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frecip.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frecip.ll +new file mode 100644 +index 000000000000..da3a26df2824 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frecip.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <8 x float> @llvm.loongarch.lasx.xvfrecip.s(<8 x float>) ++ ++define <8 x float> @lasx_xvfrecip_s(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvfrecip_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfrecip.s $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvfrecip.s(<8 x float> %va) ++ ret <8 x float> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvfrecip.d(<4 x double>) ++ ++define <4 x double> @lasx_xvfrecip_d(<4 x double> %va) nounwind { ++; CHECK-LABEL: lasx_xvfrecip_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfrecip.d $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvfrecip.d(<4 x double> %va) ++ ret <4 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frint.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frint.ll +new file mode 100644 +index 000000000000..ddead27cd14b +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frint.ll +@@ -0,0 +1,122 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <8 x float> @llvm.loongarch.lasx.xvfrintrne.s(<8 x float>) ++ ++define <8 x float> @lasx_xvfrintrne_s(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvfrintrne_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfrintrne.s $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvfrintrne.s(<8 x float> %va) ++ ret <8 x float> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvfrintrne.d(<4 x double>) ++ ++define <4 x double> @lasx_xvfrintrne_d(<4 x double> %va) nounwind { ++; CHECK-LABEL: lasx_xvfrintrne_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfrintrne.d $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvfrintrne.d(<4 x double> %va) ++ ret <4 x double> %res ++} ++ ++declare <8 x float> @llvm.loongarch.lasx.xvfrintrz.s(<8 x float>) ++ ++define <8 x float> @lasx_xvfrintrz_s(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvfrintrz_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfrintrz.s $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvfrintrz.s(<8 x float> %va) ++ ret <8 x float> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvfrintrz.d(<4 x double>) ++ ++define <4 x double> @lasx_xvfrintrz_d(<4 x double> %va) nounwind { ++; CHECK-LABEL: lasx_xvfrintrz_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfrintrz.d $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x double> 
@llvm.loongarch.lasx.xvfrintrz.d(<4 x double> %va) ++ ret <4 x double> %res ++} ++ ++declare <8 x float> @llvm.loongarch.lasx.xvfrintrp.s(<8 x float>) ++ ++define <8 x float> @lasx_xvfrintrp_s(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvfrintrp_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfrintrp.s $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvfrintrp.s(<8 x float> %va) ++ ret <8 x float> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvfrintrp.d(<4 x double>) ++ ++define <4 x double> @lasx_xvfrintrp_d(<4 x double> %va) nounwind { ++; CHECK-LABEL: lasx_xvfrintrp_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfrintrp.d $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvfrintrp.d(<4 x double> %va) ++ ret <4 x double> %res ++} ++ ++declare <8 x float> @llvm.loongarch.lasx.xvfrintrm.s(<8 x float>) ++ ++define <8 x float> @lasx_xvfrintrm_s(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvfrintrm_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfrintrm.s $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvfrintrm.s(<8 x float> %va) ++ ret <8 x float> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvfrintrm.d(<4 x double>) ++ ++define <4 x double> @lasx_xvfrintrm_d(<4 x double> %va) nounwind { ++; CHECK-LABEL: lasx_xvfrintrm_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfrintrm.d $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvfrintrm.d(<4 x double> %va) ++ ret <4 x double> %res ++} ++ ++declare <8 x float> @llvm.loongarch.lasx.xvfrint.s(<8 x float>) ++ ++define <8 x float> @lasx_xvfrint_s(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvfrint_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfrint.s $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvfrint.s(<8 x float> %va) ++ ret <8 x float> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvfrint.d(<4 x double>) ++ ++define <4 x double> @lasx_xvfrint_d(<4 x double> %va) nounwind { ++; CHECK-LABEL: lasx_xvfrint_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfrint.d $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvfrint.d(<4 x double> %va) ++ ret <4 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frsqrt.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frsqrt.ll +new file mode 100644 +index 000000000000..6efa8122baf1 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frsqrt.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s(<8 x float>) ++ ++define <8 x float> @lasx_xvfrsqrt_s(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvfrsqrt_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfrsqrt.s $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s(<8 x float> %va) ++ ret <8 x float> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvfrsqrt.d(<4 x double>) ++ ++define <4 x double> @lasx_xvfrsqrt_d(<4 x double> %va) nounwind { ++; CHECK-LABEL: lasx_xvfrsqrt_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfrsqrt.d $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvfrsqrt.d(<4 x double> %va) ++ ret <4 x double> %res ++} +diff --git 
a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp.ll +new file mode 100644 +index 000000000000..e83e55a52a11 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvfrstp.b(<32 x i8>, <32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvfrstp_b(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind { ++; CHECK-LABEL: lasx_xvfrstp_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfrstp.b $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvfrstp.b(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvfrstp.h(<16 x i16>, <16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvfrstp_h(<16 x i16> %va, <16 x i16> %vb, <16 x i16> %vc) nounwind { ++; CHECK-LABEL: lasx_xvfrstp_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfrstp.h $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvfrstp.h(<16 x i16> %va, <16 x i16> %vb, <16 x i16> %vc) ++ ret <16 x i16> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvfrstpi_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfrstpi_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfrstpi.b $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> %va, <32 x i8> %vb, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvfrstpi_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfrstpi_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfrstpi.h $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> %va, <16 x i16> %vb, i32 1) ++ ret <16 x i16> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fsqrt.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fsqrt.ll +new file mode 100644 +index 000000000000..a13333d8d81c +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fsqrt.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <8 x float> @llvm.loongarch.lasx.xvfsqrt.s(<8 x float>) ++ ++define <8 x float> @lasx_xvfsqrt_s(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvfsqrt_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfsqrt.s $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvfsqrt.s(<8 x float> %va) ++ ret <8 x float> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvfsqrt.d(<4 x double>) ++ ++define <4 x double> @lasx_xvfsqrt_d(<4 x double> %va) nounwind { ++; CHECK-LABEL: lasx_xvfsqrt_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfsqrt.d $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvfsqrt.d(<4 x double> %va) ++ ret <4 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fsub.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fsub.ll +new file mode 100644 +index 000000000000..b52774a03618 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fsub.ll +@@ 
-0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <8 x float> @llvm.loongarch.lasx.xvfsub.s(<8 x float>, <8 x float>) ++ ++define <8 x float> @lasx_xvfsub_s(<8 x float> %va, <8 x float> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfsub_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfsub.s $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvfsub.s(<8 x float> %va, <8 x float> %vb) ++ ret <8 x float> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvfsub.d(<4 x double>, <4 x double>) ++ ++define <4 x double> @lasx_xvfsub_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvfsub_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvfsub.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvfsub.d(<4 x double> %va, <4 x double> %vb) ++ ret <4 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ftint.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ftint.ll +new file mode 100644 +index 000000000000..74cd507f16d2 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ftint.ll +@@ -0,0 +1,350 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s(<8 x float>) ++ ++define <8 x i32> @lasx_xvftintrne_w_s(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvftintrne_w_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvftintrne.w.s $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s(<8 x float> %va) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvftintrne.l.d(<4 x double>) ++ ++define <4 x i64> @lasx_xvftintrne_l_d(<4 x double> %va) nounwind { ++; CHECK-LABEL: lasx_xvftintrne_l_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvftintrne.l.d $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrne.l.d(<4 x double> %va) ++ ret <4 x i64> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.s(<8 x float>) ++ ++define <8 x i32> @lasx_xvftintrz_w_s(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvftintrz_w_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvftintrz.w.s $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.s(<8 x float> %va) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvftintrz.l.d(<4 x double>) ++ ++define <4 x i64> @lasx_xvftintrz_l_d(<4 x double> %va) nounwind { ++; CHECK-LABEL: lasx_xvftintrz_l_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvftintrz.l.d $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrz.l.d(<4 x double> %va) ++ ret <4 x i64> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.s(<8 x float>) ++ ++define <8 x i32> @lasx_xvftintrp_w_s(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvftintrp_w_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvftintrp.w.s $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.s(<8 x float> %va) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvftintrp.l.d(<4 x double>) ++ ++define <4 x i64> @lasx_xvftintrp_l_d(<4 x double> %va) nounwind { ++; CHECK-LABEL: lasx_xvftintrp_l_d: ++; CHECK: # %bb.0: # %entry ++; 
CHECK-NEXT: xvftintrp.l.d $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrp.l.d(<4 x double> %va) ++ ret <4 x i64> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.s(<8 x float>) ++ ++define <8 x i32> @lasx_xvftintrm_w_s(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvftintrm_w_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvftintrm.w.s $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.s(<8 x float> %va) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvftintrm.l.d(<4 x double>) ++ ++define <4 x i64> @lasx_xvftintrm_l_d(<4 x double> %va) nounwind { ++; CHECK-LABEL: lasx_xvftintrm_l_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvftintrm.l.d $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrm.l.d(<4 x double> %va) ++ ret <4 x i64> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvftint.w.s(<8 x float>) ++ ++define <8 x i32> @lasx_xvftint_w_s(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvftint_w_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvftint.w.s $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvftint.w.s(<8 x float> %va) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvftint.l.d(<4 x double>) ++ ++define <4 x i64> @lasx_xvftint_l_d(<4 x double> %va) nounwind { ++; CHECK-LABEL: lasx_xvftint_l_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvftint.l.d $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvftint.l.d(<4 x double> %va) ++ ret <4 x i64> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvftintrz.wu.s(<8 x float>) ++ ++define <8 x i32> @lasx_xvftintrz_wu_s(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvftintrz_wu_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvftintrz.wu.s $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvftintrz.wu.s(<8 x float> %va) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvftintrz.lu.d(<4 x double>) ++ ++define <4 x i64> @lasx_xvftintrz_lu_d(<4 x double> %va) nounwind { ++; CHECK-LABEL: lasx_xvftintrz_lu_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvftintrz.lu.d $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrz.lu.d(<4 x double> %va) ++ ret <4 x i64> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvftint.wu.s(<8 x float>) ++ ++define <8 x i32> @lasx_xvftint_wu_s(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvftint_wu_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvftint.wu.s $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvftint.wu.s(<8 x float> %va) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvftint.lu.d(<4 x double>) ++ ++define <4 x i64> @lasx_xvftint_lu_d(<4 x double> %va) nounwind { ++; CHECK-LABEL: lasx_xvftint_lu_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvftint.lu.d $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvftint.lu.d(<4 x double> %va) ++ ret <4 x i64> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.d(<4 x double>, <4 x double>) ++ ++define <8 x i32> @lasx_xvftintrne_w_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvftintrne_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvftintrne.w.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> 
@llvm.loongarch.lasx.xvftintrne.w.d(<4 x double> %va, <4 x double> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.d(<4 x double>, <4 x double>) ++ ++define <8 x i32> @lasx_xvftintrz_w_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvftintrz_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvftintrz.w.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.d(<4 x double> %va, <4 x double> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.d(<4 x double>, <4 x double>) ++ ++define <8 x i32> @lasx_xvftintrp_w_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvftintrp_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvftintrp.w.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.d(<4 x double> %va, <4 x double> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.d(<4 x double>, <4 x double>) ++ ++define <8 x i32> @lasx_xvftintrm_w_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvftintrm_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvftintrm.w.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.d(<4 x double> %va, <4 x double> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvftint.w.d(<4 x double>, <4 x double>) ++ ++define <8 x i32> @lasx_xvftint_w_d(<4 x double> %va, <4 x double> %vb) nounwind { ++; CHECK-LABEL: lasx_xvftint_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvftint.w.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvftint.w.d(<4 x double> %va, <4 x double> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvftintrnel.l.s(<8 x float>) ++ ++define <4 x i64> @lasx_xvftintrnel_l_s(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvftintrnel_l_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvftintrnel.l.s $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrnel.l.s(<8 x float> %va) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvftintrneh.l.s(<8 x float>) ++ ++define <4 x i64> @lasx_xvftintrneh_l_s(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvftintrneh_l_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvftintrneh.l.s $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrneh.l.s(<8 x float> %va) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvftintrzl.l.s(<8 x float>) ++ ++define <4 x i64> @lasx_xvftintrzl_l_s(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvftintrzl_l_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvftintrzl.l.s $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrzl.l.s(<8 x float> %va) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvftintrzh.l.s(<8 x float>) ++ ++define <4 x i64> @lasx_xvftintrzh_l_s(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvftintrzh_l_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvftintrzh.l.s $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrzh.l.s(<8 x float> %va) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvftintrpl.l.s(<8 x float>) ++ ++define <4 x i64> @lasx_xvftintrpl_l_s(<8 x float> %va) nounwind { ++; CHECK-LABEL: 
lasx_xvftintrpl_l_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvftintrpl.l.s $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrpl.l.s(<8 x float> %va) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvftintrph.l.s(<8 x float>) ++ ++define <4 x i64> @lasx_xvftintrph_l_s(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvftintrph_l_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvftintrph.l.s $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrph.l.s(<8 x float> %va) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvftintrml.l.s(<8 x float>) ++ ++define <4 x i64> @lasx_xvftintrml_l_s(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvftintrml_l_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvftintrml.l.s $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrml.l.s(<8 x float> %va) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvftintrmh.l.s(<8 x float>) ++ ++define <4 x i64> @lasx_xvftintrmh_l_s(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvftintrmh_l_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvftintrmh.l.s $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrmh.l.s(<8 x float> %va) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvftintl.l.s(<8 x float>) ++ ++define <4 x i64> @lasx_xvftintl_l_s(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvftintl_l_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvftintl.l.s $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvftintl.l.s(<8 x float> %va) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvftinth.l.s(<8 x float>) ++ ++define <4 x i64> @lasx_xvftinth_l_s(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvftinth_l_s: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvftinth.l.s $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvftinth.l.s(<8 x float> %va) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-haddw.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-haddw.ll +new file mode 100644 +index 000000000000..2c64ab23806b +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-haddw.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvhaddw.h.b(<32 x i8>, <32 x i8>) ++ ++define <16 x i16> @lasx_xvhaddw_h_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvhaddw_h_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvhaddw.h.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvhaddw.h.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvhaddw.w.h(<16 x i16>, <16 x i16>) ++ ++define <8 x i32> @lasx_xvhaddw_w_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvhaddw_w_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvhaddw.w.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvhaddw.w.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvhaddw.d.w(<8 x i32>, <8 x i32>) ++ ++define <4 x i64> @lasx_xvhaddw_d_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: 
lasx_xvhaddw_d_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvhaddw.d.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvhaddw.d.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvhaddw.q.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvhaddw_q_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvhaddw_q_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvhaddw.q.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvhaddw.q.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvhaddw.hu.bu(<32 x i8>, <32 x i8>) ++ ++define <16 x i16> @lasx_xvhaddw_hu_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvhaddw_hu_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvhaddw.hu.bu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvhaddw.hu.bu(<32 x i8> %va, <32 x i8> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvhaddw.wu.hu(<16 x i16>, <16 x i16>) ++ ++define <8 x i32> @lasx_xvhaddw_wu_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvhaddw_wu_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvhaddw.wu.hu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvhaddw.wu.hu(<16 x i16> %va, <16 x i16> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvhaddw.du.wu(<8 x i32>, <8 x i32>) ++ ++define <4 x i64> @lasx_xvhaddw_du_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvhaddw_du_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvhaddw.du.wu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvhaddw.du.wu(<8 x i32> %va, <8 x i32> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvhaddw.qu.du(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvhaddw_qu_du(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvhaddw_qu_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvhaddw.qu.du $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvhaddw.qu.du(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-hsubw.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-hsubw.ll +new file mode 100644 +index 000000000000..a5223c1d89a0 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-hsubw.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvhsubw.h.b(<32 x i8>, <32 x i8>) ++ ++define <16 x i16> @lasx_xvhsubw_h_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvhsubw_h_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvhsubw.h.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvhsubw.h.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvhsubw.w.h(<16 x i16>, <16 x i16>) ++ ++define <8 x i32> @lasx_xvhsubw_w_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvhsubw_w_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvhsubw.w.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvhsubw.w.h(<16 x i16> %va, <16 x 
i16> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvhsubw.d.w(<8 x i32>, <8 x i32>) ++ ++define <4 x i64> @lasx_xvhsubw_d_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvhsubw_d_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvhsubw.d.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvhsubw.d.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvhsubw.q.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvhsubw_q_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvhsubw_q_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvhsubw.q.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvhsubw.q.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvhsubw.hu.bu(<32 x i8>, <32 x i8>) ++ ++define <16 x i16> @lasx_xvhsubw_hu_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvhsubw_hu_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvhsubw.hu.bu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvhsubw.hu.bu(<32 x i8> %va, <32 x i8> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvhsubw.wu.hu(<16 x i16>, <16 x i16>) ++ ++define <8 x i32> @lasx_xvhsubw_wu_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvhsubw_wu_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvhsubw.wu.hu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvhsubw.wu.hu(<16 x i16> %va, <16 x i16> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvhsubw.du.wu(<8 x i32>, <8 x i32>) ++ ++define <4 x i64> @lasx_xvhsubw_du_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvhsubw_du_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvhsubw.du.wu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvhsubw.du.wu(<8 x i32> %va, <8 x i32> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvhsubw.qu.du(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvhsubw_qu_du(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvhsubw_qu_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvhsubw.qu.du $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvhsubw.qu.du(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ilv.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ilv.ll +new file mode 100644 +index 000000000000..c9d0ca6b0324 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ilv.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvilvl.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvilvl_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvilvl_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvilvl.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvilvl.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvilvl.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvilvl_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvilvl_h: ++; 
CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvilvl.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvilvl.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvilvl.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvilvl_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvilvl_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvilvl.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvilvl.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvilvl.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvilvl_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvilvl_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvilvl.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvilvl.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvilvh.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvilvh_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvilvh_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvilvh.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvilvh.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvilvh.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvilvh_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvilvh_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvilvh.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvilvh.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvilvh.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvilvh_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvilvh_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvilvh.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvilvh.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvilvh.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvilvh_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvilvh_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvilvh.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvilvh.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr.ll +new file mode 100644 +index 000000000000..ea98c96464ae +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr.ll +@@ -0,0 +1,28 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32>, i32, i32) ++ ++define <8 x i32> @lasx_xvinsgr2vr_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvinsgr2vr_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: ori $a0, $zero, 1 ++; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> %va, i32 1, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64>, i64, i32) ++ ++define <4 x i64> 
@lasx_xvinsgr2vr_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvinsgr2vr_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: ori $a0, $zero, 1 ++; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> %va, i64 1, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0.ll +new file mode 100644 +index 000000000000..27ae819c4144 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvinsve0_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvinsve0_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvinsve0.w $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> %va, <8 x i32> %vb, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvinsve0_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvinsve0_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvinsve0.d $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> %va, <4 x i64> %vb, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld.ll +new file mode 100644 +index 000000000000..5ffc629db466 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvld(i8*, i32) ++ ++define <32 x i8> @lasx_xvld(i8* %p) nounwind { ++; CHECK-LABEL: lasx_xvld: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvld(i8* %p, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvldx(i8*, i64) ++ ++define <32 x i8> @lasx_xvldx(i8* %p, i64 %b) nounwind { ++; CHECK-LABEL: lasx_xvldx: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvldx $xr0, $a0, $a1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvldx(i8* %p, i64 %b) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi.ll +new file mode 100644 +index 000000000000..59f79dd32af3 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi.ll +@@ -0,0 +1,62 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvldi(i32) ++ ++define <4 x i64> @lasx_xvldi() nounwind { ++; CHECK-LABEL: lasx_xvldi: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvldi $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvldi(i32 1) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32) ++ ++define <32 x i8> @lasx_xvrepli_b() nounwind { ++; CHECK-LABEL: lasx_xvrepli_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: 
xvrepli.b $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32) ++ ++define <16 x i16> @lasx_xvrepli_h() nounwind { ++; CHECK-LABEL: lasx_xvrepli_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvrepli.h $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32) ++ ++define <8 x i32> @lasx_xvrepli_w() nounwind { ++; CHECK-LABEL: lasx_xvrepli_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvrepli.w $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32) ++ ++define <4 x i64> @lasx_xvrepli_d() nounwind { ++; CHECK-LABEL: lasx_xvrepli_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvrepli.d $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl.ll +new file mode 100644 +index 000000000000..ae6abdf81cbc +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(i8*, i32) ++ ++define <32 x i8> @lasx_xvldrepl_b(i8* %p) nounwind { ++; CHECK-LABEL: lasx_xvldrepl_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvldrepl.b $xr0, $a0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(i8* %p, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(i8*, i32) ++ ++define <16 x i16> @lasx_xvldrepl_h(i8* %p) nounwind { ++; CHECK-LABEL: lasx_xvldrepl_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvldrepl.h $xr0, $a0, 2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(i8* %p, i32 2) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(i8*, i32) ++ ++define <8 x i32> @lasx_xvldrepl_w(i8* %p) nounwind { ++; CHECK-LABEL: lasx_xvldrepl_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvldrepl.w $xr0, $a0, 4 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(i8* %p, i32 4) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(i8*, i32) ++ ++define <4 x i64> @lasx_xvldrepl_d(i8* %p) nounwind { ++; CHECK-LABEL: lasx_xvldrepl_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvldrepl.d $xr0, $a0, 8 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(i8* %p, i32 8) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-madd.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-madd.ll +new file mode 100644 +index 000000000000..d3b09396727e +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-madd.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvmadd.b(<32 x i8>, <32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvmadd_b(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind { ++; 
CHECK-LABEL: lasx_xvmadd_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmadd.b $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvmadd.b(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmadd.h(<16 x i16>, <16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvmadd_h(<16 x i16> %va, <16 x i16> %vb, <16 x i16> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmadd_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmadd.h $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmadd.h(<16 x i16> %va, <16 x i16> %vb, <16 x i16> %vc) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmadd.w(<8 x i32>, <8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvmadd_w(<8 x i32> %va, <8 x i32> %vb, <8 x i32> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmadd_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmadd.w $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmadd.w(<8 x i32> %va, <8 x i32> %vb, <8 x i32> %vc) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmadd.d(<4 x i64>, <4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvmadd_d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmadd_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmadd.d $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmadd.d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-maddw.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-maddw.ll +new file mode 100644 +index 000000000000..146624a764a2 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-maddw.ll +@@ -0,0 +1,290 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.b(<16 x i16>, <32 x i8>, <32 x i8>) ++ ++define <16 x i16> @lasx_xvmaddwev_h_b(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmaddwev_h_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaddwev.h.b $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.b(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.h(<8 x i32>, <16 x i16>, <16 x i16>) ++ ++define <8 x i32> @lasx_xvmaddwev_w_h(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmaddwev_w_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaddwev.w.h $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.h(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w(<4 x i64>, <8 x i32>, <8 x i32>) ++ ++define <4 x i64> @lasx_xvmaddwev_d_w(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmaddwev_d_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaddwev.d.w $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.d(<4 x i64>, <4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvmaddwev_q_d(<4 x i64> %va, <4 x i64> 
%vb, <4 x i64> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmaddwev_q_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaddwev.q.d $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) ++ ret <4 x i64> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu(<16 x i16>, <32 x i8>, <32 x i8>) ++ ++define <16 x i16> @lasx_xvmaddwev_h_bu(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmaddwev_h_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaddwev.h.bu $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu(<8 x i32>, <16 x i16>, <16 x i16>) ++ ++define <8 x i32> @lasx_xvmaddwev_w_hu(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmaddwev_w_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaddwev.w.hu $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu(<4 x i64>, <8 x i32>, <8 x i32>) ++ ++define <4 x i64> @lasx_xvmaddwev_d_wu(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmaddwev_d_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaddwev.d.wu $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du(<4 x i64>, <4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvmaddwev_q_du(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmaddwev_q_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaddwev.q.du $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) ++ ret <4 x i64> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu.b(<16 x i16>, <32 x i8>, <32 x i8>) ++ ++define <16 x i16> @lasx_xvmaddwev_h_bu_b(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmaddwev_h_bu_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaddwev.h.bu.b $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu.b(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu.h(<8 x i32>, <16 x i16>, <16 x i16>) ++ ++define <8 x i32> @lasx_xvmaddwev_w_hu_h(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmaddwev_w_hu_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaddwev.w.hu.h $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu.h(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu.w(<4 x i64>, <8 x i32>, <8 x i32>) ++ ++define <4 x i64> @lasx_xvmaddwev_d_wu_w(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmaddwev_d_wu_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaddwev.d.wu.w $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> 
@llvm.loongarch.lasx.xvmaddwev.d.wu.w(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du.d(<4 x i64>, <4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvmaddwev_q_du_d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmaddwev_q_du_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaddwev.q.du.d $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du.d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) ++ ret <4 x i64> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.b(<16 x i16>, <32 x i8>, <32 x i8>) ++ ++define <16 x i16> @lasx_xvmaddwod_h_b(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmaddwod_h_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaddwod.h.b $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.b(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.h(<8 x i32>, <16 x i16>, <16 x i16>) ++ ++define <8 x i32> @lasx_xvmaddwod_w_h(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmaddwod_w_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaddwod.w.h $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.h(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.w(<4 x i64>, <8 x i32>, <8 x i32>) ++ ++define <4 x i64> @lasx_xvmaddwod_d_w(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmaddwod_d_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaddwod.d.w $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.w(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.d(<4 x i64>, <4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvmaddwod_q_d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmaddwod_q_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaddwod.q.d $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) ++ ret <4 x i64> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu(<16 x i16>, <32 x i8>, <32 x i8>) ++ ++define <16 x i16> @lasx_xvmaddwod_h_bu(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmaddwod_h_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaddwod.h.bu $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu(<8 x i32>, <16 x i16>, <16 x i16>) ++ ++define <8 x i32> @lasx_xvmaddwod_w_hu(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmaddwod_w_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaddwod.w.hu $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu(<4 x i64>, <8 x i32>, <8 x i32>) ++ ++define <4 x i64> @lasx_xvmaddwod_d_wu(<4 x i64> %va, <8 x i32> 
%vb, <8 x i32> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmaddwod_d_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaddwod.d.wu $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du(<4 x i64>, <4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvmaddwod_q_du(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmaddwod_q_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaddwod.q.du $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) ++ ret <4 x i64> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu.b(<16 x i16>, <32 x i8>, <32 x i8>) ++ ++define <16 x i16> @lasx_xvmaddwod_h_bu_b(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmaddwod_h_bu_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaddwod.h.bu.b $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu.b(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu.h(<8 x i32>, <16 x i16>, <16 x i16>) ++ ++define <8 x i32> @lasx_xvmaddwod_w_hu_h(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmaddwod_w_hu_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaddwod.w.hu.h $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu.h(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu.w(<4 x i64>, <8 x i32>, <8 x i32>) ++ ++define <4 x i64> @lasx_xvmaddwod_d_wu_w(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmaddwod_d_wu_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaddwod.d.wu.w $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu.w(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du.d(<4 x i64>, <4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvmaddwod_q_du_d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmaddwod_q_du_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaddwod.q.du.d $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du.d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max.ll +new file mode 100644 +index 000000000000..9cf09df4439a +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max.ll +@@ -0,0 +1,194 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvmax.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvmax_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmax_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmax.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvmax.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmax.h(<16 
x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvmax_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmax_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmax.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmax.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmax.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvmax_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmax_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmax.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmax.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmax.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvmax_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmax_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmax.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmax.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvmaxi_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvmaxi_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaxi.b $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> %va, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvmaxi_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvmaxi_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaxi.h $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> %va, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvmaxi_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvmaxi_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaxi.w $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> %va, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvmaxi_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvmaxi_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaxi.d $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> %va, i32 1) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvmax.bu(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_vmax_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_vmax_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmax.bu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvmax.bu(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmax.hu(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvmax_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmax_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmax.hu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmax.hu(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmax.wu(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvmax_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; 
CHECK-LABEL: lasx_xvmax_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmax.wu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmax.wu(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmax.du(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvmax_du(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmax_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmax.du $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmax.du(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvmaxi_bu(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvmaxi_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaxi.bu $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> %va, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvmaxi_hu(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvmaxi_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaxi.hu $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> %va, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvmaxi_wu(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvmaxi_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaxi.wu $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> %va, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvmaxi_du(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvmaxi_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmaxi.du $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> %va, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min.ll +new file mode 100644 +index 000000000000..c94b1e4ea44c +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min.ll +@@ -0,0 +1,194 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvmin.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvmin_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmin_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmin.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvmin.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmin.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvmin_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmin_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmin.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmin.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmin.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvmin_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmin_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: 
xvmin.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmin.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmin.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvmin_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmin_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmin.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmin.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvmini_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvmini_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmini.b $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> %va, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvmini_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvmini_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmini.h $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> %va, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvmini_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvmini_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmini.w $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> %va, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvmini_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvmini_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmini.d $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> %va, i32 1) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvmin.bu(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvmin_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmin_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmin.bu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvmin.bu(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmin.hu(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvmin_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmin_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmin.hu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmin.hu(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmin.wu(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvmin_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmin_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmin.wu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmin.wu(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmin.du(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvmin_du(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmin_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmin.du $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> 
@llvm.loongarch.lasx.xvmin.du(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvmini_bu(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvmini_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmini.bu $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> %va, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvmini_hu(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvmini_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmini.hu $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> %va, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvmini_wu(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvmini_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmini.wu $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> %va, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvmini_du(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvmini_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmini.du $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> %va, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mod.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mod.ll +new file mode 100644 +index 000000000000..a177246bb235 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mod.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvmod.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvmod_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmod_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmod.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvmod.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmod.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvmod_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmod_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmod.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmod.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmod.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvmod_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmod_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmod.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmod.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmod.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvmod_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmod_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmod.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmod.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} 
++ ++declare <32 x i8> @llvm.loongarch.lasx.xvmod.bu(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvmod_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmod_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmod.bu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvmod.bu(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmod.hu(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvmod_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmod_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmod.hu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmod.hu(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmod.wu(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvmod_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmod_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmod.wu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmod.wu(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmod.du(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvmod_du(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmod_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmod.du $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmod.du(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mskgez.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mskgez.ll +new file mode 100644 +index 000000000000..da87c20ad6ee +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mskgez.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvmskgez.b(<32 x i8>) ++ ++define <32 x i8> @lasx_xvmskgez_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvmskgez_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmskgez.b $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvmskgez.b(<32 x i8> %va) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mskltz.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mskltz.ll +new file mode 100644 +index 000000000000..b2218487535c +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mskltz.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvmskltz.b(<32 x i8>) ++ ++define <32 x i8> @lasx_xvmskltz_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvmskltz_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmskltz.b $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvmskltz.b(<32 x i8> %va) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmskltz.h(<16 x i16>) ++ ++define <16 x i16> @lasx_xvmskltz_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvmskltz_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmskltz.h $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmskltz.h(<16 x i16> %va) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> 
@llvm.loongarch.lasx.xvmskltz.w(<8 x i32>) ++ ++define <8 x i32> @lasx_xvmskltz_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvmskltz_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmskltz.w $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmskltz.w(<8 x i32> %va) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmskltz.d(<4 x i64>) ++ ++define <4 x i64> @lasx_xvmskltz_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvmskltz_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmskltz.d $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmskltz.d(<4 x i64> %va) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-msknz.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-msknz.ll +new file mode 100644 +index 000000000000..becd2c883a7e +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-msknz.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvmsknz.b(<32 x i8>) ++ ++define <32 x i8> @lasx_xvmsknz_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvmsknz_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmsknz.b $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvmsknz.b(<32 x i8> %va) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-msub.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-msub.ll +new file mode 100644 +index 000000000000..c89f9578b77d +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-msub.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvmsub.b(<32 x i8>, <32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvmsub_b(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmsub_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmsub.b $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvmsub.b(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmsub.h(<16 x i16>, <16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvmsub_h(<16 x i16> %va, <16 x i16> %vb, <16 x i16> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmsub_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmsub.h $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmsub.h(<16 x i16> %va, <16 x i16> %vb, <16 x i16> %vc) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmsub.w(<8 x i32>, <8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvmsub_w(<8 x i32> %va, <8 x i32> %vb, <8 x i32> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmsub_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmsub.w $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmsub.w(<8 x i32> %va, <8 x i32> %vb, <8 x i32> %vc) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmsub.d(<4 x i64>, <4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvmsub_d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) nounwind { ++; CHECK-LABEL: lasx_xvmsub_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmsub.d $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> 
@llvm.loongarch.lasx.xvmsub.d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-muh.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-muh.ll +new file mode 100644 +index 000000000000..97461512ce16 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-muh.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvmuh.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvmuh_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmuh_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmuh.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvmuh.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmuh.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvmuh_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmuh_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmuh.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmuh.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmuh.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvmuh_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmuh_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmuh.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmuh.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmuh.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvmuh_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmuh_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmuh.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmuh.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvmuh.bu(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvmuh_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmuh_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmuh.bu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvmuh.bu(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmuh.hu(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvmuh_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmuh_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmuh.hu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmuh.hu(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmuh.wu(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvmuh_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmuh_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmuh.wu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmuh.wu(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmuh.du(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvmuh_du(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmuh_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmuh.du $xr0, $xr0, $xr1 ++; 
CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmuh.du(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mul.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mul.ll +new file mode 100644 +index 000000000000..d5d852e58a9f +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mul.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvmul.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvmul_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmul_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmul.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvmul.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmul.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvmul_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmul_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmul.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmul.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmul.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvmul_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmul_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmul.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmul.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmul.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvmul_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmul_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmul.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmul.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mulw.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mulw.ll +new file mode 100644 +index 000000000000..f69e64aa7698 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mulw.ll +@@ -0,0 +1,290 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.b(<32 x i8>, <32 x i8>) ++ ++define <16 x i16> @lasx_xvmulwev_h_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmulwev_h_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmulwev.h.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h(<16 x i16>, <16 x i16>) ++ ++define <8 x i32> @lasx_xvmulwev_w_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmulwev_w_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmulwev.w.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.w(<8 x i32>, <8 x i32>) ++ ++define <4 x i64> @lasx_xvmulwev_d_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; 
CHECK-LABEL: lasx_xvmulwev_d_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmulwev.d.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvmulwev_q_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmulwev_q_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmulwev.q.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu(<32 x i8>, <32 x i8>) ++ ++define <16 x i16> @lasx_xvmulwev_h_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmulwev_h_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmulwev.h.bu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu(<32 x i8> %va, <32 x i8> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu(<16 x i16>, <16 x i16>) ++ ++define <8 x i32> @lasx_xvmulwev_w_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmulwev_w_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmulwev.w.hu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu(<16 x i16> %va, <16 x i16> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu(<8 x i32>, <8 x i32>) ++ ++define <4 x i64> @lasx_xvmulwev_d_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmulwev_d_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmulwev.d.wu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu(<8 x i32> %va, <8 x i32> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvmulwev_q_du(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmulwev_q_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmulwev.q.du $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu.b(<32 x i8>, <32 x i8>) ++ ++define <16 x i16> @lasx_xvmulwev_h_bu_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmulwev_h_bu_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmulwev.h.bu.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu.h(<16 x i16>, <16 x i16>) ++ ++define <8 x i32> @lasx_xvmulwev_w_hu_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmulwev_w_hu_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmulwev.w.hu.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu.w(<8 x i32>, <8 x i32>) ++ ++define <4 x i64> @lasx_xvmulwev_d_wu_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmulwev_d_wu_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmulwev.d.wu.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res 
= call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvmulwev_q_du_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmulwev_q_du_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmulwev.q.du.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.b(<32 x i8>, <32 x i8>) ++ ++define <16 x i16> @lasx_xvmulwod_h_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmulwod_h_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmulwod.h.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.h(<16 x i16>, <16 x i16>) ++ ++define <8 x i32> @lasx_xvmulwod_w_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmulwod_w_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmulwod.w.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.w(<8 x i32>, <8 x i32>) ++ ++define <4 x i64> @lasx_xvmulwod_d_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmulwod_d_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmulwod.d.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvmulwod_q_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmulwod_q_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmulwod.q.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu(<32 x i8>, <32 x i8>) ++ ++define <16 x i16> @lasx_xvmulwod_h_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmulwod_h_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmulwod.h.bu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu(<32 x i8> %va, <32 x i8> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu(<16 x i16>, <16 x i16>) ++ ++define <8 x i32> @lasx_xvmulwod_w_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmulwod_w_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmulwod.w.hu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu(<16 x i16> %va, <16 x i16> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu(<8 x i32>, <8 x i32>) ++ ++define <4 x i64> @lasx_xvmulwod_d_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmulwod_d_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmulwod.d.wu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu(<8 x i32> %va, <8 x i32> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du(<4 x 
i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvmulwod_q_du(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmulwod_q_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmulwod.q.du $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu.b(<32 x i8>, <32 x i8>) ++ ++define <16 x i16> @lasx_xvmulwod_h_bu_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmulwod_h_bu_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmulwod.h.bu.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu.h(<16 x i16>, <16 x i16>) ++ ++define <8 x i32> @lasx_xvmulwod_w_hu_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmulwod_w_hu_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmulwod.w.hu.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu.w(<8 x i32>, <8 x i32>) ++ ++define <4 x i64> @lasx_xvmulwod_d_wu_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmulwod_d_wu_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmulwod.d.wu.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvmulwod_q_du_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvmulwod_q_du_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvmulwod.q.du.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-neg.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-neg.ll +new file mode 100644 +index 000000000000..ecbedf334657 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-neg.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvneg.b(<32 x i8>) ++ ++define <32 x i8> @lasx_xvneg_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvneg_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvneg.b $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvneg.b(<32 x i8> %va) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvneg.h(<16 x i16>) ++ ++define <16 x i16> @lasx_xvneg_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvneg_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvneg.h $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvneg.h(<16 x i16> %va) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvneg.w(<8 x i32>) ++ ++define <8 x i32> @lasx_xvneg_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvneg_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvneg.w $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvneg.w(<8 x i32> %va) ++ ret <8 x i32> %res ++} ++ ++declare 
<4 x i64> @llvm.loongarch.lasx.xvneg.d(<4 x i64>) ++ ++define <4 x i64> @lasx_xvneg_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvneg_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvneg.d $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvneg.d(<4 x i64> %va) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nor.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nor.ll +new file mode 100644 +index 000000000000..674746b7624e +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nor.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvnor.v(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvnor_v(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvnor_v: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvnor.v $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvnor.v(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori.ll +new file mode 100644 +index 000000000000..55eebf87ee92 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvnori_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvnori_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvnori.b $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> %va, i32 1) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-or.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-or.ll +new file mode 100644 +index 000000000000..16462cfafc54 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-or.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvor.v(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvor_v(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvor_v: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvor.v(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori.ll +new file mode 100644 +index 000000000000..8e53d88bac37 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvori_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvori_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvori.b $xr0, $xr0, 3 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> %va, i32 3) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-orn.ll 
b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-orn.ll +new file mode 100644 +index 000000000000..3a335cdd3716 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-orn.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvorn.v(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvorn_v(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvorn_v: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvorn.v $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvorn.v(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pack.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pack.ll +new file mode 100644 +index 000000000000..512b30234917 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pack.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvpackev.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvpackev_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvpackev_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpackev.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvpackev.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvpackev.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvpackev_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvpackev_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpackev.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvpackev.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvpackev.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvpackev_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvpackev_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpackev.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvpackev.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvpackev.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvpackev_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvpackev_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpackev.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvpackev.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvpackod.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvpackod_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvpackod_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpackod.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvpackod.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvpackod.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvpackod_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvpackod_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpackod.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvpackod.h(<16 x 
i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvpackod.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvpackod_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvpackod_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpackod.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvpackod.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvpackod.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvpackod_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvpackod_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpackod.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvpackod.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pcnt.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pcnt.ll +new file mode 100644 +index 000000000000..d77f1d2082c8 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pcnt.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvpcnt.b(<32 x i8>) ++ ++define <32 x i8> @lasx_xvpcnt_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvpcnt_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpcnt.b $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvpcnt.b(<32 x i8> %va) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvpcnt.h(<16 x i16>) ++ ++define <16 x i16> @lasx_xvpcnt_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvpcnt_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpcnt.h $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvpcnt.h(<16 x i16> %va) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvpcnt.w(<8 x i32>) ++ ++define <8 x i32> @lasx_xvpcnt_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvpcnt_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpcnt.w $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvpcnt.w(<8 x i32> %va) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvpcnt.d(<4 x i64>) ++ ++define <4 x i64> @lasx_xvpcnt_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvpcnt_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpcnt.d $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvpcnt.d(<4 x i64> %va) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-perm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-perm.ll +new file mode 100644 +index 000000000000..4ec434edd4ec +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-perm.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvperm.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvperm_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvperm_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvperm.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvperm.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} +diff --git 
a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi.ll +new file mode 100644 +index 000000000000..0d9f9daabc44 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi.ll +@@ -0,0 +1,38 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvpermi_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvpermi_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpermi.w $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> %va, <8 x i32> %vb, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvpermi_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvpermi_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpermi.d $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> %va, i32 1) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvpermi_q(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvpermi_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> %va, <32 x i8> %vb, i32 1) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pick.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pick.ll +new file mode 100644 +index 000000000000..bbd6d693ca0b +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pick.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvpickev.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvpickev_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvpickev_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpickev.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvpickev.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvpickev.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvpickev_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvpickev_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpickev.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvpickev.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvpickev.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvpickev_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvpickev_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpickev.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvpickev.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvpickev.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvpickev_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvpickev_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpickev.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> 
@llvm.loongarch.lasx.xvpickev.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvpickod.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvpickod_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvpickod_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpickod.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvpickod.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvpickod.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvpickod_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvpickod_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpickod.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvpickod.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvpickod.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvpickod_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvpickod_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpickod.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvpickod.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvpickod.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvpickod_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvpickod_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpickod.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvpickod.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve.ll +new file mode 100644 +index 000000000000..546777bc72ab +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvpickve_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvpickve_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpickve.w $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> %va, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvpickve_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvpickve_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpickve.d $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> %va, i32 1) ++ ret <4 x i64> %res ++} ++ ++declare <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float>, i32) ++ ++define <8 x float> @lasx_xvpickve_w_f(<8 x float> %va) nounwind { ++; CHECK-LABEL: lasx_xvpickve_w_f: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpickve.w $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> %va, i32 1) ++ ret <8 x float> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double>, i32) ++ ++define <4 x double> @lasx_xvpickve_d_f(<4 x double> %va) nounwind { ++; CHECK-LABEL: lasx_xvpickve_d_f: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpickve.d $xr0, $xr0, 1 ++; 
CHECK-NEXT: ret ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> %va, i32 1) ++ ret <4 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr.ll +new file mode 100644 +index 000000000000..0617e7424321 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr.ll +@@ -0,0 +1,53 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++ ++ ++ ++declare i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32>, i32) ++ ++define i32 @lasx_xvpickve2gr_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvpickve2gr_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> %va, i32 1) ++ ret i32 %res ++} ++ ++declare i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64>, i32) ++ ++define i64 @lasx_xvpickve2gr_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvpickve2gr_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> %va, i32 1) ++ ret i64 %res ++} ++ ++declare i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32>, i32) ++ ++define i32 @lasx_xvpickve2gr_wu(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvpickve2gr_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> %va, i32 1) ++ ret i32 %res ++} ++ ++declare i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64>, i32) ++ ++define i64 @lasx_xvpickve2gr_du(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvpickve2gr_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvpickve2gr.du $a0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> %va, i32 1) ++ ret i64 %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei.ll +new file mode 100644 +index 000000000000..25fab44f461f +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvrepl128vei_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvrepl128vei_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvrepl128vei.b $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> %va, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvrepl128vei_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvrepl128vei_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvrepl128vei.h $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> %va, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvrepl128vei_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvrepl128vei_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvrepl128vei.w $xr0, $xr0, 1 ++; CHECK-NEXT: ret 
++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> %va, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvrepl128vei_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvrepl128vei_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvrepl128vei.d $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> %va, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replgr2vr.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replgr2vr.ll +new file mode 100644 +index 000000000000..c71abd2205c6 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replgr2vr.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvreplgr2vr.b(i32) ++ ++define <32 x i8> @lasx_xvreplgr2vr_b(i32 %a) nounwind { ++; CHECK-LABEL: lasx_xvreplgr2vr_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvreplgr2vr.b $xr0, $a0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvreplgr2vr.b(i32 %a) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvreplgr2vr.h(i32) ++ ++define <16 x i16> @lasx_xvreplgr2vr_h(i32 %a) nounwind { ++; CHECK-LABEL: lasx_xvreplgr2vr_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvreplgr2vr.h $xr0, $a0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvreplgr2vr.h(i32 %a) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w(i32) ++ ++define <8 x i32> @lasx_xvreplgr2vr_w(i32 %a) nounwind { ++; CHECK-LABEL: lasx_xvreplgr2vr_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvreplgr2vr.w $xr0, $a0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w(i32 %a) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(i64) ++ ++define <4 x i64> @lasx_xvreplgr2vr_d(i64 %a) nounwind { ++; CHECK-LABEL: lasx_xvreplgr2vr_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvreplgr2vr.d $xr0, $a0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(i64 %a) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replve.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replve.ll +new file mode 100644 +index 000000000000..21d36ff7bb5e +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replve.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvreplve.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvreplve_b(<32 x i8> %va, i32 %b) nounwind { ++; CHECK-LABEL: lasx_xvreplve_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvreplve.b $xr0, $xr0, $a0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvreplve.b(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvreplve.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvreplve_h(<16 x i16> %va, i32 %b) nounwind { ++; CHECK-LABEL: lasx_xvreplve_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvreplve.h $xr0, $xr0, $a0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvreplve.h(<16 x i16> %va, i32 %b) ++ ret <16 x i16> %res ++} 
++ ++declare <8 x i32> @llvm.loongarch.lasx.xvreplve.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvreplve_w(<8 x i32> %va, i32 %b) nounwind { ++; CHECK-LABEL: lasx_xvreplve_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvreplve.w $xr0, $xr0, $a0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvreplve.w(<8 x i32> %va, i32 %b) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvreplve.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvreplve_d(<4 x i64> %va, i32 %b) nounwind { ++; CHECK-LABEL: lasx_xvreplve_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvreplve.d $xr0, $xr0, $a0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvreplve.d(<4 x i64> %va, i32 %b) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replve0.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replve0.ll +new file mode 100644 +index 000000000000..7996bb36ef03 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replve0.ll +@@ -0,0 +1,62 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvreplve0.b(<32 x i8>) ++ ++define <32 x i8> @lasx_xvreplve0_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvreplve0_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvreplve0.b $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvreplve0.b(<32 x i8> %va) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvreplve0.h(<16 x i16>) ++ ++define <16 x i16> @lasx_xvreplve0_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvreplve0_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvreplve0.h $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvreplve0.h(<16 x i16> %va) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvreplve0.w(<8 x i32>) ++ ++define <8 x i32> @lasx_xvreplve0_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvreplve0_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvreplve0.w $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvreplve0.w(<8 x i32> %va) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvreplve0.d(<4 x i64>) ++ ++define <4 x i64> @lasx_xvreplve0_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvreplve0_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvreplve0.d $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvreplve0.d(<4 x i64> %va) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvreplve0.q(<32 x i8>) ++ ++define <32 x i8> @lasx_xvreplve0_q(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvreplve0_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvreplve0.q $xr0, $xr0 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvreplve0.q(<32 x i8> %va) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr.ll +new file mode 100644 +index 000000000000..64d2773864e9 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvrotr.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvrotr_b(<32 x 
i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvrotr_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvrotr.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvrotr.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvrotr.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvrotr_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvrotr_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvrotr.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvrotr.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvrotr.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvrotr_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvrotr_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvrotr.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvrotr.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvrotr.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvrotr_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvrotr_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvrotr.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvrotr.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvrotri_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvrotri_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvrotri.b $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> %va, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvrotri_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvrotri_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvrotri.h $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> %va, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvrotri_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvrotri_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvrotri.w $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> %va, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvrotri_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvrotri_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvrotri.d $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> %va, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sadd.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sadd.ll +new file mode 100644 +index 000000000000..54a5e2e9c833 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sadd.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsadd.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvsadd_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: 
lasx_xvsadd_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsadd.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsadd.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsadd.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvsadd_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsadd_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsadd.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsadd.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsadd.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvsadd_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsadd_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsadd.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsadd.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsadd.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvsadd_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsadd_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsadd.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsadd.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsadd.bu(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvsadd_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsadd_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsadd.bu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsadd.bu(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsadd.hu(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvsadd_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsadd_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsadd.hu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsadd.hu(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsadd.wu(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvsadd_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsadd_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsadd.wu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsadd.wu(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsadd.du(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvsadd_du(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsadd_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsadd.du $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsadd.du(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat.ll +new file mode 100644 +index 000000000000..293b9dc9eb4d +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8>, i32) ++ ++define <32 x i8> 
@lasx_xvsat_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvsat_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsat.b $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> %va, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsat_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvsat_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsat.h $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> %va, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsat_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvsat_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsat.w $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> %va, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsat_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvsat_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsat.d $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> %va, i32 1) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsat_bu(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvsat_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsat.bu $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> %va, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsat_hu(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvsat_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsat.hu $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> %va, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsat_wu(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvsat_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsat.wu $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> %va, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsat_du(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvsat_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsat.du $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> %va, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq.ll +new file mode 100644 +index 000000000000..83bc93c88c73 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvseq.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvseq_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvseq_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvseq.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> 
@llvm.loongarch.lasx.xvseq.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvseq.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvseq_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvseq_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvseq.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvseq.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvseq.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvseq_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvseq_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvseq.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvseq.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvseq.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvseq_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvseq_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvseq.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvseq.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvseqi_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvseqi_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvseqi.b $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> %va, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvseqi_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvseqi_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvseqi.h $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> %va, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvseqi_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvseqi_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvseqi.w $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> %va, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvseqi_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvseqi_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvseqi.d $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> %va, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-set.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-set.ll +new file mode 100644 +index 000000000000..6e3e2e0330f5 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-set.ll +@@ -0,0 +1,38 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare i32 @llvm.loongarch.lasx.xbz.v(<32 x i8>) ++ ++define i32 @lasx_xbz_v(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xbz_v: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvseteqz.v $fcc0, $xr0 ++; CHECK-NEXT: bcnez $fcc0, .LBB0_2 ++; CHECK-NEXT: # %bb.1: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 0 ++; CHECK-NEXT: ret ++; CHECK-NEXT: .LBB0_2: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 1 ++; 
CHECK-NEXT: ret ++entry: ++ %res = call i32 @llvm.loongarch.lasx.xbz.v(<32 x i8> %va) ++ ret i32 %res ++} ++ ++declare i32 @llvm.loongarch.lasx.xbnz.v(<32 x i8>) ++ ++define i32 @lasx_xbnz_v(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xbnz_v: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsetnez.v $fcc0, $xr0 ++; CHECK-NEXT: bcnez $fcc0, .LBB1_2 ++; CHECK-NEXT: # %bb.1: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 0 ++; CHECK-NEXT: ret ++; CHECK-NEXT: .LBB1_2: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call i32 @llvm.loongarch.lasx.xbnz.v(<32 x i8> %va) ++ ret i32 %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-setallnez.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-setallnez.ll +new file mode 100644 +index 000000000000..a466b78bf8d2 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-setallnez.ll +@@ -0,0 +1,74 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare i32 @llvm.loongarch.lasx.xbnz.b(<32 x i8>) ++ ++define i32 @lasx_xbnz_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xbnz_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsetallnez.b $fcc0, $xr0 ++; CHECK-NEXT: bcnez $fcc0, .LBB0_2 ++; CHECK-NEXT: # %bb.1: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 0 ++; CHECK-NEXT: ret ++; CHECK-NEXT: .LBB0_2: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call i32 @llvm.loongarch.lasx.xbnz.b(<32 x i8> %va) ++ ret i32 %res ++} ++ ++declare i32 @llvm.loongarch.lasx.xbnz.h(<16 x i16>) ++ ++define i32 @lasx_xbnz_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xbnz_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsetallnez.h $fcc0, $xr0 ++; CHECK-NEXT: bcnez $fcc0, .LBB1_2 ++; CHECK-NEXT: # %bb.1: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 0 ++; CHECK-NEXT: ret ++; CHECK-NEXT: .LBB1_2: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call i32 @llvm.loongarch.lasx.xbnz.h(<16 x i16> %va) ++ ret i32 %res ++} ++ ++declare i32 @llvm.loongarch.lasx.xbnz.w(<8 x i32>) ++ ++define i32 @lasx_xbnz_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xbnz_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsetallnez.w $fcc0, $xr0 ++; CHECK-NEXT: bcnez $fcc0, .LBB2_2 ++; CHECK-NEXT: # %bb.1: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 0 ++; CHECK-NEXT: ret ++; CHECK-NEXT: .LBB2_2: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call i32 @llvm.loongarch.lasx.xbnz.w(<8 x i32> %va) ++ ret i32 %res ++} ++ ++declare i32 @llvm.loongarch.lasx.xbnz.d(<4 x i64>) ++ ++define i32 @lasx_xbnz_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xbnz_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsetallnez.d $fcc0, $xr0 ++; CHECK-NEXT: bcnez $fcc0, .LBB3_2 ++; CHECK-NEXT: # %bb.1: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 0 ++; CHECK-NEXT: ret ++; CHECK-NEXT: .LBB3_2: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call i32 @llvm.loongarch.lasx.xbnz.d(<4 x i64> %va) ++ ret i32 %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-setanyeqz.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-setanyeqz.ll +new file mode 100644 +index 000000000000..36e65fc5b328 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-setanyeqz.ll +@@ -0,0 +1,74 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx 
< %s | FileCheck %s ++ ++declare i32 @llvm.loongarch.lasx.xbz.b(<32 x i8>) ++ ++define i32 @lasx_xbz_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xbz_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsetanyeqz.b $fcc0, $xr0 ++; CHECK-NEXT: bcnez $fcc0, .LBB0_2 ++; CHECK-NEXT: # %bb.1: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 0 ++; CHECK-NEXT: ret ++; CHECK-NEXT: .LBB0_2: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call i32 @llvm.loongarch.lasx.xbz.b(<32 x i8> %va) ++ ret i32 %res ++} ++ ++declare i32 @llvm.loongarch.lasx.xbz.h(<16 x i16>) ++ ++define i32 @lasx_xbz_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xbz_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsetanyeqz.h $fcc0, $xr0 ++; CHECK-NEXT: bcnez $fcc0, .LBB1_2 ++; CHECK-NEXT: # %bb.1: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 0 ++; CHECK-NEXT: ret ++; CHECK-NEXT: .LBB1_2: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call i32 @llvm.loongarch.lasx.xbz.h(<16 x i16> %va) ++ ret i32 %res ++} ++ ++declare i32 @llvm.loongarch.lasx.xbz.w(<8 x i32>) ++ ++define i32 @lasx_xbz_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xbz_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsetanyeqz.w $fcc0, $xr0 ++; CHECK-NEXT: bcnez $fcc0, .LBB2_2 ++; CHECK-NEXT: # %bb.1: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 0 ++; CHECK-NEXT: ret ++; CHECK-NEXT: .LBB2_2: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call i32 @llvm.loongarch.lasx.xbz.w(<8 x i32> %va) ++ ret i32 %res ++} ++ ++declare i32 @llvm.loongarch.lasx.xbz.d(<4 x i64>) ++ ++define i32 @lasx_xbz_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xbz_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsetanyeqz.d $fcc0, $xr0 ++; CHECK-NEXT: bcnez $fcc0, .LBB3_2 ++; CHECK-NEXT: # %bb.1: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 0 ++; CHECK-NEXT: ret ++; CHECK-NEXT: .LBB3_2: # %entry ++; CHECK-NEXT: addi.w $a0, $zero, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call i32 @llvm.loongarch.lasx.xbz.d(<4 x i64> %va) ++ ret i32 %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf.ll +new file mode 100644 +index 000000000000..9b9140f6ad62 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvshuf.b(<32 x i8>, <32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvshuf_b(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind { ++; CHECK-LABEL: lasx_xvshuf_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvshuf.b $xr0, $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvshuf.b(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvshuf.h(<16 x i16>, <16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvshuf_h(<16 x i16> %va, <16 x i16> %vb, <16 x i16> %vc) nounwind { ++; CHECK-LABEL: lasx_xvshuf_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvshuf.h $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvshuf.h(<16 x i16> %va, <16 x i16> %vb, <16 x i16> %vc) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvshuf.w(<8 x i32>, <8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvshuf_w(<8 x i32> %va, <8 x i32> %vb, 
<8 x i32> %vc) nounwind { ++; CHECK-LABEL: lasx_xvshuf_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvshuf.w $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvshuf.w(<8 x i32> %va, <8 x i32> %vb, <8 x i32> %vc) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvshuf.d(<4 x i64>, <4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvshuf_d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) nounwind { ++; CHECK-LABEL: lasx_xvshuf_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvshuf.d $xr0, $xr1, $xr2 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvshuf.d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i.ll +new file mode 100644 +index 000000000000..31205086759c +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvshuf4i_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvshuf4i_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvshuf4i.b $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> %va, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvshuf4i_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvshuf4i_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvshuf4i.h $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> %va, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvshuf4i_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvshuf4i_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvshuf4i.w $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> %va, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvshuf4i_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvshuf4i_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvshuf4i.d $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> %va, <4 x i64> %vb, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-signcov.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-signcov.ll +new file mode 100644 +index 000000000000..e6c6d8ccd0d3 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-signcov.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsigncov.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvsigncov_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsigncov_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsigncov.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsigncov.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> 
@llvm.loongarch.lasx.xvsigncov.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvsigncov_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsigncov_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsigncov.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsigncov.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsigncov.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvsigncov_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsigncov_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsigncov.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsigncov.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsigncov.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvsigncov_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsigncov_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsigncov.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsigncov.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle.ll +new file mode 100644 +index 000000000000..8895efc84b84 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle.ll +@@ -0,0 +1,194 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsle.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvsle_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsle_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsle.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsle.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsle.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvsle_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsle_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsle.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsle.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsle.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvsle_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsle_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsle.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsle.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsle.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvsle_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsle_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsle.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsle.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvslei_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvslei_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvslei.b $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> %va, i32 1) ++ ret <32 
x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvslei_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvslei_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvslei.h $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> %va, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvslei_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvslei_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvslei.w $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> %va, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvslei_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvslei_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvslei.d $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> %va, i32 1) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsle.bu(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvsle_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsle_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsle.bu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsle.bu(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsle.hu(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvsle_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsle_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsle.hu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsle.hu(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsle.wu(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvsle_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsle_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsle.wu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsle.wu(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsle.du(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvsle_du(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsle_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsle.du $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsle.du(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvslei_bu(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvslei_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvslei.bu $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> %va, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvslei_hu(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvslei_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvslei.hu $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> %va, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32>, i32) ++ ++define <8 x i32> 
@lasx_xvslei_wu(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvslei_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvslei.wu $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> %va, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvslei_du(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvslei_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvslei.du $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> %va, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll.ll +new file mode 100644 +index 000000000000..14110b613dbe +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsll.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvsll_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsll_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsll.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsll.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsll.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvsll_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsll_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsll.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsll.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsll.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvsll_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsll_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsll.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsll.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsll.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvsll_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsll_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsll.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsll.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvslli_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvslli_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvslli.b $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> %va, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvslli_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvslli_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvslli.h $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> %va, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvslli_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvslli_w: ++; CHECK: # %bb.0: # 
%entry ++; CHECK-NEXT: xvslli.w $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> %va, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvslli_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvslli_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvslli.d $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> %va, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil.ll +new file mode 100644 +index 000000000000..a72b8a6cbb4f +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil.ll +@@ -0,0 +1,74 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8>, i32) ++ ++define <16 x i16> @lasx_xvsllwil_h_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvsllwil_h_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsllwil.h.b $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> %va, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16>, i32) ++ ++define <8 x i32> @lasx_xvsllwil_w_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvsllwil_w_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsllwil.w.h $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> %va, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32>, i32) ++ ++define <4 x i64> @lasx_xvsllwil_d_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvsllwil_d_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsllwil.d.w $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> %va, i32 1) ++ ret <4 x i64> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8>, i32) ++ ++define <16 x i16> @lasx_xvsllwil_hu_bu(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvsllwil_hu_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsllwil.hu.bu $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> %va, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16>, i32) ++ ++define <8 x i32> @lasx_xvsllwil_wu_hu(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvsllwil_wu_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsllwil.wu.hu $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> %va, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32>, i32) ++ ++define <4 x i64> @lasx_xvsllwil_du_wu(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvsllwil_du_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsllwil.du.wu $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> %va, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt.ll +new file mode 100644 +index 000000000000..3ea87adff110 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt.ll 
+@@ -0,0 +1,194 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvslt.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvslt_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvslt_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvslt.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvslt.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvslt.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvslt_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvslt_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvslt.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvslt.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvslt.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvslt_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvslt_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvslt.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvslt.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvslt.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvslt_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvslt_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvslt.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvslt.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvslti_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvslti_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvslti.b $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> %va, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvslti_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvslti_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvslti.h $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> %va, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvslti_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvslti_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvslti.w $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> %va, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvslti_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvslti_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvslti.d $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> %va, i32 1) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvslt.bu(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvslt_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvslt_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvslt.bu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvslt.bu(<32 x i8> 
%va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvslt.hu(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvslt_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvslt_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvslt.hu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvslt.hu(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvslt.wu(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvslt_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvslt_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvslt.wu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvslt.wu(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvslt.du(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvslt_du(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvslt_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvslt.du $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvslt.du(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvslti_bu(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvslti_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvslti.bu $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> %va, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvslti_hu(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvslti_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvslti.hu $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> %va, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvslti_wu(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvslti_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvslti.wu $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> %va, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvslti_du(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvslti_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvslti.du $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> %va, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra.ll +new file mode 100644 +index 000000000000..a7498682559b +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsra.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvsra_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsra_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsra.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsra.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> 
@llvm.loongarch.lasx.xvsra.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvsra_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsra_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsra.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsra.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsra.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvsra_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsra_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsra.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsra.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsra.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvsra_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsra_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsra.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsra.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsrai_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvsrai_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrai.b $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> %va, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsrai_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvsrai_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrai.h $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> %va, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsrai_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvsrai_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrai.w $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> %va, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsrai_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvsrai_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrai.d $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> %va, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sran.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sran.ll +new file mode 100644 +index 000000000000..f59ae4c19662 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sran.ll +@@ -0,0 +1,38 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsran.b.h(<16 x i16>, <16 x i16>) ++ ++define <32 x i8> @lasx_xvsran_b_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsran_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsran.b.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsran.b.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsran.h.w(<8 x i32>, <8 x i32>) ++ ++define <16 x i16> 
@lasx_xvsran_h_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsran_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsran.h.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsran.h.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsran.w.d(<4 x i64>, <4 x i64>) ++ ++define <8 x i32> @lasx_xvsran_w_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsran_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsran.w.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsran.w.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <8 x i32> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani.ll +new file mode 100644 +index 000000000000..91fb90da9c52 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsrani_b_h(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrani_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrani.b.h $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> %va, <32 x i8> %vb, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsrani_h_w(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrani_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrani.h.w $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> %va, <16 x i16> %vb, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsrani_w_d(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrani_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrani.w.d $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> %va, <8 x i32> %vb, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsrani_d_q(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrani_d_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrani.d.q $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> %va, <4 x i64> %vb, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar.ll +new file mode 100644 +index 000000000000..e2c160557c4d +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsrar.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvsrar_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrar_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrar.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> 
@llvm.loongarch.lasx.xvsrar.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsrar.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvsrar_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrar_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrar.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrar.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsrar.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvsrar_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrar_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrar.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrar.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsrar.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvsrar_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrar_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrar.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrar.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsrari_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvsrari_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrari.b $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> %va, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsrari_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvsrari_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrari.h $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> %va, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsrari_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvsrari_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrari.w $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> %va, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsrari_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvsrari_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrari.d $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> %va, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarn.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarn.ll +new file mode 100644 +index 000000000000..02dd989773ca +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarn.ll +@@ -0,0 +1,38 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h(<16 x i16>, <16 x i16>) ++ ++define <32 x i8> @lasx_xvsrarn_b_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrarn_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrarn.b.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h(<16 x i16> 
%va, <16 x i16> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsrarn.h.w(<8 x i32>, <8 x i32>) ++ ++define <16 x i16> @lasx_xvsrarn_h_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrarn_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrarn.h.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrarn.h.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsrarn.w.d(<4 x i64>, <4 x i64>) ++ ++define <8 x i32> @lasx_xvsrarn_w_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrarn_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrarn.w.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrarn.w.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <8 x i32> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni.ll +new file mode 100644 +index 000000000000..a7d2c3739793 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsrarni_b_h(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrarni_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrarni.b.h $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsrarni_h_w(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrarni_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrarni.h.w $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsrarni_w_d(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrarni_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrarni.w.d $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsrarni_d_q(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrarni_d_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrarni.d.q $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl.ll +new file mode 100644 +index 000000000000..7b2992f2ca3b +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsrl.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvsrl_b(<32 x i8> %va, <32 x i8> %vb) 
nounwind { ++; CHECK-LABEL: lasx_xvsrl_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrl.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrl.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsrl.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvsrl_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrl_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrl.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrl.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsrl.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvsrl_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrl_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrl.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrl.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsrl.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvsrl_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrl_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrl.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrl.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsrli_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvsrli_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrli.b $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> %va, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsrli_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvsrli_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrli.h $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> %va, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsrli_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvsrli_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrli.w $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> %va, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsrli_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvsrli_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrli.d $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> %va, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srln.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srln.ll +new file mode 100644 +index 000000000000..dc5c0e016ea0 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srln.ll +@@ -0,0 +1,38 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h(<16 x i16>, <16 x i16>) ++ ++define <32 x i8> @lasx_xvsrln_b_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrln_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: 
xvsrln.b.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsrln.h.w(<8 x i32>, <8 x i32>) ++ ++define <16 x i16> @lasx_xvsrln_h_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrln_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrln.h.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrln.h.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsrln.w.d(<4 x i64>, <4 x i64>) ++ ++define <8 x i32> @lasx_xvsrln_w_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrln_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrln.w.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrln.w.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <8 x i32> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni.ll +new file mode 100644 +index 000000000000..0301ebb195e2 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsrlni_b_h(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrlni_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrlni.b.h $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsrlni_h_w(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrlni_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrlni.h.w $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsrlni_w_d(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrlni_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrlni.w.d $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsrlni_d_q(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrlni_d_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrlni.d.q $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr.ll +new file mode 100644 +index 000000000000..e04504158e27 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> 
@llvm.loongarch.lasx.xvsrlr.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvsrlr_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrlr_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrlr.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlr.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsrlr.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvsrlr_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrlr_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrlr.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlr.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsrlr.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvsrlr_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrlr_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrlr.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlr.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsrlr.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvsrlr_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrlr_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrlr.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlr.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsrlri_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvsrlri_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrlri.b $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> %va, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsrlri_h(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvsrlri_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrlri.h $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> %va, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsrlri_w(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvsrlri_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrlri.w $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> %va, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsrlri_d(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvsrlri_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrlri.d $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> %va, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrn.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrn.ll +new file mode 100644 +index 000000000000..1e7df379c6e1 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrn.ll +@@ -0,0 +1,38 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsrlrn.b.h(<16 x i16>, <16 x i16>) ++ 
++define <32 x i8> @lasx_xvsrlrn_b_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrlrn_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrlrn.b.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlrn.b.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsrlrn.h.w(<8 x i32>, <8 x i32>) ++ ++define <16 x i16> @lasx_xvsrlrn_h_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrlrn_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrlrn.h.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlrn.h.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsrlrn.w.d(<4 x i64>, <4 x i64>) ++ ++define <8 x i32> @lasx_xvsrlrn_w_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrlrn_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrlrn.w.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlrn.w.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <8 x i32> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni.ll +new file mode 100644 +index 000000000000..56dbafe8b1ac +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsrlrni_b_h(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrlrni_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrlrni.b.h $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsrlrni_h_w(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrlrni_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrlrni.h.w $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsrlrni_w_d(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrlrni_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrlrni.w.d $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsrlrni_d_q(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsrlrni_d_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsrlrni.d.q $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssran.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssran.ll +new file mode 100644 +index 000000000000..da1857dad145 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssran.ll +@@ -0,0 +1,74 
@@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssran.b.h(<16 x i16>, <16 x i16>) ++ ++define <32 x i8> @lasx_xvssran_b_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssran_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssran.b.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssran.b.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssran.h.w(<8 x i32>, <8 x i32>) ++ ++define <16 x i16> @lasx_xvssran_h_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssran_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssran.h.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssran.h.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssran.w.d(<4 x i64>, <4 x i64>) ++ ++define <8 x i32> @lasx_xvssran_w_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssran_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssran.w.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssran.w.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssran.bu.h(<16 x i16>, <16 x i16>) ++ ++define <32 x i8> @lasx_xvssran_bu_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssran_bu_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssran.bu.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssran.bu.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssran.hu.w(<8 x i32>, <8 x i32>) ++ ++define <16 x i16> @lasx_xvssran_hu_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssran_hu_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssran.hu.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssran.hu.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssran.wu.d(<4 x i64>, <4 x i64>) ++ ++define <8 x i32> @lasx_xvssran_wu_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssran_wu_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssran.wu.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssran.wu.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <8 x i32> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani.ll +new file mode 100644 +index 000000000000..9efa659b4a1e +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvssrani_b_h(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrani_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrani.b.h $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> %va, <32 x i8> %vb, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> 
@llvm.loongarch.lasx.xvssrani.h.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvssrani_h_w(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrani_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrani.h.w $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> %va, <16 x i16> %vb, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvssrani_w_d(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrani_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrani.w.d $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> %va, <8 x i32> %vb, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvssrani_d_q(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrani_d_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrani.d.q $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> %va, <4 x i64> %vb, i32 1) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvssrani_bu_h(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrani_bu_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrani.bu.h $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvssrani_hu_w(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrani_hu_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrani.hu.w $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvssrani_wu_d(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrani_wu_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrani.wu.d $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvssrani_du_q(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrani_du_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrani.du.q $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> %va, <4 x i64> %vb, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarn.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarn.ll +new file mode 100644 +index 000000000000..b5d59ff06f4d +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarn.ll +@@ -0,0 +1,74 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrarn.b.h(<16 x i16>, <16 x i16>) ++ ++define <32 x i8> @lasx_xvssrarn_b_h(<16 x i16> %va, <16 x 
i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrarn_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrarn.b.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarn.b.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssrarn.h.w(<8 x i32>, <8 x i32>) ++ ++define <16 x i16> @lasx_xvssrarn_h_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrarn_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrarn.h.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrarn.h.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrarn.w.d(<4 x i64>, <4 x i64>) ++ ++define <8 x i32> @lasx_xvssrarn_w_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrarn_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrarn.w.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrarn.w.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrarn.bu.h(<16 x i16>, <16 x i16>) ++ ++define <32 x i8> @lasx_xvssrarn_bu_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrarn_bu_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrarn.bu.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarn.bu.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssrarn.hu.w(<8 x i32>, <8 x i32>) ++ ++define <16 x i16> @lasx_xvssrarn_hu_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrarn_hu_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrarn.hu.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrarn.hu.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrarn.wu.d(<4 x i64>, <4 x i64>) ++ ++define <8 x i32> @lasx_xvssrarn_wu_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrarn_wu_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrarn.wu.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrarn.wu.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <8 x i32> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni.ll +new file mode 100644 +index 000000000000..da411dad645b +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvssrarni_b_h(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrarni_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrarni.b.h $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvssrarni_h_w(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrarni_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrarni.h.w $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: 
++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvssrarni_w_d(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrarni_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrarni.w.d $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvssrarni_d_q(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrarni_d_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrarni.d.q $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 1) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvssrarni_bu_h(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrarni_bu_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrarni.bu.h $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvssrarni_hu_w(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrarni_hu_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrarni.hu.w $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvssrarni_wu_d(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrarni_wu_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrarni.wu.d $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvssrarni_du_q(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrarni_du_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrarni.du.q $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrln.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrln.ll +new file mode 100644 +index 000000000000..c60b5bdf81a0 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrln.ll +@@ -0,0 +1,74 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h(<16 x i16>, <16 x i16>) ++ ++define <32 x i8> @lasx_xvssrln_b_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrln_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrln.b.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h(<16 x i16> %va, <16 x i16> %vb) ++ ret 
<32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssrln.h.w(<8 x i32>, <8 x i32>) ++ ++define <16 x i16> @lasx_xvssrln_h_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrln_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrln.h.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrln.h.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrln.w.d(<4 x i64>, <4 x i64>) ++ ++define <8 x i32> @lasx_xvssrln_w_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrln_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrln.w.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrln.w.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrln.bu.h(<16 x i16>, <16 x i16>) ++ ++define <32 x i8> @lasx_xvssrln_bu_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrln_bu_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrln.bu.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrln.bu.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssrln.hu.w(<8 x i32>, <8 x i32>) ++ ++define <16 x i16> @lasx_xvssrln_hu_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrln_hu_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrln.hu.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrln.hu.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrln.wu.d(<4 x i64>, <4 x i64>) ++ ++define <8 x i32> @lasx_xvssrln_wu_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrln_wu_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrln.wu.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrln.wu.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <8 x i32> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni.ll +new file mode 100644 +index 000000000000..e57dd426bde8 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvssrlni_b_h(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrlni_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrlni.b.h $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvssrlni_h_w(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrlni_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrlni.h.w $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvssrlni_w_d(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; 
CHECK-LABEL: lasx_xvssrlni_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrlni.w.d $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvssrlni_d_q(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrlni_d_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrlni.d.q $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 1) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvssrlni_bu_h(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrlni_bu_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrlni.bu.h $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvssrlni_hu_w(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrlni_hu_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrlni.hu.w $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvssrlni_wu_d(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrlni_wu_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrlni.wu.d $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvssrlni_du_q(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrlni_du_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrlni.du.q $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrn.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrn.ll +new file mode 100644 +index 000000000000..774cf1bd5e84 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrn.ll +@@ -0,0 +1,74 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrlrn.b.h(<16 x i16>, <16 x i16>) ++ ++define <32 x i8> @lasx_xvssrlrn_b_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrlrn_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrlrn.b.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.b.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssrlrn.h.w(<8 x i32>, <8 x i32>) ++ ++define <16 x i16> @lasx_xvssrlrn_h_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrlrn_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrlrn.h.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: 
++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.h.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrlrn.w.d(<4 x i64>, <4 x i64>) ++ ++define <8 x i32> @lasx_xvssrlrn_w_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrlrn_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrlrn.w.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.w.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrlrn.bu.h(<16 x i16>, <16 x i16>) ++ ++define <32 x i8> @lasx_xvssrlrn_bu_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrlrn_bu_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrlrn.bu.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.bu.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssrlrn.hu.w(<8 x i32>, <8 x i32>) ++ ++define <16 x i16> @lasx_xvssrlrn_hu_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrlrn_hu_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrlrn.hu.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.hu.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrlrn.wu.d(<4 x i64>, <4 x i64>) ++ ++define <8 x i32> @lasx_xvssrlrn_wu_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrlrn_wu_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrlrn.wu.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.wu.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <8 x i32> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni.ll +new file mode 100644 +index 000000000000..9a80516d8d78 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvssrlrni_b_h(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrlrni_b_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrlrni.b.h $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvssrlrni_h_w(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrlrni_h_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrlrni.h.w $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvssrlrni_w_d(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrlrni_w_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrlrni.w.d $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> 
@llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvssrlrni_d_q(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrlrni_d_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrlrni.d.q $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 1) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvssrlrni_bu_h(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrlrni_bu_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrlrni.bu.h $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvssrlrni_hu_w(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrlrni_hu_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrlrni.hu.w $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvssrlrni_wu_d(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrlrni_wu_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrlrni.wu.d $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvssrlrni_du_q(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssrlrni_du_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssrlrni.du.q $xr0, $xr1, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssub.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssub.ll +new file mode 100644 +index 000000000000..cd3ccd9f5262 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssub.ll +@@ -0,0 +1,98 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssub.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvssub_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssub_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssub.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssub.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssub.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvssub_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssub_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssub.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssub.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssub.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvssub_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: 
lasx_xvssub_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssub.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssub.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvssub.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvssub_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssub_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssub.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssub.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssub.bu(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvssub_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssub_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssub.bu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssub.bu(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssub.hu(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvssub_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssub_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssub.hu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssub.hu(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssub.wu(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvssub_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssub_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssub.wu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssub.wu(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvssub.du(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvssub_du(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvssub_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvssub.du $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssub.du(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st.ll +new file mode 100644 +index 000000000000..b69e7b813f0c +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st.ll +@@ -0,0 +1,27 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare void @llvm.loongarch.lasx.xvst(<32 x i8>, i8*, i32) ++ ++define void @lasx_xvst(<32 x i8> %va, i8* %p) nounwind { ++; CHECK-LABEL: lasx_xvst: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvst $xr0, $a0, 1 ++; CHECK-NEXT: ret ++entry: ++ call void @llvm.loongarch.lasx.xvst(<32 x i8> %va, i8* %p, i32 1) ++ ret void ++} ++ ++declare void @llvm.loongarch.lasx.xvstx(<32 x i8>, i8*, i64) ++ ++define void @lasx_xvstx(<32 x i8> %va, i8* %p) nounwind { ++; CHECK-LABEL: lasx_xvstx: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: ori $a1, $zero, 1 ++; CHECK-NEXT: xvstx $xr0, $a0, $a1 ++; CHECK-NEXT: ret ++entry: ++ call void @llvm.loongarch.lasx.xvstx(<32 x i8> %va, i8* %p, i64 1) ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm.ll +new file mode 100644 +index 000000000000..52ef3c471412 +--- /dev/null ++++ 
b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare void @llvm.loongarch.lasx.xvstelm.b(<32 x i8>, i8*, i32, i32) ++ ++define void @lasx_xvstelm_b(<32 x i8> %va, i8* %p) nounwind { ++; CHECK-LABEL: lasx_xvstelm_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvstelm.b $xr0, $a0, 1, 1 ++; CHECK-NEXT: ret ++entry: ++ call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> %va, i8* %p, i32 1, i32 1) ++ ret void ++} ++ ++declare void @llvm.loongarch.lasx.xvstelm.h(<16 x i16>, i8*, i32, i32) ++ ++define void @lasx_xvstelm_h(<16 x i16> %va, i8* %p) nounwind { ++; CHECK-LABEL: lasx_xvstelm_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvstelm.h $xr0, $a0, 2, 1 ++; CHECK-NEXT: ret ++entry: ++ call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> %va, i8* %p, i32 2, i32 1) ++ ret void ++} ++ ++declare void @llvm.loongarch.lasx.xvstelm.w(<8 x i32>, i8*, i32, i32) ++ ++define void @lasx_xvstelm_w(<8 x i32> %va, i8* %p) nounwind { ++; CHECK-LABEL: lasx_xvstelm_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvstelm.w $xr0, $a0, 4, 1 ++; CHECK-NEXT: ret ++entry: ++ call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> %va, i8* %p, i32 4, i32 1) ++ ret void ++} ++ ++declare void @llvm.loongarch.lasx.xvstelm.d(<4 x i64>, i8*, i32, i32) ++ ++define void @lasx_xvstelm_d(<4 x i64> %va, i8* %p) nounwind { ++; CHECK-LABEL: lasx_xvstelm_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvstelm.d $xr0, $a0, 8, 1 ++; CHECK-NEXT: ret ++entry: ++ call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> %va, i8* %p, i32 8, i32 1) ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sub.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sub.ll +new file mode 100644 +index 000000000000..4d69dd83dcde +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sub.ll +@@ -0,0 +1,62 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsub.b(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvsub_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsub_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsub.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsub.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsub.h(<16 x i16>, <16 x i16>) ++ ++define <16 x i16> @lasx_xvsub_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsub_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsub.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsub.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsub.w(<8 x i32>, <8 x i32>) ++ ++define <8 x i32> @lasx_xvsub_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsub_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsub.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsub.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsub.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvsub_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsub_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsub.d $xr0, $xr0, 
$xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsub.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsub.q(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvsub_q(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsub_q: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsub.q $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsub.q(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi.ll +new file mode 100644 +index 000000000000..cc3235ff4657 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi.ll +@@ -0,0 +1,50 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsubi_bu(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvsubi_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsubi.bu $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> %va, i32 1) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsubi_hu(<16 x i16> %va) nounwind { ++; CHECK-LABEL: lasx_xvsubi_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsubi.hu $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> %va, i32 1) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsubi_wu(<8 x i32> %va) nounwind { ++; CHECK-LABEL: lasx_xvsubi_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsubi.wu $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> %va, i32 1) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsubi_du(<4 x i64> %va) nounwind { ++; CHECK-LABEL: lasx_xvsubi_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsubi.du $xr0, $xr0, 1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> %va, i32 1) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subw.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subw.ll +new file mode 100644 +index 000000000000..6f203e894990 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subw.ll +@@ -0,0 +1,194 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b(<32 x i8>, <32 x i8>) ++ ++define <16 x i16> @lasx_xvsubwev_h_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsubwev_h_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsubwev.h.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.h(<16 x i16>, <16 x i16>) ++ ++define <8 x i32> @lasx_xvsubwev_w_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsubwev_w_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsubwev.w.h $xr0, $xr0, $xr1 ++; 
CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.w(<8 x i32>, <8 x i32>) ++ ++define <4 x i64> @lasx_xvsubwev_d_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsubwev_d_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsubwev.d.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvsubwev_q_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsubwev_q_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsubwev.q.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.bu(<32 x i8>, <32 x i8>) ++ ++define <16 x i16> @lasx_xvsubwev_h_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsubwev_h_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsubwev.h.bu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.bu(<32 x i8> %va, <32 x i8> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.hu(<16 x i16>, <16 x i16>) ++ ++define <8 x i32> @lasx_xvsubwev_w_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsubwev_w_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsubwev.w.hu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.hu(<16 x i16> %va, <16 x i16> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.wu(<8 x i32>, <8 x i32>) ++ ++define <4 x i64> @lasx_xvsubwev_d_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsubwev_d_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsubwev.d.wu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.wu(<8 x i32> %va, <8 x i32> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.du(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvsubwev_q_du(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsubwev_q_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsubwev.q.du $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.du(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.b(<32 x i8>, <32 x i8>) ++ ++define <16 x i16> @lasx_xvsubwod_h_b(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsubwod_h_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsubwod.h.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.b(<32 x i8> %va, <32 x i8> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.h(<16 x i16>, <16 x i16>) ++ ++define <8 x i32> @lasx_xvsubwod_w_h(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsubwod_w_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsubwod.w.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.h(<16 x i16> %va, <16 x i16> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> 
@llvm.loongarch.lasx.xvsubwod.d.w(<8 x i32>, <8 x i32>) ++ ++define <4 x i64> @lasx_xvsubwod_d_w(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsubwod_d_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsubwod.d.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.w(<8 x i32> %va, <8 x i32> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.d(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvsubwod_q_d(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsubwod_q_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsubwod.q.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.d(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.bu(<32 x i8>, <32 x i8>) ++ ++define <16 x i16> @lasx_xvsubwod_h_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsubwod_h_bu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsubwod.h.bu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.bu(<32 x i8> %va, <32 x i8> %vb) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.hu(<16 x i16>, <16 x i16>) ++ ++define <8 x i32> @lasx_xvsubwod_w_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsubwod_w_hu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsubwod.w.hu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.hu(<16 x i16> %va, <16 x i16> %vb) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.wu(<8 x i32>, <8 x i32>) ++ ++define <4 x i64> @lasx_xvsubwod_d_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsubwod_d_wu: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsubwod.d.wu $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.wu(<8 x i32> %va, <8 x i32> %vb) ++ ret <4 x i64> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.du(<4 x i64>, <4 x i64>) ++ ++define <4 x i64> @lasx_xvsubwod_q_du(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK-LABEL: lasx_xvsubwod_q_du: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvsubwod.q.du $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.du(<4 x i64> %va, <4 x i64> %vb) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xor.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xor.ll +new file mode 100644 +index 000000000000..6395b3d6f2e7 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xor.ll +@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvxor.v(<32 x i8>, <32 x i8>) ++ ++define <32 x i8> @lasx_xvxor_v(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK-LABEL: lasx_xvxor_v: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvxor.v(<32 x i8> %va, <32 x i8> %vb) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori.ll +new file mode 100644 +index 000000000000..c71d7e731165 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori.ll 
+@@ -0,0 +1,14 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvxori_b(<32 x i8> %va) nounwind { ++; CHECK-LABEL: lasx_xvxori_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvxori.b $xr0, $xr0, 3 ++; CHECK-NEXT: ret ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> %va, i32 3) ++ ret <32 x i8> %res ++} +-- +2.20.1 + diff --git a/0016-LoongArch-Add-testcases-of-LASX-intrinsics-with-imme.patch b/0016-LoongArch-Add-testcases-of-LASX-intrinsics-with-imme.patch new file mode 100644 index 0000000000000000000000000000000000000000..c5075cdae907ec7e4da138c0a07fc358e6017365 --- /dev/null +++ b/0016-LoongArch-Add-testcases-of-LASX-intrinsics-with-imme.patch @@ -0,0 +1,5781 @@ +From a8d0c5a4d4b3f713fb817fd97b69b58fe9dbafd3 Mon Sep 17 00:00:00 2001 +From: chenli +Date: Sat, 19 Aug 2023 17:14:12 +0800 +Subject: [PATCH 16/66] [LoongArch] Add testcases of LASX intrinsics with + immediates + +The testcases mainly cover three situations: +- the arguments which should be immediates are non immediates. +- the immediate is out of upper limit of the argument type. +- the immediate is out of lower limit of the argument type. + +Depends on D155830 + +Reviewed By: SixWeining + +Differential Revision: https://reviews.llvm.org/D157571 + +(cherry picked from commit 82bbf7003cabe2b6be8ab9b88bc96ecb8a64dc49) + +Change-Id: Id57d7de44d935476d782879e11f151ae9c7daf96 +--- + .../lasx/intrinsic-addi-invalid-imm.ll | 65 +++++++++ + .../LoongArch/lasx/intrinsic-addi-non-imm.ll | 37 +++++ + .../lasx/intrinsic-andi-invalid-imm.ll | 17 +++ + .../LoongArch/lasx/intrinsic-andi-non-imm.ll | 10 ++ + .../lasx/intrinsic-bitclr-invalid-imm.ll | 65 +++++++++ + .../lasx/intrinsic-bitclr-non-imm.ll | 37 +++++ + .../lasx/intrinsic-bitrev-invalid-imm.ll | 65 +++++++++ + .../lasx/intrinsic-bitrev-non-imm.ll | 37 +++++ + .../lasx/intrinsic-bitseli-invalid-imm.ll | 17 +++ + .../lasx/intrinsic-bitseli-non-imm.ll | 10 ++ + .../lasx/intrinsic-bitset-invalid-imm.ll | 65 +++++++++ + .../lasx/intrinsic-bitset-non-imm.ll | 37 +++++ + .../lasx/intrinsic-bsll-invalid-imm.ll | 17 +++ + .../LoongArch/lasx/intrinsic-bsll-non-imm.ll | 10 ++ + .../lasx/intrinsic-bsrl-invalid-imm.ll | 17 +++ + .../LoongArch/lasx/intrinsic-bsrl-non-imm.ll | 10 ++ + .../lasx/intrinsic-extrins-invalid-imm.ll | 65 +++++++++ + .../lasx/intrinsic-extrins-non-imm.ll | 37 +++++ + .../lasx/intrinsic-frstp-invalid-imm.ll | 33 +++++ + .../LoongArch/lasx/intrinsic-frstp-non-imm.ll | 19 +++ + .../lasx/intrinsic-insgr2vr-invalid-imm.ll | 33 +++++ + .../lasx/intrinsic-insgr2vr-non-imm.ll | 19 +++ + .../lasx/intrinsic-insve0-invalid-imm.ll | 33 +++++ + .../lasx/intrinsic-insve0-non-imm.ll | 19 +++ + .../lasx/intrinsic-ld-invalid-imm.ll | 17 +++ + .../LoongArch/lasx/intrinsic-ld-non-imm.ll | 10 ++ + .../lasx/intrinsic-ldi-invalid-imm.ll | 81 +++++++++++ + .../LoongArch/lasx/intrinsic-ldi-non-imm.ll | 46 +++++++ + .../lasx/intrinsic-ldrepl-invalid-imm.ll | 65 +++++++++ + .../lasx/intrinsic-ldrepl-non-imm.ll | 37 +++++ + .../lasx/intrinsic-max-invalid-imm.ll | 129 ++++++++++++++++++ + .../LoongArch/lasx/intrinsic-max-non-imm.ll | 73 ++++++++++ + .../lasx/intrinsic-min-invalid-imm.ll | 129 ++++++++++++++++++ + .../LoongArch/lasx/intrinsic-min-non-imm.ll | 73 ++++++++++ + .../lasx/intrinsic-nori-invalid-imm.ll | 17 +++ + .../LoongArch/lasx/intrinsic-nori-non-imm.ll | 10 ++ 
+ .../lasx/intrinsic-ori-invalid-imm.ll | 17 +++ + .../LoongArch/lasx/intrinsic-ori-non-imm.ll | 10 ++ + .../lasx/intrinsic-permi-invalid-imm.ll | 49 +++++++ + .../LoongArch/lasx/intrinsic-permi-non-imm.ll | 28 ++++ + .../lasx/intrinsic-pickve-invalid-imm.ll | 65 +++++++++ + .../lasx/intrinsic-pickve-non-imm.ll | 37 +++++ + .../lasx/intrinsic-pickve2gr-invalid-imm.ll | 65 +++++++++ + .../lasx/intrinsic-pickve2gr-non-imm.ll | 37 +++++ + .../lasx/intrinsic-repl128vei-invalid-imm.ll | 65 +++++++++ + .../lasx/intrinsic-repl128vei-non-imm.ll | 37 +++++ + .../lasx/intrinsic-rotr-invalid-imm.ll | 65 +++++++++ + .../LoongArch/lasx/intrinsic-rotr-non-imm.ll | 37 +++++ + .../lasx/intrinsic-sat-invalid-imm.ll | 129 ++++++++++++++++++ + .../LoongArch/lasx/intrinsic-sat-non-imm.ll | 73 ++++++++++ + .../lasx/intrinsic-seq-invalid-imm.ll | 65 +++++++++ + .../LoongArch/lasx/intrinsic-seq-non-imm.ll | 37 +++++ + .../lasx/intrinsic-shuf4i-invalid-imm.ll | 65 +++++++++ + .../lasx/intrinsic-shuf4i-non-imm.ll | 37 +++++ + .../lasx/intrinsic-sle-invalid-imm.ll | 129 ++++++++++++++++++ + .../LoongArch/lasx/intrinsic-sle-non-imm.ll | 73 ++++++++++ + .../lasx/intrinsic-sll-invalid-imm.ll | 65 +++++++++ + .../LoongArch/lasx/intrinsic-sll-non-imm.ll | 37 +++++ + .../lasx/intrinsic-sllwil-invalid-imm.ll | 97 +++++++++++++ + .../lasx/intrinsic-sllwil-non-imm.ll | 55 ++++++++ + .../lasx/intrinsic-slt-invalid-imm.ll | 129 ++++++++++++++++++ + .../LoongArch/lasx/intrinsic-slt-non-imm.ll | 73 ++++++++++ + .../lasx/intrinsic-sra-invalid-imm.ll | 65 +++++++++ + .../LoongArch/lasx/intrinsic-sra-non-imm.ll | 37 +++++ + .../lasx/intrinsic-srani-invalid-imm.ll | 65 +++++++++ + .../LoongArch/lasx/intrinsic-srani-non-imm.ll | 37 +++++ + .../lasx/intrinsic-srar-invalid-imm.ll | 65 +++++++++ + .../LoongArch/lasx/intrinsic-srar-non-imm.ll | 37 +++++ + .../lasx/intrinsic-srarni-invalid-imm.ll | 65 +++++++++ + .../lasx/intrinsic-srarni-non-imm.ll | 37 +++++ + .../lasx/intrinsic-srl-invalid-imm.ll | 65 +++++++++ + .../LoongArch/lasx/intrinsic-srl-non-imm.ll | 37 +++++ + .../lasx/intrinsic-srlni-invalid-imm.ll | 65 +++++++++ + .../LoongArch/lasx/intrinsic-srlni-non-imm.ll | 37 +++++ + .../lasx/intrinsic-srlr-invalid-imm.ll | 65 +++++++++ + .../LoongArch/lasx/intrinsic-srlr-non-imm.ll | 37 +++++ + .../lasx/intrinsic-srlrni-invalid-imm.ll | 65 +++++++++ + .../lasx/intrinsic-srlrni-non-imm.ll | 37 +++++ + .../lasx/intrinsic-ssrani-invalid-imm.ll | 129 ++++++++++++++++++ + .../lasx/intrinsic-ssrani-non-imm.ll | 73 ++++++++++ + .../lasx/intrinsic-ssrarni-invalid-imm.ll | 129 ++++++++++++++++++ + .../lasx/intrinsic-ssrarni-non-imm.ll | 73 ++++++++++ + .../lasx/intrinsic-ssrlni-invalid-imm.ll | 129 ++++++++++++++++++ + .../lasx/intrinsic-ssrlni-non-imm.ll | 73 ++++++++++ + .../lasx/intrinsic-ssrlrni-invalid-imm.ll | 129 ++++++++++++++++++ + .../lasx/intrinsic-ssrlrni-non-imm.ll | 73 ++++++++++ + .../lasx/intrinsic-st-invalid-imm.ll | 17 +++ + .../LoongArch/lasx/intrinsic-st-non-imm.ll | 10 ++ + .../lasx/intrinsic-stelm-invalid-imm.ll | 121 ++++++++++++++++ + .../LoongArch/lasx/intrinsic-stelm-non-imm.ll | 65 +++++++++ + .../lasx/intrinsic-subi-invalid-imm.ll | 65 +++++++++ + .../LoongArch/lasx/intrinsic-subi-non-imm.ll | 37 +++++ + .../lasx/intrinsic-xori-invalid-imm.ll | 17 +++ + .../LoongArch/lasx/intrinsic-xori-non-imm.ll | 10 ++ + 94 files changed, 5003 insertions(+) + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi-non-imm.ll + create 
mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi-invalid-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi-non-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr-invalid-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr-non-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev-invalid-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev-non-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli-invalid-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli-non-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset-invalid-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset-non-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll-invalid-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll-non-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl-invalid-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl-non-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins-invalid-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins-non-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp-invalid-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp-non-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr-invalid-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr-non-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0-invalid-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0-non-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld-invalid-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld-non-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi-invalid-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi-non-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl-invalid-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl-non-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-max-invalid-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-max-non-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-min-invalid-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-min-non-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori-invalid-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori-non-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori-invalid-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori-non-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi-invalid-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi-non-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve-invalid-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve-non-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-invalid-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-non-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei-invalid-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei-non-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr-invalid-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr-non-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat-invalid-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat-non-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq-invalid-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq-non-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i-invalid-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i-non-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle-invalid-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle-non-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll-invalid-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll-non-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil-invalid-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil-non-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt-invalid-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt-non-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra-invalid-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra-non-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani-invalid-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani-non-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar-invalid-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar-non-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni-invalid-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni-non-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl-invalid-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl-non-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni-invalid-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni-non-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr-invalid-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr-non-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni-invalid-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni-non-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani-invalid-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani-non-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni-invalid-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni-non-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni-invalid-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni-non-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni-invalid-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni-non-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-st-invalid-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-st-non-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm-invalid-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm-non-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi-invalid-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi-non-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori-invalid-imm.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori-non-imm.ll
+
+diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi-invalid-imm.ll
+new file mode 100644
+index 000000000000..4998847f0910
+--- /dev/null
++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi-invalid-imm.ll
+@@ -0,0 +1,65 @@
++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
++
++declare <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8>, i32)
++
++define <32 x i8> @lasx_xvaddi_bu_lo(<32 x i8> %va) nounwind {
++; CHECK: llvm.loongarch.lasx.xvaddi.bu: argument out of range
++entry:
++ %res = call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> %va, i32 -1)
++ ret <32 x i8> %res
++}
++
++define <32 x i8> @lasx_xvaddi_bu_hi(<32 x i8> %va) nounwind {
++; CHECK: llvm.loongarch.lasx.xvaddi.bu: argument out of range
++entry:
++ %res = call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> %va, i32 32)
++ ret <32 x i8> %res
++}
++
++declare <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16>, i32)
++
++define <16 x i16> @lasx_xvaddi_hu_lo(<16 x i16> %va) nounwind {
++; CHECK: llvm.loongarch.lasx.xvaddi.hu: argument out of range
++entry:
++ %res = call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> %va, i32 -1)
++ ret <16 x i16> %res
++}
++
++define <16 x i16> @lasx_xvaddi_hu_hi(<16 x i16> %va) nounwind {
++; CHECK: llvm.loongarch.lasx.xvaddi.hu: argument out of range
++entry:
++ %res = call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> %va, i32 32)
++ ret <16 x i16> %res
++}
++
++declare <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32>, i32)
++
++define <8 x i32> @lasx_xvaddi_wu_lo(<8 x i32> %va) nounwind {
++; CHECK: llvm.loongarch.lasx.xvaddi.wu: argument out of range
++entry:
++ %res = call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> %va, i32 -1)
++ ret <8 x i32> %res
++}
++
++define <8 x i32> @lasx_xvaddi_wu_hi(<8 x i32> %va) nounwind {
++; CHECK: llvm.loongarch.lasx.xvaddi.wu: argument out of range
++entry:
++ %res = call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> %va, i32 32)
++ ret <8 x i32> %res
++}
++
++declare <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64>, i32)
++
++define <4 x i64> @lasx_xvaddi_du_lo(<4 x i64> %va) nounwind {
++; CHECK: llvm.loongarch.lasx.xvaddi.du: argument out of range
++entry:
++ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> %va, i32 -1)
++ ret <4 x i64> %res
++}
++
++define <4 x i64> @lasx_xvaddi_du_hi(<4 x i64> %va) nounwind {
++; CHECK: llvm.loongarch.lasx.xvaddi.du: argument out of range
++entry:
++ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> %va, i32 32)
++ ret <4 x i64> %res
++}
+diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi-non-imm.ll
+new file mode 100644
+index 000000000000..f25f0e61a28e
+--- /dev/null
++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi-non-imm.ll
+@@ -0,0 +1,37 @@
++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 |
FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvaddi_bu(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvaddi_hu(<16 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> %va, i32 %b) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvaddi_wu(<8 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> %va, i32 %b) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvaddi_du(<4 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> %va, i32 %b) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi-invalid-imm.ll +new file mode 100644 +index 000000000000..60f0b765f954 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi-invalid-imm.ll +@@ -0,0 +1,17 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvandi_b_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvandi.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> %va, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvandi_b_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvandi.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> %va, i32 256) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi-non-imm.ll +new file mode 100644 +index 000000000000..1273dc6b450b +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi-non-imm.ll +@@ -0,0 +1,10 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvandi_b(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr-invalid-imm.ll +new file mode 100644 +index 000000000000..ecc287e89bbc +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvbitclri_b_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbitclri.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> %va, i32 -1) ++ 
ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvbitclri_b_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbitclri.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> %va, i32 8) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvbitclri_h_lo(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbitclri.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> %va, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvbitclri_h_hi(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbitclri.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> %va, i32 16) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvbitclri_w_lo(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbitclri.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> %va, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvbitclri_w_hi(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbitclri.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> %va, i32 32) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvbitclri_d_lo(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbitclri.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> %va, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvbitclri_d_hi(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbitclri.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> %va, i32 64) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr-non-imm.ll +new file mode 100644 +index 000000000000..09da85411082 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvbitclri_b(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvbitclri_h(<16 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> %va, i32 %b) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvbitclri_w(<8 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> %va, i32 %b) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvbitclri_d(<4 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = 
call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> %va, i32 %b) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev-invalid-imm.ll +new file mode 100644 +index 000000000000..dff0884fdd5a +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvbitrevi_b_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbitrevi.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> %va, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvbitrevi_b_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbitrevi.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> %va, i32 8) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvbitrevi_lo(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbitrevi.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> %va, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvbitrevi_h_hi(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbitrevi.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> %va, i32 16) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvbitrevi_w_lo(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbitrevi.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> %va, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvbitrevi_w_hi(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbitrevi.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> %va, i32 32) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvbitrevi_d_lo(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbitrevi.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> %va, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvbitrevi_d_hi(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbitrevi.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> %va, i32 64) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev-non-imm.ll +new file mode 100644 +index 000000000000..e1aef1a82f0c +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvbitrevi_b(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16>, i32) ++ 
++define <16 x i16> @lasx_xvbitrevi_h(<16 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> %va, i32 %b) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvbitrevi_w(<8 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> %va, i32 %b) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvbitrevi_d(<4 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> %va, i32 %b) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli-invalid-imm.ll +new file mode 100644 +index 000000000000..3f6fd44f842c +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli-invalid-imm.ll +@@ -0,0 +1,17 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvbitseli_b_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbitseli.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> %va, <32 x i8> %vb, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvbitseli_b_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbitseli.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> %va, <32 x i8> %vb, i32 256) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli-non-imm.ll +new file mode 100644 +index 000000000000..40533ab96d86 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli-non-imm.ll +@@ -0,0 +1,10 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvbitseli_b(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> %va, <32 x i8> %vb, i32 %c) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset-invalid-imm.ll +new file mode 100644 +index 000000000000..17a77ece7775 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvbitseti_b_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbitseti.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> %va, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvbitseti_b_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbitseti.b: argument out of range ++entry: ++ %res = call <32 x i8> 
@llvm.loongarch.lasx.xvbitseti.b(<32 x i8> %va, i32 8) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvbitseti_h_lo(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbitseti.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> %va, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvbitseti_h_hi(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbitseti.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> %va, i32 16) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvbitseti_w_lo(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbitseti.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> %va, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvbitseti_w_hi(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbitseti.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> %va, i32 32) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvbitseti_d_lo(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbitseti.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> %va, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvbitseti_d_hi(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbitseti.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> %va, i32 64) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset-non-imm.ll +new file mode 100644 +index 000000000000..613285804e0e +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvbitseti_b(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvbitseti_h(<16 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> %va, i32 %b) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvbitseti_w(<8 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> %va, i32 %b) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvbitseti_d(<4 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> %va, i32 %b) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll-invalid-imm.ll 
b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll-invalid-imm.ll +new file mode 100644 +index 000000000000..1da08a633bd2 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll-invalid-imm.ll +@@ -0,0 +1,17 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvbsll_v_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbsll.v: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> %va, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvbsll_v_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbsll.v: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> %va, i32 32) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll-non-imm.ll +new file mode 100644 +index 000000000000..e19a3232c179 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll-non-imm.ll +@@ -0,0 +1,10 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvbsll_v(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl-invalid-imm.ll +new file mode 100644 +index 000000000000..5d2b63391e67 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl-invalid-imm.ll +@@ -0,0 +1,17 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvbsrl_v_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbsrl.v: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> %va, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvbsrl_v_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvbsrl.v: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> %va, i32 32) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl-non-imm.ll +new file mode 100644 +index 000000000000..8dfd0ca579b8 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl-non-imm.ll +@@ -0,0 +1,10 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvbsrl_v(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins-invalid-imm.ll +new file mode 100644 +index 000000000000..1301b8a146eb +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> 
@llvm.loongarch.lasx.xvextrins.b(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvextrins_b_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvextrins.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> %va, <32 x i8> %vb, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvextrins_b_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvextrins.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> %va, <32 x i8> %vb, i32 256) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvextrins_h_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvextrins.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> %va, <16 x i16> %vb, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvextrins_h_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvextrins.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> %va, <16 x i16> %vb, i32 256) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvextrins_w_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvextrins.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> %va, <8 x i32> %vb, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvextrins_w_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvextrins.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> %va, <8 x i32> %vb, i32 256) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvextrins_d_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvextrins.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> %va, <4 x i64> %vb, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvextrins_d_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvextrins.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> %va, <4 x i64> %vb, i32 256) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins-non-imm.ll +new file mode 100644 +index 000000000000..bca8f8b3c778 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvextrins_b(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> %va, <32 x i8> %vb, i32 %c) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvextrins_h(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ 
%res = call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> %va, <16 x i16> %vb, i32 %c) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvextrins_w(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> %va, <8 x i32> %vb, i32 %c) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvextrins_d(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> %va, <4 x i64> %vb, i32 %c) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp-invalid-imm.ll +new file mode 100644 +index 000000000000..64b4632669d2 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp-invalid-imm.ll +@@ -0,0 +1,33 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvfrstpi_b_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvfrstpi.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> %va, <32 x i8> %vb, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvfrstpi_b_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvfrstpi.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> %va, <32 x i8> %vb, i32 32) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvfrstpi_h_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvfrstpi.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> %va, <16 x i16> %vb, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvfrstpi_h_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvfrstpi.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> %va, <16 x i16> %vb, i32 32) ++ ret <16 x i16> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp-non-imm.ll +new file mode 100644 +index 000000000000..ca92cff9b2d1 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp-non-imm.ll +@@ -0,0 +1,19 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvfrstpi_b(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> %va, <32 x i8> %vb, i32 %c) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvfrstpi_h(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> %va, <16 x 
i16> %vb, i32 %c) ++ ret <16 x i16> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr-invalid-imm.ll +new file mode 100644 +index 000000000000..4982f2c7d43a +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr-invalid-imm.ll +@@ -0,0 +1,33 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32>, i32, i32) ++ ++define <8 x i32> @lasx_xvinsgr2vr_w_lo(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvinsgr2vr.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> %va, i32 1, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvinsgr2vr_w_hi(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvinsgr2vr.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> %va, i32 1, i32 8) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64>, i64, i32) ++ ++define <4 x i64> @lasx_xvinsgr2vr_d_lo(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvinsgr2vr.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> %va, i64 1, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvinsgr2vr_d_hi(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvinsgr2vr.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> %va, i64 1, i32 4) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr-non-imm.ll +new file mode 100644 +index 000000000000..3accabf6dbd9 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr-non-imm.ll +@@ -0,0 +1,19 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32>, i32, i32) ++ ++define <8 x i32> @lasx_xvinsgr2vr_w(<8 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> %va, i32 1, i32 %b) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64>, i64, i32) ++ ++define <4 x i64> @lasx_xvinsgr2vr_d(<4 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> %va, i64 1, i32 %b) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0-invalid-imm.ll +new file mode 100644 +index 000000000000..a54fa8515fba +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0-invalid-imm.ll +@@ -0,0 +1,33 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvinsve0_w_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvinsve0.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> %va, <8 x i32> %vb, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvinsve0_w_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvinsve0.w: argument 
out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> %va, <8 x i32> %vb, i32 8) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvinsve0_d_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvinsve0.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> %va, <4 x i64> %vb, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvinsve0_d_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvinsve0.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> %va, <4 x i64> %vb, i32 4) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0-non-imm.ll +new file mode 100644 +index 000000000000..53e59db11aa6 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0-non-imm.ll +@@ -0,0 +1,19 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvinsve0_w(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> %va, <8 x i32> %vb, i32 %c) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvinsve0_d(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> %va, <4 x i64> %vb, i32 %c) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld-invalid-imm.ll +new file mode 100644 +index 000000000000..20dd8a45d7f0 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld-invalid-imm.ll +@@ -0,0 +1,17 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvld(i8*, i32) ++ ++define <32 x i8> @lasx_xvld_lo(i8* %p) nounwind { ++; CHECK: llvm.loongarch.lasx.xvld: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvld(i8* %p, i32 -2049) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvld_hi(i8* %p) nounwind { ++; CHECK: llvm.loongarch.lasx.xvld: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvld(i8* %p, i32 2048) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld-non-imm.ll +new file mode 100644 +index 000000000000..b23436a44832 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld-non-imm.ll +@@ -0,0 +1,10 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvld(i8*, i32) ++ ++define <32 x i8> @lasx_xvld(i8* %p, i32 %a) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvld(i8* %p, i32 %a) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi-invalid-imm.ll +new file mode 100644 +index 
000000000000..f3dd3650cf8a +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi-invalid-imm.ll +@@ -0,0 +1,81 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvldi(i32) ++ ++define <4 x i64> @lasx_xvldi_lo() nounwind { ++; CHECK: llvm.loongarch.lasx.xvldi: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvldi(i32 -4097) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvldi_hi() nounwind { ++; CHECK: llvm.loongarch.lasx.xvldi: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvldi(i32 4096) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32) ++ ++define <32 x i8> @lasx_xvrepli_b_lo() nounwind { ++; CHECK: llvm.loongarch.lasx.xvrepli.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32 -513) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvrepli_b_hi() nounwind { ++; CHECK: llvm.loongarch.lasx.xvrepli.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32 512) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32) ++ ++define <16 x i16> @lasx_xvrepli_h_lo() nounwind { ++; CHECK: llvm.loongarch.lasx.xvrepli.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32 -513) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvrepli_h_hi() nounwind { ++; CHECK: llvm.loongarch.lasx.xvrepli.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32 512) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32) ++ ++define <8 x i32> @lasx_xvrepli_w_lo() nounwind { ++; CHECK: llvm.loongarch.lasx.xvrepli.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32 -513) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvrepli_w_hi() nounwind { ++; CHECK: llvm.loongarch.lasx.xvrepli.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32 512) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32) ++ ++define <4 x i64> @lasx_xvrepli_d_lo() nounwind { ++; CHECK: llvm.loongarch.lasx.xvrepli.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32 -513) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvrepli_d_hi() nounwind { ++; CHECK: llvm.loongarch.lasx.xvrepli.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32 512) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi-non-imm.ll +new file mode 100644 +index 000000000000..6466818bf674 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi-non-imm.ll +@@ -0,0 +1,46 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvldi(i32) ++ ++define <4 x i64> @lasx_xvldi(i32 %a) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvldi(i32 %a) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32) ++ ++define <32 x i8> @lasx_xvrepli_b(i32 %a) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32 %a) ++ ret <32 x i8> %res 
++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32) ++ ++define <16 x i16> @lasx_xvrepli_h(i32 %a) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32 %a) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32) ++ ++define <8 x i32> @lasx_xvrepli_w(i32 %a) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32 %a) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32) ++ ++define <4 x i64> @lasx_xvrepli_d(i32 %a) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32 %a) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl-invalid-imm.ll +new file mode 100644 +index 000000000000..cb62a839985a +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(i8*, i32) ++ ++define <32 x i8> @lasx_xvldrepl_b_lo(i8* %p) nounwind { ++; CHECK: llvm.loongarch.lasx.xvldrepl.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(i8* %p, i32 -2049) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvldrepl_b_hi(i8* %p) nounwind { ++; CHECK: llvm.loongarch.lasx.xvldrepl.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(i8* %p, i32 2048) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(i8*, i32) ++ ++define <16 x i16> @lasx_xvldrepl_h_lo(i8* %p) nounwind { ++; CHECK: llvm.loongarch.lasx.xvldrepl.h: argument out of range or not a multiple of 2. ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(i8* %p, i32 -2050) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvldrepl_h_hi(i8* %p) nounwind { ++; CHECK: llvm.loongarch.lasx.xvldrepl.h: argument out of range or not a multiple of 2. ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(i8* %p, i32 2048) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(i8*, i32) ++ ++define <8 x i32> @lasx_xvldrepl_w_lo(i8* %p) nounwind { ++; CHECK: llvm.loongarch.lasx.xvldrepl.w: argument out of range or not a multiple of 4. ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(i8* %p, i32 -2052) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvldrepl_w_hi(i8* %p) nounwind { ++; CHECK: llvm.loongarch.lasx.xvldrepl.w: argument out of range or not a multiple of 4. ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(i8* %p, i32 2048) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(i8*, i32) ++ ++define <4 x i64> @lasx_xvldrepl_d_lo(i8* %p) nounwind { ++; CHECK: llvm.loongarch.lasx.xvldrepl.d: argument out of range or not a multiple of 8. ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(i8* %p, i32 -2056) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvldrepl_d_hi(i8* %p) nounwind { ++; CHECK: llvm.loongarch.lasx.xvldrepl.d: argument out of range or not a multiple of 8. 
++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(i8* %p, i32 2048) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl-non-imm.ll +new file mode 100644 +index 000000000000..075d663b0dd7 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(i8*, i32) ++ ++define <32 x i8> @lasx_xvldrepl_b(i8* %p, i32 %a) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(i8* %p, i32 %a) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(i8*, i32) ++ ++define <16 x i16> @lasx_xvldrepl_h(i8* %p, i32 %a) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(i8* %p, i32 %a) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(i8*, i32) ++ ++define <8 x i32> @lasx_xvldrepl_w(i8* %p, i32 %a) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(i8* %p, i32 %a) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(i8*, i32) ++ ++define <4 x i64> @lasx_xvldrepl_d(i8* %p, i32 %a) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(i8* %p, i32 %a) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max-invalid-imm.ll +new file mode 100644 +index 000000000000..a671e9979b2f +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max-invalid-imm.ll +@@ -0,0 +1,129 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvmaxi_b_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmaxi.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> %va, i32 -17) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvmaxi_b_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmaxi.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> %va, i32 16) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvmaxi_h_lo(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmaxi.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> %va, i32 -17) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvmaxi_h_hi(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmaxi.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> %va, i32 16) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvmaxi_w_lo(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmaxi.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> %va, i32 -17) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvmaxi_w_hi(<8 x i32> %va) nounwind { ++; CHECK: 
llvm.loongarch.lasx.xvmaxi.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> %va, i32 16) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvmaxi_d_lo(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmaxi.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> %va, i32 -17) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvmaxi_d_hi(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmaxi.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> %va, i32 16) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvmaxi_bu_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmaxi.bu: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> %va, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvmaxi_bu_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmaxi.bu: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> %va, i32 32) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvmaxi_hu_lo(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmaxi.hu: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> %va, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvmaxi_hu_hi(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmaxi.hu: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> %va, i32 32) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvmaxi_wu_lo(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmaxi.wu: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> %va, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvmaxi_wu_hi(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmaxi.wu: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> %va, i32 32) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvmaxi_du_lo(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmaxi.du: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> %va, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvmaxi_du_hi(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmaxi.du: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> %va, i32 32) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max-non-imm.ll +new file mode 100644 +index 000000000000..b85798b53c92 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max-non-imm.ll +@@ -0,0 +1,73 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvmaxi_b(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call 
<32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvmaxi_h(<16 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> %va, i32 %b) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvmaxi_w(<8 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> %va, i32 %b) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvmaxi_d(<4 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> %va, i32 %b) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvmaxi_bu(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvmaxi_hu(<16 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> %va, i32 %b) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvmaxi_wu(<8 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> %va, i32 %b) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvmaxi_du(<4 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> %va, i32 %b) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min-invalid-imm.ll +new file mode 100644 +index 000000000000..5ed4104c295f +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min-invalid-imm.ll +@@ -0,0 +1,129 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvmini_b_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmini.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> %va, i32 -17) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvmini_b_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmini.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> %va, i32 16) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvmini_h_lo(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmini.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> %va, i32 -17) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> 
@lasx_xvmini_h_hi(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmini.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> %va, i32 16) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvmini_w_lo(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmini.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> %va, i32 -17) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvmini_w_hi(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmini.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> %va, i32 16) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvmini_d_lo(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmini.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> %va, i32 -17) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvmini_d_hi(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmini.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> %va, i32 16) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvmini_bu_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmini.bu: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> %va, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvmini_bu_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmini.bu: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> %va, i32 32) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvmini_hu_lo(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmini.hu: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> %va, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvmini_hu_hi(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmini.hu: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> %va, i32 32) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvmini_wu_lo(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmini.wu: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> %va, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvmini_wu_hi(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmini.wu: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> %va, i32 32) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvmini_du_lo(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmini.du: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> %va, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvmini_du_hi(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvmini.du: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> %va, i32 
32) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min-non-imm.ll +new file mode 100644 +index 000000000000..b81931977aad +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min-non-imm.ll +@@ -0,0 +1,73 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvmini_b(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvmini_h(<16 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> %va, i32 %b) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvmini_w(<8 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> %va, i32 %b) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvmini_d(<4 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> %va, i32 %b) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvmini_bu(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvmini_hu(<16 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> %va, i32 %b) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvmini_wu(<8 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> %va, i32 %b) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvmini_du(<4 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> %va, i32 %b) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori-invalid-imm.ll +new file mode 100644 +index 000000000000..1130e094bf1f +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori-invalid-imm.ll +@@ -0,0 +1,17 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvnori_b_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvnori.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> %va, i32 -1) ++ ret <32 x 
i8> %res ++} ++ ++define <32 x i8> @lasx_xvnori_b_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvnori.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> %va, i32 256) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori-non-imm.ll +new file mode 100644 +index 000000000000..8f2333064d64 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori-non-imm.ll +@@ -0,0 +1,10 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvnori_b(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori-invalid-imm.ll +new file mode 100644 +index 000000000000..90dec8e55f2d +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori-invalid-imm.ll +@@ -0,0 +1,17 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvori_b_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvori.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> %va, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvori_b_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvori.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> %va, i32 256) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori-non-imm.ll +new file mode 100644 +index 000000000000..ae6571d98f4a +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori-non-imm.ll +@@ -0,0 +1,10 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvori_b(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi-invalid-imm.ll +new file mode 100644 +index 000000000000..41f4856bd8f7 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi-invalid-imm.ll +@@ -0,0 +1,49 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvpermi_w_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvpermi.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> %va, <8 x i32> %vb, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvpermi_w_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvpermi.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> %va, <8 x i32> %vb, i32 256) ++ ret <8 x i32> %res ++} ++ ++declare <4 x 
i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvpermi_d_lo(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvpermi.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> %va, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvpermi_d_hi(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvpermi.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> %va, i32 256) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvpermi_q_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvpermi.q: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> %va, <32 x i8> %vb, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvpermi_q_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvpermi.q: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> %va, <32 x i8> %vb, i32 256) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi-non-imm.ll +new file mode 100644 +index 000000000000..afb335c5d6ca +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi-non-imm.ll +@@ -0,0 +1,28 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvpermi_w(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> %va, <8 x i32> %vb, i32 %c) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvpermi_d(<4 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> %va, i32 %b) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvpermi_q(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> %va, <32 x i8> %vb, i32 %c) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve-invalid-imm.ll +new file mode 100644 +index 000000000000..cfc6ec42874e +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvpickve_w_lo(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvpickve.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> %va, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvpickve_w_hi(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvpickve.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> %va, i32 8) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> 
@llvm.loongarch.lasx.xvpickve.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvpickve_d_lo(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvpickve.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> %va, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvpickve_d_hi(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvpickve.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> %va, i32 4) ++ ret <4 x i64> %res ++} ++ ++declare <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float>, i32) ++ ++define <8 x float> @lasx_xvpickve_w_f_lo(<8 x float> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvpickve.w.f: argument out of range ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> %va, i32 -1) ++ ret <8 x float> %res ++} ++ ++define <8 x float> @lasx_xvpickve_w_f_hi(<8 x float> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvpickve.w.f: argument out of range ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> %va, i32 8) ++ ret <8 x float> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double>, i32) ++ ++define <4 x double> @lasx_xvpickve_d_f_lo(<4 x double> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvpickve.d.f: argument out of range ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> %va, i32 -1) ++ ret <4 x double> %res ++} ++ ++define <4 x double> @lasx_xvpickve_d_f_hi(<4 x double> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvpickve.d.f: argument out of range ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> %va, i32 4) ++ ret <4 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve-non-imm.ll +new file mode 100644 +index 000000000000..be1f19a89737 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvpickve_w(<8 x i32> %va, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> %va, i32 %c) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvpickve_d(<4 x i64> %va, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> %va, i32 %c) ++ ret <4 x i64> %res ++} ++ ++declare <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float>, i32) ++ ++define <8 x float> @lasx_xvpickve_w_f(<8 x float> %va, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> %va, i32 %c) ++ ret <8 x float> %res ++} ++ ++declare <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double>, i32) ++ ++define <4 x double> @lasx_xvpickve_d_f(<4 x double> %va, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> %va, i32 %c) ++ ret <4 x double> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-invalid-imm.ll 
b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-invalid-imm.ll +new file mode 100644 +index 000000000000..93056b272dfc +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32>, i32) ++ ++define i32 @lasx_xvpickve2gr_w_lo(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvpickve2gr.w: argument out of range ++entry: ++ %res = call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> %va, i32 -1) ++ ret i32 %res ++} ++ ++define i32 @lasx_xvpickve2gr_w_hi(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvpickve2gr.w: argument out of range ++entry: ++ %res = call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> %va, i32 8) ++ ret i32 %res ++} ++ ++declare i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64>, i32) ++ ++define i64 @lasx_xvpickve2gr_d_lo(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvpickve2gr.d: argument out of range ++entry: ++ %res = call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> %va, i32 -1) ++ ret i64 %res ++} ++ ++define i64 @lasx_xvpickve2gr_d_hi(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvpickve2gr.d: argument out of range ++entry: ++ %res = call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> %va, i32 4) ++ ret i64 %res ++} ++ ++declare i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32>, i32) ++ ++define i32 @lasx_xvpickve2gr_wu_lo(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvpickve2gr.wu: argument out of range ++entry: ++ %res = call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> %va, i32 -1) ++ ret i32 %res ++} ++ ++define i32 @lasx_xvpickve2gr_wu_hi(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvpickve2gr.wu: argument out of range ++entry: ++ %res = call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> %va, i32 8) ++ ret i32 %res ++} ++ ++declare i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64>, i32) ++ ++define i64 @lasx_xvpickve2gr_du_lo(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvpickve2gr.du: argument out of range ++entry: ++ %res = call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> %va, i32 -1) ++ ret i64 %res ++} ++ ++define i64 @lasx_xvpickve2gr_du_hi(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvpickve2gr.du: argument out of range ++entry: ++ %res = call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> %va, i32 4) ++ ret i64 %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-non-imm.ll +new file mode 100644 +index 000000000000..0fa8c94adc60 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32>, i32) ++ ++define i32 @lasx_xvpickve2gr_w(<8 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> %va, i32 %b) ++ ret i32 %res ++} ++ ++declare i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64>, i32) ++ ++define i64 @lasx_xvpickve2gr_d(<4 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> %va, i32 %b) ++ ret i64 %res ++} ++ ++declare i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32>, i32) 
++ ++define i32 @lasx_xvpickve2gr_wu(<8 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> %va, i32 %b) ++ ret i32 %res ++} ++ ++declare i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64>, i32) ++ ++define i64 @lasx_xvpickve2gr_du(<4 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> %va, i32 %b) ++ ret i64 %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei-invalid-imm.ll +new file mode 100644 +index 000000000000..a0cb309c54e1 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvrepl128vei_b_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvrepl128vei.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> %va, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvrepl128vei_b_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvrepl128vei.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> %va, i32 16) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvrepl128vei_h_lo(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvrepl128vei.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> %va, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvrepl128vei_h_hi(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvrepl128vei.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> %va, i32 8) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvrepl128vei_w_lo(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvrepl128vei.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> %va, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvrepl128vei_w_hi(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvrepl128vei.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> %va, i32 4) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvrepl128vei_d_lo(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvrepl128vei.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> %va, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvrepl128vei_d_hi(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvrepl128vei.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> %va, i32 2) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei-non-imm.ll +new file mode 100644 +index 000000000000..c537ffa66ba7 +--- /dev/null ++++ 
b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvrepl128vei_b(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvrepl128vei_h(<16 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> %va, i32 %b) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvrepl128vei_w(<8 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> %va, i32 %b) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvrepl128vei_d(<4 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> %va, i32 %b) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr-invalid-imm.ll +new file mode 100644 +index 000000000000..40abdf497605 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvrotri_b_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvrotri.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> %va, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvrotri_b_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvrotri.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> %va, i32 8) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvrotri_h_lo(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvrotri.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> %va, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvrotri_h_hi(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvrotri.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> %va, i32 16) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvrotri_w_lo(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvrotri.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> %va, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvrotri_w_hi(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvrotri.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> %va, i32 32) ++ ret <8 x i32> %res ++} ++ ++declare 
<4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvrotri_d_lo(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvrotri.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> %va, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvrotri_d_hi(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvrotri.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> %va, i32 64) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr-non-imm.ll +new file mode 100644 +index 000000000000..dd38301d0534 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvrotri_b(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvrotri_h(<16 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> %va, i32 %b) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvrotri_w(<8 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> %va, i32 %b) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvrotri_d(<4 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> %va, i32 %b) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat-invalid-imm.ll +new file mode 100644 +index 000000000000..839fbc9990d3 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat-invalid-imm.ll +@@ -0,0 +1,129 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsat_b_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsat.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> %va, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvsat_b_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsat.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> %va, i32 8) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsat_h_lo(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsat.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> %va, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvsat_h_hi(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsat.h: argument out of range ++entry: ++ %res = call <16 
x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> %va, i32 16) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsat_w_lo(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsat.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> %va, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvsat_w_hi(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsat.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> %va, i32 32) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsat_d_lo(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsat.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> %va, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvsat_d_hi(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsat.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> %va, i32 64) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsat_bu_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsat.bu: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> %va, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvsat_bu_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsat.bu: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> %va, i32 8) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsat_hu_lo(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsat.hu: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> %va, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvsat_hu_hi(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsat.hu: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> %va, i32 16) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsat_wu_lo(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsat.wu: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> %va, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvsat_wu_hi(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsat.wu: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> %va, i32 32) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsat_du_lo(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsat.du: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> %va, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvsat_du_hi(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsat.du: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> %va, i32 64) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat-non-imm.ll +new file mode 
100644 +index 000000000000..b73b32ebd3b0 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat-non-imm.ll +@@ -0,0 +1,73 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsat_b(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsat_h(<16 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> %va, i32 %b) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsat_w(<8 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> %va, i32 %b) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsat_d(<4 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> %va, i32 %b) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsat_bu(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsat_hu(<16 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> %va, i32 %b) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsat_wu(<8 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> %va, i32 %b) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsat_du(<4 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> %va, i32 %b) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq-invalid-imm.ll +new file mode 100644 +index 000000000000..bb6ef0cc6574 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvseqi_b_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvseqi.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> %va, i32 -17) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvseqi_b_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvseqi.b: argument out of range ++entry: ++ %res = call <32 x i8> 
@llvm.loongarch.lasx.xvseqi.b(<32 x i8> %va, i32 16) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvseqi_h_lo(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvseqi.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> %va, i32 -17) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvseqi_h_hi(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvseqi.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> %va, i32 16) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvseqi_w_lo(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvseqi.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> %va, i32 -17) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvseqi_w_hi(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvseqi.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> %va, i32 16) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvseqi_d_lo(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvseqi.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> %va, i32 -17) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvseqi_d_hi(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvseqi.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> %va, i32 16) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq-non-imm.ll +new file mode 100644 +index 000000000000..fb2c6206da7b +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvseqi_b(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvseqi_h(<16 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> %va, i32 %b) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvseqi_w(<8 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> %va, i32 %b) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvseqi_d(<4 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> %va, i32 %b) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i-invalid-imm.ll +new file mode 100644 +index 000000000000..9217d1f6a05d 
+--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvshuf4i_b_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvshuf4i.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> %va, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvshuf4i_b_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvshuf4i.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> %va, i32 256) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvshuf4i_h_lo(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvshuf4i.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> %va, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvshuf4i_h_hi(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvshuf4i.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> %va, i32 256) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvshuf4i_w_lo(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvshuf4i.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> %va, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvshuf4i_w_hi(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvshuf4i.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> %va, i32 256) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvshuf4i_d_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvshuf4i.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> %va, <4 x i64> %vb, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvshuf4i_d_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvshuf4i.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> %va, <4 x i64> %vb, i32 256) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i-non-imm.ll +new file mode 100644 +index 000000000000..8d6d1c694193 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvshuf4i_b(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvshuf4i_h(<16 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> %va, i32 %b) ++ ret <16 x i16> %res ++} ++ 
++declare <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvshuf4i_w(<8 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> %va, i32 %b) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvshuf4i_d(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> %va, <4 x i64> %vb, i32 %c) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle-invalid-imm.ll +new file mode 100644 +index 000000000000..5b10aca9801d +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle-invalid-imm.ll +@@ -0,0 +1,129 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvslei_b_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslei.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> %va, i32 -17) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvslei_b_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslei.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> %va, i32 16) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvslei_h_lo(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslei.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> %va, i32 -17) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvslei_h_hi(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslei.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> %va, i32 16) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvslei_w_lo(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslei.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> %va, i32 -17) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvslei_w_hi(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslei.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> %va, i32 16) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvslei_d_lo(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslei.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> %va, i32 -17) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvslei_d_hi(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslei.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> %va, i32 16) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvslei_bu_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslei.bu: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> %va, i32 
-1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvslei_bu_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslei.bu: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> %va, i32 32) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvslei_hu_lo(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslei.hu: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> %va, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvslei_hu_hi(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslei.hu: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> %va, i32 32) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvslei_wu_lo(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslei.wu: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> %va, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvslei_wu_hi(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslei.wu: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> %va, i32 32) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvslei_du_lo(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslei.du: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> %va, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvslei_du_hi(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslei.du: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> %va, i32 32) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle-non-imm.ll +new file mode 100644 +index 000000000000..903bc10d88b7 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle-non-imm.ll +@@ -0,0 +1,73 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvslei_b(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvslei_h(<16 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> %va, i32 %b) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvslei_w(<8 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> %va, i32 %b) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvslei_d(<4 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> %va, i32 %b) ++ ret <4 x 
i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvslei_bu(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvslei_hu(<16 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> %va, i32 %b) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvslei_wu(<8 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> %va, i32 %b) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvslei_du(<4 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> %va, i32 %b) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll-invalid-imm.ll +new file mode 100644 +index 000000000000..bf8205376a6c +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvslli_b_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslli.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> %va, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvslli_b_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslli.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> %va, i32 8) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvslli_h_lo(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslli.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> %va, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvslli_h_hi(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslli.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> %va, i32 16) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvslli_w_lo(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslli.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> %va, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvslli_w_hi(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslli.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> %va, i32 32) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvslli_d_lo(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslli.d: argument out of range ++entry: ++ %res = call <4 x i64> 
@llvm.loongarch.lasx.xvslli.d(<4 x i64> %va, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvslli_d_hi(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslli.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> %va, i32 64) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll-non-imm.ll +new file mode 100644 +index 000000000000..b5368a86b5c3 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvslli_b(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvslli_h(<16 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> %va, i32 %b) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvslli_w(<8 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> %va, i32 %b) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvslli_d(<4 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> %va, i32 %b) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil-invalid-imm.ll +new file mode 100644 +index 000000000000..18803767d6c0 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil-invalid-imm.ll +@@ -0,0 +1,97 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8>, i32) ++ ++define <16 x i16> @lasx_xvsllwil_h_b_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsllwil.h.b: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> %va, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvsllwil_h_b_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsllwil.h.b: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> %va, i32 8) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16>, i32) ++ ++define <8 x i32> @lasx_xvsllwil_w_h_lo(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsllwil.w.h: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> %va, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvsllwil_w_h_hi(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsllwil.w.h: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> %va, i32 16) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32>, i32) ++ 
++define <4 x i64> @lasx_xvsllwil_d_w_lo(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsllwil.d.w: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> %va, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvsllwil_d_w_hi(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsllwil.d.w: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> %va, i32 32) ++ ret <4 x i64> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8>, i32) ++ ++define <16 x i16> @lasx_xvsllwil_hu_bu_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsllwil.hu.bu: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> %va, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvsllwil_hu_bu_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsllwil.hu.bu: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> %va, i32 8) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16>, i32) ++ ++define <8 x i32> @lasx_xvsllwil_wu_hu_lo(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsllwil.wu.hu: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> %va, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvsllwil_wu_hu_hi(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsllwil.wu.hu: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> %va, i32 16) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32>, i32) ++ ++define <4 x i64> @lasx_xvsllwil_du_wu_lo(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsllwil.du.wu: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> %va, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvsllwil_du_wu_hi(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsllwil.du.wu: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> %va, i32 32) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil-non-imm.ll +new file mode 100644 +index 000000000000..3f5d4d631671 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil-non-imm.ll +@@ -0,0 +1,55 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8>, i32) ++ ++define <16 x i16> @lasx_xvsllwil_h_b(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> %va, i32 %b) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16>, i32) ++ ++define <8 x i32> @lasx_xvsllwil_w_h(<16 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> %va, i32 %b) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32>, i32) ++ ++define <4 x i64> @lasx_xvsllwil_d_w(<8 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> 
@llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> %va, i32 %b) ++ ret <4 x i64> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8>, i32) ++ ++define <16 x i16> @lasx_xvsllwil_hu_bu(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> %va, i32 %b) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16>, i32) ++ ++define <8 x i32> @lasx_xvsllwil_wu_hu(<16 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> %va, i32 %b) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32>, i32) ++ ++define <4 x i64> @lasx_xvsllwil_du_wu(<8 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> %va, i32 %b) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt-invalid-imm.ll +new file mode 100644 +index 000000000000..dc0567da4e47 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt-invalid-imm.ll +@@ -0,0 +1,129 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvslti_b_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslti.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> %va, i32 -17) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvslti_b_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslti.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> %va, i32 16) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvslti_h_lo(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslti.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> %va, i32 -17) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvslti_h_hi(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslti.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> %va, i32 16) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvslti_w_lo(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslti.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> %va, i32 -17) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvslti_w_hi(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslti.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> %va, i32 16) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvslti_d_lo(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslti.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> %va, i32 -17) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvslti_d_hi(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslti.d: argument out of 
range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> %va, i32 16) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvslti_bu_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslti.bu: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> %va, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvslti_bu_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslti.bu: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> %va, i32 32) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvslti_hu_lo(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslti.hu: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> %va, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvslti_hu_hi(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslti.hu: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> %va, i32 32) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvslti_wu_lo(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslti.wu: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> %va, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvslti_wu_hi(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslti.wu: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> %va, i32 32) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvslti_du_lo(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslti.du: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> %va, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvslti_du_hi(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvslti.du: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> %va, i32 32) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt-non-imm.ll +new file mode 100644 +index 000000000000..a2cedc8d3ef3 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt-non-imm.ll +@@ -0,0 +1,73 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvslti_b(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvslti_h(<16 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> %va, i32 %b) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvslti_w(<8 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter 
++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> %va, i32 %b) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvslti_d(<4 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> %va, i32 %b) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvslti_bu(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvslti_hu(<16 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> %va, i32 %b) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvslti_wu(<8 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> %va, i32 %b) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvslti_du(<4 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> %va, i32 %b) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra-invalid-imm.ll +new file mode 100644 +index 000000000000..15b522d5e7e3 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsrai_b_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrai.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> %va, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvsrai_b_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrai.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> %va, i32 8) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsrai_h_lo(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrai.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> %va, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvsrai_h_hi(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrai.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> %va, i32 16) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsrai_w_lo(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrai.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> %va, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvsrai_w_hi(<8 x i32> %va) nounwind { ++; CHECK: 
llvm.loongarch.lasx.xvsrai.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> %va, i32 32) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsrai_d_lo(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrai.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> %va, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvsrai_d_hi(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrai.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> %va, i32 64) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra-non-imm.ll +new file mode 100644 +index 000000000000..fefee7246ae6 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsrai_b(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsrai_h(<16 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> %va, i32 %b) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsrai_w(<8 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> %va, i32 %b) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsrai_d(<4 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> %va, i32 %b) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani-invalid-imm.ll +new file mode 100644 +index 000000000000..bedbfc4889d2 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsrani_b_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrani.b.h: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvsrani_b_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrani.b.h: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> %va, <32 x i8> %vb, i32 16) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsrani_h_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK: 
llvm.loongarch.lasx.xvsrani.h.w: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvsrani_h_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrani.h.w: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> %va, <16 x i16> %vb, i32 32) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsrani_w_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrani.w.d: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvsrani_w_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrani.w.d: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> %va, <8 x i32> %vb, i32 64) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsrani_d_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrani.d.q: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvsrani_d_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrani.d.q: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> %va, <4 x i64> %vb, i32 128) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani-non-imm.ll +new file mode 100644 +index 000000000000..3c17f2b6090a +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsrani_b_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsrani_h_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsrani_w_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsrani_d_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) ++ 
ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar-invalid-imm.ll +new file mode 100644 +index 000000000000..e417e3cc5bbf +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsrari_b_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrari.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> %va, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvsrari_b_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrari.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> %va, i32 8) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsrari_h_lo(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrari.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> %va, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvsrari_h_hi(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrari.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> %va, i32 16) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsrari_w_lo(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrari.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> %va, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvsrari_w_hi(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrari.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> %va, i32 32) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsrari_d_lo(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrari.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> %va, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvsrari_d_hi(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrari.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> %va, i32 64) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar-non-imm.ll +new file mode 100644 +index 000000000000..15fed7966f1c +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsrari_b(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsrari_h(<16 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call 
<16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> %va, i32 %b) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsrari_w(<8 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> %va, i32 %b) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsrari_d(<4 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> %va, i32 %b) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni-invalid-imm.ll +new file mode 100644 +index 000000000000..83e977827e2d +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsrarni_b_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrarni.b.h: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvsrarni_b_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrarni.b.h: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 16) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsrarni_h_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrarni.h.w: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvsrarni_h_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrarni.h.w: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 32) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsrarni_w_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrarni.w.d: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvsrarni_w_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrarni.w.d: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 64) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsrarni_d_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrarni.d.q: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvsrarni_d_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: 
llvm.loongarch.lasx.xvsrarni.d.q: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 128) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni-non-imm.ll +new file mode 100644 +index 000000000000..eb577a29fb33 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsrarni_b_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsrarni_h_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsrarni_w_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsrarni_d_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl-invalid-imm.ll +new file mode 100644 +index 000000000000..3ab02dcb97ed +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsrli_b_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrli.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> %va, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvsrli_b_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrli.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> %va, i32 8) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsrli_h_lo(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrli.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> %va, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvsrli_h_hi(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrli.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> %va, i32 16) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> 
@llvm.loongarch.lasx.xvsrli.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsrli_w_lo(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrli.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> %va, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvsrli_w_hi(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrli.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> %va, i32 32) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsrli_d_lo(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrli.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> %va, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvsrli_d_hi(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrli.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> %va, i32 64) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl-non-imm.ll +new file mode 100644 +index 000000000000..bc085aeaa232 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsrli_b(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsrli_h(<16 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> %va, i32 %b) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsrli_w(<8 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> %va, i32 %b) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsrli_d(<4 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> %va, i32 %b) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni-invalid-imm.ll +new file mode 100644 +index 000000000000..9e7c94305630 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsrlni_b_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrlni.b.h: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvsrlni_b_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: 
llvm.loongarch.lasx.xvsrlni.b.h: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 16) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsrlni_h_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrlni.h.w: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvsrlni_h_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrlni.h.w: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 32) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsrlni_w_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrlni.w.d: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvsrlni_w_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrlni.w.d: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 64) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsrlni_d_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrlni.d.q: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvsrlni_d_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrlni.d.q: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 128) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni-non-imm.ll +new file mode 100644 +index 000000000000..66d800470003 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsrlni_b_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsrlni_h_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsrlni_w_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> %va, <8 x i32> 
%vb, i32 %c) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsrlni_d_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr-invalid-imm.ll +new file mode 100644 +index 000000000000..52621ddc6f49 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsrlri_b_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrlri.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> %va, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvsrlri_b_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrlri.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> %va, i32 8) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsrlri_h_lo(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrlri.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> %va, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvsrlri_h_hi(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrlri.h: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> %va, i32 16) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsrlri_w_lo(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrlri.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> %va, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvsrlri_w_hi(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrlri.w: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> %va, i32 32) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsrlri_d_lo(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrlri.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> %va, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvsrlri_d_hi(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrlri.d: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> %va, i32 64) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr-non-imm.ll +new file mode 100644 +index 000000000000..5663e3475b12 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsrlri_b(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg 
operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsrlri_h(<16 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> %va, i32 %b) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsrlri_w(<8 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> %va, i32 %b) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsrlri_d(<4 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> %va, i32 %b) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni-invalid-imm.ll +new file mode 100644 +index 000000000000..2d65a75b175a +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsrlrni_b_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrlrni.b.h: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvsrlrni_b_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrlrni.b.h: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 16) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsrlrni_h_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrlrni.h.w: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvsrlrni_h_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrlrni.h.w: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 32) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsrlrni_w_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrlrni.w.d: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvsrlrni_w_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrlrni.w.d: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 64) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> 
@lasx_xvsrlrni_d_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrlrni.d.q: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvsrlrni_d_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsrlrni.d.q: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 128) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni-non-imm.ll +new file mode 100644 +index 000000000000..82da0d21d013 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsrlrni_b_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsrlrni_h_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsrlrni_w_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsrlrni_d_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani-invalid-imm.ll +new file mode 100644 +index 000000000000..e10d5d7bd488 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani-invalid-imm.ll +@@ -0,0 +1,129 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvssrani_b_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrani.b.h: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvssrani_b_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrani.b.h: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> %va, <32 x i8> %vb, i32 16) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvssrani_h_w_lo(<16 x i16> 
%va, <16 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrani.h.w: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvssrani_h_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrani.h.w: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> %va, <16 x i16> %vb, i32 32) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvssrani_w_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrani.w.d: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvssrani_w_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrani.w.d: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> %va, <8 x i32> %vb, i32 64) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvssrani_d_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrani.d.q: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvssrani_d_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrani.d.q: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> %va, <4 x i64> %vb, i32 128) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvssrani_bu_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrani.bu.h: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvssrani_bu_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrani.bu.h: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 16) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvssrani_hu_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrani.hu.w: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvssrani_hu_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrani.hu.w: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 32) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvssrani_wu_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrani.wu.d: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x 
i32> @lasx_xvssrani_wu_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrani.wu.d: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 64) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvssrani_du_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrani.du.q: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvssrani_du_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrani.du.q: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> %va, <4 x i64> %vb, i32 128) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani-non-imm.ll +new file mode 100644 +index 000000000000..a928cc2de8c8 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani-non-imm.ll +@@ -0,0 +1,73 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvssrani_b_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvssrani_h_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvssrani_w_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvssrani_d_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvssrani_bu_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvssrani_hu_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x 
i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvssrani_wu_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvssrani_du_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni-invalid-imm.ll +new file mode 100644 +index 000000000000..42cd6ac99754 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni-invalid-imm.ll +@@ -0,0 +1,129 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvssrarni_b_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrarni.b.h: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvssrarni_b_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrarni.b.h: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 16) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvssrarni_h_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrarni.h.w: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvssrarni_h_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrarni.h.w: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 32) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvssrarni_w_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrarni.w.d: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvssrarni_w_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrarni.w.d: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 64) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvssrarni_d_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrarni.d.q: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvssrarni_d_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrarni.d.q: 
argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 128) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvssrarni_bu_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrarni.bu.h: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvssrarni_bu_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrarni.bu.h: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 16) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvssrarni_hu_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrarni.hu.w: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvssrarni_hu_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrarni.hu.w: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 32) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvssrarni_wu_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrarni.wu.d: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvssrarni_wu_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrarni.wu.d: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 64) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvssrarni_du_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrarni.du.q: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvssrarni_du_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrarni.du.q: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 128) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni-non-imm.ll +new file mode 100644 +index 000000000000..f050e7d79b0f +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni-non-imm.ll +@@ -0,0 +1,73 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvssrarni_b_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) ++ 
ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvssrarni_h_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvssrarni_w_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvssrarni_d_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvssrarni_bu_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvssrarni_hu_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvssrarni_wu_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvssrarni_du_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni-invalid-imm.ll +new file mode 100644 +index 000000000000..26be21a83aa4 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni-invalid-imm.ll +@@ -0,0 +1,129 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvssrlni_b_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlni.b.h: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvssrlni_b_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlni.b.h: argument out of range ++entry: ++ %res = call <32 x i8> 
@llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 16) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvssrlni_h_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlni.h.w: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvssrlni_h_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlni.h.w: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 32) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvssrlni_w_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlni.w.d: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvssrlni_w_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlni.w.d: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 64) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvssrlni_d_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlni.d.q: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvssrlni_d_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlni.d.q: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 128) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvssrlni_bu_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlni.bu.h: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvssrlni_bu_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlni.bu.h: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 16) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvssrlni_hu_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlni.hu.w: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvssrlni_hu_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlni.hu.w: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 32) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvssrlni_wu_d_lo(<8 x i32> %va, <8 
x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlni.wu.d: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvssrlni_wu_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlni.wu.d: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 64) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvssrlni_du_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlni.du.q: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvssrlni_du_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlni.du.q: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 128) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni-non-imm.ll +new file mode 100644 +index 000000000000..72da2a746dd5 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni-non-imm.ll +@@ -0,0 +1,73 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvssrlni_b_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvssrlni_h_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvssrlni_w_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvssrlni_d_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvssrlni_bu_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvssrlni_hu_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { ++; CHECK: 
immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvssrlni_wu_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvssrlni_du_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni-invalid-imm.ll +new file mode 100644 +index 000000000000..cd778e2c0627 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni-invalid-imm.ll +@@ -0,0 +1,129 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvssrlrni_b_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlrni.b.h: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvssrlrni_b_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlrni.b.h: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 16) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvssrlrni_h_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlrni.h.w: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvssrlrni_h_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlrni.h.w: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 32) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvssrlrni_w_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlrni.w.d: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvssrlrni_w_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlrni.w.d: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 64) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvssrlrni_d_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlrni.d.q: argument out of range ++entry: ++ %res = call <4 
x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvssrlrni_d_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlrni.d.q: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 128) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvssrlrni_bu_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlrni.bu.h: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvssrlrni_bu_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlrni.bu.h: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 16) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvssrlrni_hu_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlrni.hu.w: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvssrlrni_hu_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlrni.hu.w: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 32) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvssrlrni_wu_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlrni.wu.d: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvssrlrni_wu_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlrni.wu.d: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 64) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvssrlrni_du_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlrni.du.q: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvssrlrni_du_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lasx.xvssrlrni.du.q: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 128) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni-non-imm.ll +new file mode 100644 +index 000000000000..a10c54329149 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni-non-imm.ll +@@ -0,0 +1,73 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x 
i8> @lasx_xvssrlrni_b_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvssrlrni_h_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvssrlrni_w_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvssrlrni_d_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) ++ ret <4 x i64> %res ++} ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8>, <32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvssrlrni_bu_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16>, <16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvssrlrni_hu_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32>, <8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvssrlrni_wu_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64>, <4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvssrlrni_du_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st-invalid-imm.ll +new file mode 100644 +index 000000000000..0177f2b77b93 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st-invalid-imm.ll +@@ -0,0 +1,17 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare void @llvm.loongarch.lasx.xvst(<32 x i8>, i8*, i32) ++ ++define void @lasx_xvst_lo(<32 x i8> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lasx.xvst: argument out of range ++entry: ++ call void @llvm.loongarch.lasx.xvst(<32 x i8> %va, i8* %p, i32 -2049) ++ ret void ++} ++ ++define void @lasx_xvst_hi(<32 x i8> %va, i8* %p) 
nounwind { ++; CHECK: llvm.loongarch.lasx.xvst: argument out of range ++entry: ++ call void @llvm.loongarch.lasx.xvst(<32 x i8> %va, i8* %p, i32 2048) ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st-non-imm.ll +new file mode 100644 +index 000000000000..c19207aad6b8 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st-non-imm.ll +@@ -0,0 +1,10 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare void @llvm.loongarch.lasx.xvst(<32 x i8>, i8*, i32) ++ ++define void @lasx_xvst(<32 x i8> %va, i8* %p, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ call void @llvm.loongarch.lasx.xvst(<32 x i8> %va, i8* %p, i32 %b) ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm-invalid-imm.ll +new file mode 100644 +index 000000000000..0ea2484e090d +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm-invalid-imm.ll +@@ -0,0 +1,121 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare void @llvm.loongarch.lasx.xvstelm.b(<32 x i8>, i8*, i32, i32) ++ ++define void @lasx_xvstelm_b_lo(<32 x i8> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lasx.xvstelm.b: argument out of range ++entry: ++ call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> %va, i8* %p, i32 -129, i32 1) ++ ret void ++} ++ ++define void @lasx_xvstelm_b_hi(<32 x i8> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lasx.xvstelm.b: argument out of range ++entry: ++ call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> %va, i8* %p, i32 128, i32 1) ++ ret void ++} ++ ++define void @lasx_xvstelm_b_idx_lo(<32 x i8> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lasx.xvstelm.b: argument out of range ++entry: ++ call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> %va, i8* %p, i32 1, i32 -1) ++ ret void ++} ++ ++define void @lasx_xvstelm_b_idx_hi(<32 x i8> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lasx.xvstelm.b: argument out of range ++entry: ++ call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> %va, i8* %p, i32 1, i32 32) ++ ret void ++} ++ ++declare void @llvm.loongarch.lasx.xvstelm.h(<16 x i16>, i8*, i32, i32) ++ ++define void @lasx_xvstelm_h_lo(<16 x i16> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lasx.xvstelm.h: argument out of range or not a multiple of 2. ++entry: ++ call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> %va, i8* %p, i32 -258, i32 1) ++ ret void ++} ++ ++define void @lasx_xvstelm_h_hi(<16 x i16> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lasx.xvstelm.h: argument out of range or not a multiple of 2. ++entry: ++ call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> %va, i8* %p, i32 256, i32 1) ++ ret void ++} ++ ++define void @lasx_xvstelm_h_idx_lo(<16 x i16> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lasx.xvstelm.h: argument out of range or not a multiple of 2. ++entry: ++ call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> %va, i8* %p, i32 2, i32 -1) ++ ret void ++} ++ ++define void @lasx_xvstelm_h_idx_hi(<16 x i16> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lasx.xvstelm.h: argument out of range or not a multiple of 2. 
++entry: ++ call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> %va, i8* %p, i32 2, i32 16) ++ ret void ++} ++ ++declare void @llvm.loongarch.lasx.xvstelm.w(<8 x i32>, i8*, i32, i32) ++ ++define void @lasx_xvstelm_w_lo(<8 x i32> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lasx.xvstelm.w: argument out of range or not a multiple of 4. ++entry: ++ call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> %va, i8* %p, i32 -516, i32 1) ++ ret void ++} ++ ++define void @lasx_xvstelm_w_hi(<8 x i32> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lasx.xvstelm.w: argument out of range or not a multiple of 4. ++entry: ++ call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> %va, i8* %p, i32 512, i32 1) ++ ret void ++} ++ ++define void @lasx_xvstelm_w_idx_lo(<8 x i32> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lasx.xvstelm.w: argument out of range or not a multiple of 4. ++entry: ++ call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> %va, i8* %p, i32 4, i32 -1) ++ ret void ++} ++ ++define void @lasx_xvstelm_w_idx_hi(<8 x i32> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lasx.xvstelm.w: argument out of range or not a multiple of 4. ++entry: ++ call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> %va, i8* %p, i32 4, i32 8) ++ ret void ++} ++ ++declare void @llvm.loongarch.lasx.xvstelm.d(<4 x i64>, i8*, i32, i32) ++ ++define void @lasx_xvstelm_d_lo(<4 x i64> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lasx.xvstelm.d: argument out of range or not a multiple of 8. ++entry: ++ call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> %va, i8* %p, i32 -1032, i32 1) ++ ret void ++} ++ ++define void @lasx_xvstelm_d_hi(<4 x i64> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lasx.xvstelm.d: argument out of range or not a multiple of 8. ++entry: ++ call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> %va, i8* %p, i32 1024, i32 1) ++ ret void ++} ++ ++define void @lasx_xvstelm_d_idx_lo(<4 x i64> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lasx.xvstelm.d: argument out of range or not a multiple of 8. ++entry: ++ call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> %va, i8* %p, i32 8, i32 -1) ++ ret void ++} ++ ++define void @lasx_xvstelm_d_idx_hi(<4 x i64> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lasx.xvstelm.d: argument out of range or not a multiple of 8. 
++entry: ++ call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> %va, i8* %p, i32 8, i32 4) ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm-non-imm.ll +new file mode 100644 +index 000000000000..42c7c0da1746 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm-non-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare void @llvm.loongarch.lasx.xvstelm.b(<32 x i8>, i8*, i32, i32) ++ ++define void @lasx_xvstelm_b(<32 x i8> %va, i8* %p, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> %va, i8* %p, i32 %b, i32 1) ++ ret void ++} ++ ++define void @lasx_xvstelm_b_idx(<32 x i8> %va, i8* %p, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> %va, i8* %p, i32 1, i32 %b) ++ ret void ++} ++ ++declare void @llvm.loongarch.lasx.xvstelm.h(<16 x i16>, i8*, i32, i32) ++ ++define void @lasx_xvstelm_h(<16 x i16> %va, i8* %p, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> %va, i8* %p, i32 %b, i32 1) ++ ret void ++} ++ ++define void @lasx_xvstelm_h_idx(<16 x i16> %va, i8* %p, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> %va, i8* %p, i32 2, i32 %b) ++ ret void ++} ++ ++declare void @llvm.loongarch.lasx.xvstelm.w(<8 x i32>, i8*, i32, i32) ++ ++define void @lasx_xvstelm_w(<8 x i32> %va, i8* %p, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> %va, i8* %p, i32 %b, i32 1) ++ ret void ++} ++ ++define void @lasx_xvstelm_w_idx(<8 x i32> %va, i8* %p, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> %va, i8* %p, i32 4, i32 %b) ++ ret void ++} ++ ++declare void @llvm.loongarch.lasx.xvstelm.d(<4 x i64>, i8*, i32, i32) ++ ++define void @lasx_xvstelm_d(<4 x i64> %va, i8* %p, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> %va, i8* %p, i32 %b, i32 1) ++ ret void ++} ++ ++define void @lasx_xvstelm_d_idx(<4 x i64> %va, i8* %p, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> %va, i8* %p, i32 8, i32 %b) ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi-invalid-imm.ll +new file mode 100644 +index 000000000000..810008c17f7e +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsubi_bu_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsubi.bu: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> %va, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvsubi_bu_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsubi.bu: argument out of range ++entry: ++ %res = call <32 x i8> 
@llvm.loongarch.lasx.xvsubi.bu(<32 x i8> %va, i32 32) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsubi_hu_lo(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsubi.hu: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> %va, i32 -1) ++ ret <16 x i16> %res ++} ++ ++define <16 x i16> @lasx_xvsubi_hu_hi(<16 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsubi.hu: argument out of range ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> %va, i32 32) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsubi_wu_lo(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsubi.wu: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> %va, i32 -1) ++ ret <8 x i32> %res ++} ++ ++define <8 x i32> @lasx_xvsubi_wu_hi(<8 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsubi.wu: argument out of range ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> %va, i32 32) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsubi_du_lo(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsubi.du: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> %va, i32 -1) ++ ret <4 x i64> %res ++} ++ ++define <4 x i64> @lasx_xvsubi_du_hi(<4 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvsubi.du: argument out of range ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> %va, i32 32) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi-non-imm.ll +new file mode 100644 +index 000000000000..924b89ce9d6c +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvsubi_bu(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} ++ ++declare <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16>, i32) ++ ++define <16 x i16> @lasx_xvsubi_hu(<16 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> %va, i32 %b) ++ ret <16 x i16> %res ++} ++ ++declare <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32>, i32) ++ ++define <8 x i32> @lasx_xvsubi_wu(<8 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> %va, i32 %b) ++ ret <8 x i32> %res ++} ++ ++declare <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64>, i32) ++ ++define <4 x i64> @lasx_xvsubi_du(<4 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> %va, i32 %b) ++ ret <4 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori-invalid-imm.ll +new file mode 100644 
+index 000000000000..0170d204cf42 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori-invalid-imm.ll +@@ -0,0 +1,17 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvxori_b_lo(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvxori.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> %va, i32 -1) ++ ret <32 x i8> %res ++} ++ ++define <32 x i8> @lasx_xvxori_b_hi(<32 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lasx.xvxori.b: argument out of range ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> %va, i32 256) ++ ret <32 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori-non-imm.ll +new file mode 100644 +index 000000000000..1478f691a1cc +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori-non-imm.ll +@@ -0,0 +1,10 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s ++ ++declare <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8>, i32) ++ ++define <32 x i8> @lasx_xvxori_b(<32 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> %va, i32 %b) ++ ret <32 x i8> %res ++} +-- +2.20.1 + diff --git a/0017-LoongArch-MC-Add-invalid-immediate-testcases-for-LSX.patch b/0017-LoongArch-MC-Add-invalid-immediate-testcases-for-LSX.patch new file mode 100644 index 0000000000000000000000000000000000000000..b7e852ae135252b3dfcc1102280439033a4a4e95 --- /dev/null +++ b/0017-LoongArch-MC-Add-invalid-immediate-testcases-for-LSX.patch @@ -0,0 +1,1221 @@ +From 91c9df5a4deae4ab63953674880493b9764989ad Mon Sep 17 00:00:00 2001 +From: chenli +Date: Sat, 19 Aug 2023 17:15:19 +0800 +Subject: [PATCH 17/66] [LoongArch][MC] Add invalid immediate testcases for LSX + instructions + +Reviewed By: SixWeining + +Differential Revision: https://reviews.llvm.org/D157573 + +(cherry picked from commit 2f4b6695836e16ec075061cd2508444bd403ad7d) + +Change-Id: Ifb4cfa02e6fba6d7e55405aa7e1fc33ceb8382e9 +--- + llvm/test/MC/LoongArch/lsx/invalid-imm.s | 1149 +++++++++++++++++++++- + 1 file changed, 1143 insertions(+), 6 deletions(-) + +diff --git a/llvm/test/MC/LoongArch/lsx/invalid-imm.s b/llvm/test/MC/LoongArch/lsx/invalid-imm.s +index fb7e24c83488..c3f9aaa08281 100644 +--- a/llvm/test/MC/LoongArch/lsx/invalid-imm.s ++++ b/llvm/test/MC/LoongArch/lsx/invalid-imm.s +@@ -3,53 +3,1190 @@ + # RUN: not llvm-mc --triple=loongarch64 %s 2>&1 | FileCheck %s + + ## uimm1 ++vstelm.d $vr0, $a0, 8, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 1] ++ ++vstelm.d $vr0, $a0, 8, 2 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 1] ++ ++vreplvei.d $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 1] ++ + vreplvei.d $vr0, $vr1, 2 + # CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 1] + ++vpickve2gr.du $a0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 1] ++ ++vpickve2gr.du $a0, $vr1, 2 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 1] ++ ++vpickve2gr.d $a0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 1] ++ ++vpickve2gr.d $a0, $vr1, 2 
++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 1] ++ ++vinsgr2vr.d $vr0, $a0, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 1] ++ ++vinsgr2vr.d $vr0, $a0, 2 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 1] ++ ++## uimm2 ++vstelm.w $vr0, $a0, 4, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 3] ++ ++vstelm.w $vr0, $a0, 4, 4 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 3] ++ ++vreplvei.w $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 3] ++ ++vreplvei.w $vr0, $vr1, 4 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 3] ++ ++vpickve2gr.wu $a0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 3] ++ ++vpickve2gr.wu $a0, $vr1, 4 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 3] ++ ++vpickve2gr.w $a0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 3] ++ ++vpickve2gr.w $a0, $vr1, 4 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 3] ++ ++vinsgr2vr.w $vr0, $a0, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 3] ++ ++vinsgr2vr.w $vr0, $a0, 4 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 3] ++ ++## uimm3 ++vstelm.h $vr0, $a0, 2, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] ++ ++vstelm.h $vr0, $a0, 2, 8 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] ++ ++vreplvei.h $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] ++ ++vreplvei.h $vr0, $vr1, 8 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] ++ ++vpickve2gr.hu $a0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 7] ++ ++vpickve2gr.hu $a0, $vr1, 8 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 7] ++ ++vpickve2gr.h $a0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7] ++ ++vpickve2gr.h $a0, $vr1, 8 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7] ++ ++vinsgr2vr.h $vr0, $a0, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] ++ ++vinsgr2vr.h $vr0, $a0, 8 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] ++ ++vbitrevi.b $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] ++ ++vbitrevi.b $vr0, $vr1, 8 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] ++ ++vbitseti.b $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] ++ ++vbitseti.b $vr0, $vr1, 8 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] ++ ++vbitclri.b $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] ++ ++vbitclri.b $vr0, $vr1, 8 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] ++ ++vsrari.b $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] ++ ++vsrari.b $vr0, $vr1, 8 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an 
integer in the range [0, 7] ++ ++vsrlri.b $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] ++ ++vsrlri.b $vr0, $vr1, 8 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] ++ ++vsllwil.hu.bu $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 7] ++ ++vsllwil.hu.bu $vr0, $vr1, 8 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 7] ++ ++vsllwil.h.b $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7] ++ ++vsllwil.h.b $vr0, $vr1, 8 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7] ++ ++vrotri.b $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] ++ ++vrotri.b $vr0, $vr1, 8 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] ++ ++vsrai.b $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7] ++ ++vsrai.b $vr0, $vr1, 8 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7] ++ ++vsrli.b $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7] ++ ++vsrli.b $vr0, $vr1, 8 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7] ++ ++vslli.b $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7] ++ ++vslli.b $vr0, $vr1, 8 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7] ++ ++vsat.b $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:20: error: immediate must be an integer in the range [0, 7] ++ ++vsat.b $vr0, $vr1, 8 ++# CHECK: :[[#@LINE-1]]:20: error: immediate must be an integer in the range [0, 7] ++ ++vsat.bu $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7] ++ ++vsat.bu $vr0, $vr1, 8 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7] ++ + ## uimm4 ++vstelm.b $vr0, $a0, 1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] ++ ++vstelm.b $vr0, $a0, 1, 16 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] ++ ++vreplvei.b $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] ++ ++vreplvei.b $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] ++ ++vpickve2gr.bu $a0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] ++ ++vpickve2gr.bu $a0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] ++ ++vpickve2gr.b $a0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] ++ ++vpickve2gr.b $a0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] ++ ++vinsgr2vr.b $vr0, $a0, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] ++ ++vinsgr2vr.b $vr0, $a0, 16 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] ++ ++vbitrevi.h $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] ++ ++vbitrevi.h $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] ++ ++vbitseti.h $vr0, $vr1, -1 ++# CHECK: 
:[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] ++ ++vbitseti.h $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] ++ ++vbitclri.h $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] ++ ++vbitclri.h $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] ++ ++vssrarni.bu.h $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] ++ ++vssrarni.bu.h $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] ++ ++vssrlrni.bu.h $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] ++ ++vssrlrni.bu.h $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] ++ ++vssrarni.b.h $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] ++ ++vssrarni.b.h $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] ++ ++vssrlrni.b.h $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] ++ ++vssrlrni.b.h $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] ++ ++vssrani.bu.h $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] ++ ++vssrani.bu.h $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] ++ ++vssrlni.bu.h $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] ++ ++vssrlni.bu.h $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] ++ ++vssrani.b.h $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] ++ ++vssrani.b.h $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] ++ ++vssrlni.b.h $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] ++ ++vssrlni.b.h $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] ++ ++vsrarni.b.h $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] ++ ++vsrarni.b.h $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] ++ ++vsrlrni.b.h $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] ++ ++vsrlrni.b.h $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] ++ ++vsrani.b.h $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] ++ ++vsrani.b.h $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] ++ ++vsrlni.b.h $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] ++ ++vsrlni.b.h $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] ++ ++vsrari.h $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] ++ ++vsrari.h $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] ++ ++vsrlri.h $vr0, $vr1, -1 ++# CHECK: 
:[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] ++ ++vsrlri.h $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] ++ ++vsllwil.wu.hu $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] ++ ++vsllwil.wu.hu $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] ++ ++vsllwil.w.h $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] ++ ++vsllwil.w.h $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] ++ ++vrotri.h $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] ++ ++vrotri.h $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] ++ ++vsrai.h $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15] ++ ++vsrai.h $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15] ++ ++vsrli.h $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15] ++ ++vsrli.h $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15] ++ ++vslli.h $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15] ++ ++vslli.h $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15] ++ ++vsat.h $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:20: error: immediate must be an integer in the range [0, 15] ++ + vsat.h $vr0, $vr1, 16 + # CHECK: :[[#@LINE-1]]:20: error: immediate must be an integer in the range [0, 15] + ++vsat.hu $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15] ++ ++vsat.hu $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15] ++ ++## uimm5 ++vbsrl.v $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] ++ ++vbsrl.v $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] ++ ++vbsll.v $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] ++ ++vbsll.v $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] ++ ++vslti.du $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vslti.du $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vslti.wu $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vslti.wu $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vslti.hu $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vslti.hu $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vslti.bu $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vslti.bu $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vslei.du $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vslei.du $vr0, 
$vr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vslei.wu $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vslei.wu $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vslei.hu $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vslei.hu $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vslei.bu $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vslei.bu $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vfrstpi.h $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++vfrstpi.h $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++vfrstpi.b $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++vfrstpi.b $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++vbitrevi.w $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] ++ ++vbitrevi.w $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] ++ ++vbitseti.w $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] ++ ++vbitseti.w $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] ++ ++vbitclri.w $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] ++ ++vbitclri.w $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] ++ ++vssrarni.hu.w $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] ++ ++vssrarni.hu.w $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] ++ ++vssrlrni.hu.w $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] ++ ++vssrlrni.hu.w $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] ++ ++vssrarni.h.w $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] ++ ++vssrarni.h.w $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] ++ ++vssrlrni.h.w $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] ++ ++vssrlrni.h.w $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] ++ ++vssrani.hu.w $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] ++ ++vssrani.hu.w $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] ++ ++vssrlni.hu.w $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] ++ ++vssrlni.hu.w $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] ++ ++vssrani.h.w $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] ++ ++vssrani.h.w $vr0, $vr1, 32 ++# CHECK: 
:[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] ++ ++vssrlni.h.w $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] ++ ++vssrlni.h.w $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] ++ ++vsrarni.h.w $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] ++ ++vsrarni.h.w $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] ++ ++vsrlrni.h.w $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] ++ ++vsrlrni.h.w $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] ++ ++vsrani.h.w $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] ++ ++vsrani.h.w $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] ++ ++vsrlni.h.w $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] ++ ++vsrlni.h.w $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] ++ ++vsrari.w $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vsrari.w $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vsrlri.w $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vsrlri.w $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vsllwil.du.wu $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] ++ ++vsllwil.du.wu $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] ++ ++vsllwil.d.w $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] ++ ++vsllwil.d.w $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] ++ ++vrotri.w $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vrotri.w $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vsrai.w $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] ++ ++vsrai.w $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] ++ ++vsrli.w $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] ++ ++vsrli.w $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] ++ ++vslli.w $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] ++ ++vslli.w $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] ++ ++vaddi.bu $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vaddi.bu $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vaddi.hu $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vaddi.hu $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range 
[0, 31] ++ ++vaddi.wu $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vaddi.wu $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vaddi.du $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vaddi.du $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vsubi.bu $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vsubi.bu $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vsubi.hu $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vsubi.hu $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vsubi.wu $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vsubi.wu $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vsubi.du $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vsubi.du $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vmaxi.bu $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vmaxi.bu $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vmaxi.hu $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vmaxi.hu $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vmaxi.wu $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vmaxi.wu $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vmaxi.du $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vmaxi.du $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vmini.bu $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vmini.bu $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vmini.hu $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vmini.hu $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vmini.wu $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vmini.wu $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vmini.du $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vmini.du $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++vsat.w $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:20: error: immediate must be an integer in the range [0, 31] ++ ++vsat.w $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:20: error: immediate must be an integer in the range [0, 31] ++ ++vsat.wu $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the 
range [0, 31] ++ ++vsat.wu $vr0, $vr1, 32 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] ++ + ## simm5 ++vslti.d $vr0, $vr1, -17 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vslti.d $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vslti.w $vr0, $vr1, -17 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vslti.w $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vslti.h $vr0, $vr1, -17 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vslti.h $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vslti.b $vr0, $vr1, -17 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vslti.b $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vslei.d $vr0, $vr1, -17 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vslei.d $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vslei.w $vr0, $vr1, -17 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vslei.w $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vslei.h $vr0, $vr1, -17 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vslei.h $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vslei.b $vr0, $vr1, -17 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vslei.b $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vseqi.d $vr0, $vr1, -17 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vseqi.d $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vseqi.w $vr0, $vr1, -17 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vseqi.w $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vseqi.h $vr0, $vr1, -17 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vseqi.h $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vseqi.b $vr0, $vr1, -17 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ + vseqi.b $vr0, $vr1, 16 + # CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] + ++vmaxi.b $vr0, $vr1, -17 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vmaxi.b $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vmaxi.h $vr0, $vr1, -17 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vmaxi.h $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vmaxi.w $vr0, $vr1, -17 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vmaxi.w $vr0, $vr1, 16 ++# 
CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vmaxi.d $vr0, $vr1, -17 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vmaxi.d $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vmini.b $vr0, $vr1, -17 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vmini.b $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vmini.h $vr0, $vr1, -17 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vmini.h $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vmini.w $vr0, $vr1, -17 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vmini.w $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vmini.d $vr0, $vr1, -17 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++vmini.d $vr0, $vr1, 16 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] ++ ++## uimm6 ++vbitrevi.d $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 63] ++ ++vbitrevi.d $vr0, $vr1, 64 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 63] ++ ++vbitseti.d $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 63] ++ ++vbitseti.d $vr0, $vr1, 64 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 63] ++ ++vbitclri.d $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 63] ++ ++vbitclri.d $vr0, $vr1, 64 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 63] ++ ++vssrarni.wu.d $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63] ++ ++vssrarni.wu.d $vr0, $vr1, 64 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63] ++ ++vssrlrni.wu.d $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63] ++ ++vssrlrni.wu.d $vr0, $vr1, 64 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63] ++ ++vssrarni.w.d $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] ++ ++vssrarni.w.d $vr0, $vr1, 64 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] ++ ++vssrlrni.w.d $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] ++ ++vssrlrni.w.d $vr0, $vr1, 64 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] ++ ++vssrani.wu.d $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] ++ ++vssrani.wu.d $vr0, $vr1, 64 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] ++ ++vssrlni.wu.d $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] ++ ++vssrlni.wu.d $vr0, $vr1, 64 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] ++ ++vssrani.w.d $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] ++ ++vssrani.w.d $vr0, $vr1, 64 ++# CHECK: 
:[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] ++ ++vssrlni.w.d $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] ++ ++vssrlni.w.d $vr0, $vr1, 64 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] ++ ++vsrarni.w.d $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] ++ ++vsrarni.w.d $vr0, $vr1, 64 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] ++ ++vsrlrni.w.d $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] ++ ++vsrlrni.w.d $vr0, $vr1, 64 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] ++ ++vsrani.w.d $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 63] ++ ++vsrani.w.d $vr0, $vr1, 64 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 63] ++ ++vsrlni.w.d $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 63] ++ ++vsrlni.w.d $vr0, $vr1, 64 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 63] ++ ++vsrari.d $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] ++ ++vsrari.d $vr0, $vr1, 64 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] ++ ++vsrlri.d $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] ++ ++vsrlri.d $vr0, $vr1, 64 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] ++ ++vrotri.d $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] ++ ++vrotri.d $vr0, $vr1, 64 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] ++ ++vsrai.d $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63] ++ ++vsrai.d $vr0, $vr1, 64 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63] ++ ++vsrli.d $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63] ++ ++vsrli.d $vr0, $vr1, 64 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63] ++ ++vslli.d $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63] ++ ++vslli.d $vr0, $vr1, 64 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63] ++ ++vsat.d $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:20: error: immediate must be an integer in the range [0, 63] ++ ++vsat.d $vr0, $vr1, 64 ++# CHECK: :[[#@LINE-1]]:20: error: immediate must be an integer in the range [0, 63] ++ ++vsat.du $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63] ++ ++vsat.du $vr0, $vr1, 64 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63] ++ + ## uimm7 ++vssrarni.du.q $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127] ++ ++vssrarni.du.q $vr0, $vr1, 128 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127] ++ ++vssrlrni.du.q $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127] ++ ++vssrlrni.du.q $vr0, $vr1, 128 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer 
in the range [0, 127] ++ ++vssrarni.d.q $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] ++ ++vssrarni.d.q $vr0, $vr1, 128 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] ++ ++vssrlrni.d.q $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] ++ ++vssrlrni.d.q $vr0, $vr1, 128 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] ++ ++vssrani.du.q $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] ++ ++vssrani.du.q $vr0, $vr1, 128 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] ++ ++vssrlni.du.q $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] ++ ++vssrlni.du.q $vr0, $vr1, 128 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] ++ ++vssrani.d.q $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127] ++ ++vssrani.d.q $vr0, $vr1, 128 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127] ++ ++vssrlni.d.q $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127] ++ ++vssrlni.d.q $vr0, $vr1, 128 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127] ++ ++vsrarni.d.q $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127] ++ ++vsrarni.d.q $vr0, $vr1, 128 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127] ++ ++vsrlrni.d.q $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127] ++ ++vsrlrni.d.q $vr0, $vr1, 128 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127] ++ ++vsrani.d.q $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 127] ++ ++vsrani.d.q $vr0, $vr1, 128 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 127] ++ ++vsrlni.d.q $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 127] ++ + vsrlni.d.q $vr0, $vr1, 128 + # CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 127] + +-## simm8 ++## uimm8 ++vextrins.d $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] ++ ++vextrins.d $vr0, $vr1, 256 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] ++ ++vextrins.w $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] ++ ++vextrins.w $vr0, $vr1, 256 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] ++ ++vextrins.h $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] ++ ++vextrins.h $vr0, $vr1, 256 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] ++ ++vextrins.b $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] ++ ++vextrins.b $vr0, $vr1, 256 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] ++ ++vpermi.w $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 255] ++ + vpermi.w $vr0, $vr1, 256 + # CHECK: 
:[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 255] + ++vshuf4i.d $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] ++ ++vshuf4i.d $vr0, $vr1, 256 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] ++ ++vshuf4i.w $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] ++ ++vshuf4i.w $vr0, $vr1, 256 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] ++ ++vshuf4i.h $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] ++ ++vshuf4i.h $vr0, $vr1, 256 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] ++ ++vshuf4i.b $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] ++ ++vshuf4i.b $vr0, $vr1, 256 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] ++ ++vbitseli.b $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] ++ ++vbitseli.b $vr0, $vr1, 256 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] ++ ++vandi.b $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 255] ++ ++vandi.b $vr0, $vr1, 256 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 255] ++ ++vori.b $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:20: error: immediate must be an integer in the range [0, 255] ++ ++vori.b $vr0, $vr1, 256 ++# CHECK: :[[#@LINE-1]]:20: error: immediate must be an integer in the range [0, 255] ++ ++vxori.b $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 255] ++ ++vxori.b $vr0, $vr1, 256 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 255] ++ ++vnori.b $vr0, $vr1, -1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 255] ++ ++vnori.b $vr0, $vr1, 256 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 255] ++ ++## simm8 ++vstelm.b $vr0, $a0, -129, 1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-128, 127] ++ ++vstelm.b $vr0, $a0, 128, 1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-128, 127] ++ + ## simm8_lsl1 +-vstelm.h $vr0, $a0, 255, 1 ++vstelm.h $vr0, $a0, -258, 1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be a multiple of 2 in the range [-256, 254] ++ ++vstelm.h $vr0, $a0, 256, 1 + # CHECK: :[[#@LINE-1]]:21: error: immediate must be a multiple of 2 in the range [-256, 254] + + ## simm8_lsl2 +-vstelm.w $vr0, $a0, 512, 1 ++vstelm.w $vr0, $a0, -516, 1 + # CHECK: :[[#@LINE-1]]:21: error: immediate must be a multiple of 4 in the range [-512, 508] + +-## simm10 +-vrepli.b $vr0, 512 +-# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-512, 511] ++vstelm.w $vr0, $a0, 512, 1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be a multiple of 4 in the range [-512, 508] + + ## simm8_lsl3 ++vstelm.d $vr0, $a0, -1032, 1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be a multiple of 8 in the range [-1024, 1016] ++ + vstelm.d $vr0, $a0, 1024, 1 + # CHECK: :[[#@LINE-1]]:21: error: immediate must be a multiple of 8 in the range [-1024, 1016] + + ## simm9_lsl3 ++vldrepl.d $vr0, $a0, -2056 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 8 in the 
range [-2048, 2040] ++ + vldrepl.d $vr0, $a0, 2048 + # CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 8 in the range [-2048, 2040] + + ## simm10_lsl2 ++vldrepl.w $vr0, $a0, -2052 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 4 in the range [-2048, 2044] ++ + vldrepl.w $vr0, $a0, 2048 + # CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 4 in the range [-2048, 2044] + ++## simm10 ++vrepli.b $vr0, -513 ++# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-512, 511] ++ ++vrepli.b $vr0, 512 ++# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-512, 511] ++ ++vrepli.h $vr0, -513 ++# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-512, 511] ++ ++vrepli.h $vr0, 512 ++# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-512, 511] ++ ++vrepli.w $vr0, -513 ++# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-512, 511] ++ ++vrepli.w $vr0, 512 ++# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-512, 511] ++ ++vrepli.d $vr0, -513 ++# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-512, 511] ++ ++vrepli.d $vr0, 512 ++# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-512, 511] ++ + ## simm11_lsl1 ++vldrepl.h $vr0, $a0, -2050 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 2 in the range [-2048, 2046] ++ + vldrepl.h $vr0, $a0, 2048 + # CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 2 in the range [-2048, 2046] + ++## simm12 ++vldrepl.b $vr0, $a0, -2049 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-2048, 2047] ++ ++vldrepl.b $vr0, $a0, 2048 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-2048, 2047] ++ ++vst $vr0, $a0, -2049 ++# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-2048, 2047] ++ ++vst $vr0, $a0, 2048 ++# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-2048, 2047] ++ ++vld $vr0, $a0, -2049 ++# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-2048, 2047] ++ ++vld $vr0, $a0, 2048 ++# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-2048, 2047] ++ + ## simm13 ++vldi $vr0, -4097 ++# CHECK: :[[#@LINE-1]]:12: error: immediate must be an integer in the range [-4096, 4095] ++ + vldi $vr0, 4096 + # CHECK: :[[#@LINE-1]]:12: error: immediate must be an integer in the range [-4096, 4095] +-- +2.20.1 + diff --git a/0018-LoongArch-MC-Add-invalid-immediate-testcases-for-LAS.patch b/0018-LoongArch-MC-Add-invalid-immediate-testcases-for-LAS.patch new file mode 100644 index 0000000000000000000000000000000000000000..b02fb4d3903abfcd3272b8dee5821135105f5eb7 --- /dev/null +++ b/0018-LoongArch-MC-Add-invalid-immediate-testcases-for-LAS.patch @@ -0,0 +1,1221 @@ +From fee5433c9cfe7a63735b26f7e0ef2930a78930f1 Mon Sep 17 00:00:00 2001 +From: chenli +Date: Sat, 19 Aug 2023 17:16:09 +0800 +Subject: [PATCH 18/66] [LoongArch][MC] Add invalid immediate testcases for + LASX instructions + +Reviewed By: SixWeining + +Differential Revision: https://reviews.llvm.org/D157574 + +(cherry picked from commit d163ae8c255f663707d4b0d5de03fcb18274b3eb) + +Change-Id: I7350fad4f87bc20f5dfa95db252b15f3edc9a3ba +--- + llvm/test/MC/LoongArch/lasx/invalid-imm.s | 1149 ++++++++++++++++++++- + 1 file changed, 1143 insertions(+), 6 deletions(-) + +diff --git 
a/llvm/test/MC/LoongArch/lasx/invalid-imm.s b/llvm/test/MC/LoongArch/lasx/invalid-imm.s +index 5c61a7a42009..6f64a6f87802 100644 +--- a/llvm/test/MC/LoongArch/lasx/invalid-imm.s ++++ b/llvm/test/MC/LoongArch/lasx/invalid-imm.s +@@ -3,53 +3,1190 @@ + # RUN: not llvm-mc --triple=loongarch64 %s 2>&1 | FileCheck %s + + ## uimm1 ++xvrepl128vei.d $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 1] ++ + xvrepl128vei.d $xr0, $xr1, 2 + # CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 1] + ++## uimm2 ++xvpickve.d $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 3] ++ ++xvpickve.d $xr0, $xr1, 4 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 3] ++ ++xvinsve0.d $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 3] ++ ++xvinsve0.d $xr0, $xr1, 4 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 3] ++ ++xvinsgr2vr.d $xr0, $a0, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 3] ++ ++xvinsgr2vr.d $xr0, $a0, 4 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 3] ++ ++xvpickve2gr.d $a0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 3] ++ ++xvpickve2gr.d $a0, $xr1, 4 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 3] ++ ++xvpickve2gr.du $a0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 3] ++ ++xvpickve2gr.du $a0, $xr1, 4 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 3] ++ ++xvstelm.d $xr0, $a0, 8, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 3] ++ ++xvstelm.d $xr0, $a0, 8, 4 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 3] ++ ++xvrepl128vei.w $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 3] ++ ++xvrepl128vei.w $xr0, $xr1, 4 ++# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 3] ++ ++## uimm3 ++xvpickve.w $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] ++ ++xvpickve.w $xr0, $xr1, 8 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] ++ ++xvinsve0.w $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] ++ ++xvinsve0.w $xr0, $xr1, 8 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] ++ ++xvinsgr2vr.w $xr0, $a0, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7] ++ ++xvinsgr2vr.w $xr0, $a0, 8 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7] ++ ++xvpickve2gr.wu $a0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 7] ++ ++xvpickve2gr.wu $a0, $xr1, 8 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 7] ++ ++xvpickve2gr.w $a0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 7] ++ ++xvpickve2gr.w $a0, $xr1, 8 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 7] ++ ++xvstelm.w $xr0, $a0, 4, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7] ++ ++xvstelm.w $xr0, $a0, 4, 8 ++# CHECK: 
:[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7] ++ ++xvrepl128vei.h $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 7] ++ ++xvrepl128vei.h $xr0, $xr1, 8 ++# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 7] ++ ++xvbitrevi.b $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7] ++ ++xvbitrevi.b $xr0, $xr1, 8 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7] ++ ++xvbitseti.b $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7] ++ ++xvbitseti.b $xr0, $xr1, 8 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7] ++ ++xvbitclri.b $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7] ++ ++xvbitclri.b $xr0, $xr1, 8 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7] ++ ++xvsrari.b $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 7] ++ ++xvsrari.b $xr0, $xr1, 8 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 7] ++ ++xvsrlri.b $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 7] ++ ++xvsrlri.b $xr0, $xr1, 8 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 7] ++ ++xvsllwil.hu.bu $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 7] ++ ++xvsllwil.hu.bu $xr0, $xr1, 8 ++# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 7] ++ ++xvsllwil.h.b $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 7] ++ ++xvsllwil.h.b $xr0, $xr1, 8 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 7] ++ ++xvrotri.b $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 7] ++ ++xvrotri.b $xr0, $xr1, 8 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 7] ++ ++xvsrai.b $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] ++ ++xvsrai.b $xr0, $xr1, 8 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] ++ ++xvsrli.b $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] ++ ++xvsrli.b $xr0, $xr1, 8 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] ++ ++xvslli.b $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] ++ ++xvslli.b $xr0, $xr1, 8 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] ++ ++xvsat.b $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7] ++ ++xvsat.b $xr0, $xr1, 8 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7] ++ ++xvsat.bu $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] ++ ++xvsat.bu $xr0, $xr1, 8 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] ++ + ## uimm4 ++xvstelm.h $xr0, $a0, 2, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] ++ ++xvstelm.h $xr0, $a0, 2, 16 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] 
++ ++xvrepl128vei.b $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 15] ++ ++xvrepl128vei.b $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 15] ++ ++xvbitrevi.h $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] ++ ++xvbitrevi.h $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] ++ ++xvbitseti.h $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] ++ ++xvbitseti.h $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] ++ ++xvbitclri.h $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] ++ ++xvbitclri.h $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] ++ ++xvssrarni.bu.h $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 15] ++ ++xvssrarni.bu.h $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 15] ++ ++xvssrlrni.bu.h $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 15] ++ ++xvssrlrni.bu.h $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 15] ++ ++xvssrarni.b.h $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] ++ ++xvssrarni.b.h $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] ++ ++xvssrlrni.b.h $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] ++ ++xvssrlrni.b.h $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] ++ ++xvssrani.bu.h $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] ++ ++xvssrani.bu.h $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] ++ ++xvssrlni.bu.h $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] ++ ++xvssrlni.bu.h $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] ++ ++xvssrani.b.h $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] ++ ++xvssrani.b.h $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] ++ ++xvssrlni.b.h $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] ++ ++xvssrlni.b.h $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] ++ ++xvsrarni.b.h $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] ++ ++xvsrarni.b.h $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] ++ ++xvsrlrni.b.h $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] ++ ++xvsrlrni.b.h $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] ++ ++xvsrani.b.h $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] ++ ++xvsrani.b.h $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an 
integer in the range [0, 15] ++ ++xvsrlni.b.h $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] ++ ++xvsrlni.b.h $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] ++ ++xvsrari.h $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 15] ++ ++xvsrari.h $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 15] ++ ++xvsrlri.h $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 15] ++ ++xvsrlri.h $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 15] ++ ++xvsllwil.wu.hu $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 15] ++ ++xvsllwil.wu.hu $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 15] ++ ++xvsllwil.w.h $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] ++ ++xvsllwil.w.h $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] ++ ++xvrotri.h $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 15] ++ ++xvrotri.h $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 15] ++ ++xvsrai.h $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] ++ ++xvsrai.h $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] ++ ++xvsrli.h $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] ++ ++xvsrli.h $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] ++ ++xvslli.h $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] ++ ++xvslli.h $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] ++ ++xvsat.h $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15] ++ + xvsat.h $xr0, $xr1, 16 + # CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15] + ++xvsat.hu $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] ++ ++xvsat.hu $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] ++ ++## uimm5 ++xvstelm.b $xr0, $a0, 1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] ++ ++xvstelm.b $xr0, $a0, 1, 32 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] ++ ++xvbsrl.v $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++xvbsrl.v $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++xvbsll.v $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++xvbsll.v $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++xvslti.du $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvslti.du $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvslti.wu $xr0, $xr1, 
-1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvslti.wu $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvslti.hu $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvslti.hu $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvslti.bu $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvslti.bu $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvslei.du $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvslei.du $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvslei.wu $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvslei.wu $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvslei.hu $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvslei.hu $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvslei.bu $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvslei.bu $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvfrstpi.h $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] ++ ++xvfrstpi.h $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] ++ ++xvfrstpi.b $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] ++ ++xvfrstpi.b $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] ++ ++xvbitrevi.w $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] ++ ++xvbitrevi.w $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] ++ ++xvbitseti.w $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] ++ ++xvbitseti.w $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] ++ ++xvbitclri.w $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] ++ ++xvbitclri.w $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] ++ ++xvssrarni.hu.w $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 31] ++ ++xvssrarni.hu.w $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 31] ++ ++xvssrlrni.hu.w $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 31] ++ ++xvssrlrni.hu.w $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 31] ++ ++xvssrarni.h.w $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] ++ ++xvssrarni.h.w $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] ++ ++xvssrlrni.h.w $xr0, $xr1, -1 ++# CHECK: 
:[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] ++ ++xvssrlrni.h.w $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] ++ ++xvssrani.hu.w $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] ++ ++xvssrani.hu.w $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] ++ ++xvssrlni.hu.w $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] ++ ++xvssrlni.hu.w $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] ++ ++xvssrani.h.w $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] ++ ++xvssrani.h.w $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] ++ ++xvssrlni.h.w $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] ++ ++xvssrlni.h.w $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] ++ ++xvsrarni.h.w $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] ++ ++xvsrarni.h.w $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] ++ ++xvsrlrni.h.w $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] ++ ++xvsrlrni.h.w $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] ++ ++xvsrani.h.w $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] ++ ++xvsrani.h.w $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] ++ ++xvsrlni.h.w $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] ++ ++xvsrlni.h.w $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] ++ ++xvsrari.w $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvsrari.w $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvsrlri.w $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvsrlri.w $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvsllwil.du.wu $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 31] ++ ++xvsllwil.du.wu $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 31] ++ ++xvsllwil.d.w $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] ++ ++xvsllwil.d.w $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] ++ ++xvrotri.w $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvrotri.w $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvsrai.w $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++xvsrai.w $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++xvsrli.w $xr0, $xr1, -1 ++# CHECK: 
:[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++xvsrli.w $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++xvslli.w $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++xvslli.w $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++xvaddi.bu $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvaddi.bu $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvaddi.hu $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvaddi.hu $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvaddi.wu $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvaddi.wu $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvaddi.du $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvaddi.du $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvsubi.bu $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvsubi.bu $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvsubi.hu $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvsubi.hu $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvsubi.wu $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvsubi.wu $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvsubi.du $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvsubi.du $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvmaxi.bu $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvmaxi.bu $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvmaxi.hu $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvmaxi.hu $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvmaxi.wu $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvmaxi.wu $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvmaxi.du $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvmaxi.du $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvmini.bu $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvmini.bu $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvmini.hu $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ 
++xvmini.hu $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvmini.wu $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvmini.wu $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvmini.du $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvmini.du $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] ++ ++xvsat.w $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] ++ ++xvsat.w $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] ++ ++xvsat.wu $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ ++xvsat.wu $xr0, $xr1, 32 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] ++ + ## simm5 ++xvslti.d $xr0, $xr1, -17 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvslti.d $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvslti.w $xr0, $xr1, -17 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvslti.w $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvslti.h $xr0, $xr1, -17 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvslti.h $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvslti.b $xr0, $xr1, -17 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvslti.b $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvslei.d $xr0, $xr1, -17 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvslei.d $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvslei.w $xr0, $xr1, -17 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvslei.w $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvslei.h $xr0, $xr1, -17 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvslei.h $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvslei.b $xr0, $xr1, -17 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvslei.b $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvseqi.d $xr0, $xr1, -17 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvseqi.d $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvseqi.w $xr0, $xr1, -17 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvseqi.w $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvseqi.h $xr0, $xr1, -17 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvseqi.h $xr0, $xr1, 16 ++# CHECK: 
:[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvseqi.b $xr0, $xr1, -17 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ + xvseqi.b $xr0, $xr1, 16 + # CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] + ++xvmaxi.b $xr0, $xr1, -17 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvmaxi.b $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvmaxi.h $xr0, $xr1, -17 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvmaxi.h $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvmaxi.w $xr0, $xr1, -17 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvmaxi.w $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvmaxi.d $xr0, $xr1, -17 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvmaxi.d $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvmini.b $xr0, $xr1, -17 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvmini.b $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvmini.h $xr0, $xr1, -17 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvmini.h $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvmini.w $xr0, $xr1, -17 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvmini.w $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvmini.d $xr0, $xr1, -17 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++xvmini.d $xr0, $xr1, 16 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15] ++ ++## uimm6 ++xvbitrevi.d $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] ++ ++xvbitrevi.d $xr0, $xr1, 64 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] ++ ++xvbitseti.d $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] ++ ++xvbitseti.d $xr0, $xr1, 64 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] ++ ++xvbitclri.d $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] ++ ++xvbitclri.d $xr0, $xr1, 64 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] ++ ++xvssrarni.wu.d $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 63] ++ ++xvssrarni.wu.d $xr0, $xr1, 64 ++# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 63] ++ ++xvssrlrni.wu.d $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 63] ++ ++xvssrlrni.wu.d $xr0, $xr1, 64 ++# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 63] ++ ++xvssrarni.w.d $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63] ++ ++xvssrarni.w.d $xr0, $xr1, 64 ++# 
CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63] ++ ++xvssrlrni.w.d $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63] ++ ++xvssrlrni.w.d $xr0, $xr1, 64 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63] ++ ++xvssrani.wu.d $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63] ++ ++xvssrani.wu.d $xr0, $xr1, 64 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63] ++ ++xvssrlni.wu.d $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63] ++ ++xvssrlni.wu.d $xr0, $xr1, 64 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63] ++ ++xvssrani.w.d $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] ++ ++xvssrani.w.d $xr0, $xr1, 64 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] ++ ++xvssrlni.w.d $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] ++ ++xvssrlni.w.d $xr0, $xr1, 64 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] ++ ++xvsrarni.w.d $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] ++ ++xvsrarni.w.d $xr0, $xr1, 64 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] ++ ++xvsrlrni.w.d $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] ++ ++xvsrlrni.w.d $xr0, $xr1, 64 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] ++ ++xvsrani.w.d $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] ++ ++xvsrani.w.d $xr0, $xr1, 64 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] ++ ++xvsrlni.w.d $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] ++ ++xvsrlni.w.d $xr0, $xr1, 64 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] ++ ++xvsrari.d $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 63] ++ ++xvsrari.d $xr0, $xr1, 64 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 63] ++ ++xvsrlri.d $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 63] ++ ++xvsrlri.d $xr0, $xr1, 64 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 63] ++ ++xvrotri.d $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 63] ++ ++xvrotri.d $xr0, $xr1, 64 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 63] ++ ++xvsrai.d $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] ++ ++xvsrai.d $xr0, $xr1, 64 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] ++ ++xvsrli.d $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] ++ ++xvsrli.d $xr0, $xr1, 64 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] ++ ++xvslli.d $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] ++ ++xvslli.d $xr0, $xr1, 64 ++# CHECK: :[[#@LINE-1]]:22: 
error: immediate must be an integer in the range [0, 63] ++ ++xvsat.d $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63] ++ ++xvsat.d $xr0, $xr1, 64 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63] ++ ++xvsat.du $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] ++ ++xvsat.du $xr0, $xr1, 64 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] ++ + ## uimm7 ++xvssrarni.du.q $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 127] ++ ++xvssrarni.du.q $xr0, $xr1, 128 ++# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 127] ++ ++xvssrlrni.du.q $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 127] ++ ++xvssrlrni.du.q $xr0, $xr1, 128 ++# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 127] ++ ++xvssrarni.d.q $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127] ++ ++xvssrarni.d.q $xr0, $xr1, 128 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127] ++ ++xvssrlrni.d.q $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127] ++ ++xvssrlrni.d.q $xr0, $xr1, 128 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127] ++ ++xvssrani.du.q $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127] ++ ++xvssrani.du.q $xr0, $xr1, 128 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127] ++ ++xvssrlni.du.q $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127] ++ ++xvssrlni.du.q $xr0, $xr1, 128 ++# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127] ++ ++xvssrani.d.q $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] ++ ++xvssrani.d.q $xr0, $xr1, 128 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] ++ ++xvssrlni.d.q $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] ++ ++xvssrlni.d.q $xr0, $xr1, 128 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] ++ ++xvsrarni.d.q $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] ++ ++xvsrarni.d.q $xr0, $xr1, 128 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] ++ ++xvsrlrni.d.q $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] ++ ++xvsrlrni.d.q $xr0, $xr1, 128 ++# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] ++ ++xvsrani.d.q $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127] ++ ++xvsrani.d.q $xr0, $xr1, 128 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127] ++ ++xvsrlni.d.q $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127] ++ + xvsrlni.d.q $xr0, $xr1, 128 + # CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127] + +-## simm8 ++## uimm8 ++xvextrins.d $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the 
range [0, 255] ++ ++xvextrins.d $xr0, $xr1, 256 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 255] ++ ++xvextrins.w $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 255] ++ ++xvextrins.w $xr0, $xr1, 256 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 255] ++ ++xvextrins.h $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 255] ++ ++xvextrins.h $xr0, $xr1, 256 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 255] ++ ++xvextrins.b $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 255] ++ ++xvextrins.b $xr0, $xr1, 256 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 255] ++ ++xvpermi.q $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] ++ ++xvpermi.q $xr0, $xr1, 256 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] ++ ++xvpermi.d $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] ++ ++xvpermi.d $xr0, $xr1, 256 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] ++ ++xvpermi.w $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] ++ + xvpermi.w $xr0, $xr1, 256 + # CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] + ++xvshuf4i.d $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] ++ ++xvshuf4i.d $xr0, $xr1, 256 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] ++ ++xvshuf4i.w $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] ++ ++xvshuf4i.w $xr0, $xr1, 256 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] ++ ++xvshuf4i.h $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] ++ ++xvshuf4i.h $xr0, $xr1, 256 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] ++ ++xvshuf4i.b $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] ++ ++xvshuf4i.b $xr0, $xr1, 256 ++# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] ++ ++xvbitseli.b $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 255] ++ ++xvbitseli.b $xr0, $xr1, 256 ++# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 255] ++ ++xvandi.b $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 255] ++ ++xvandi.b $xr0, $xr1, 256 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 255] ++ ++xvori.b $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 255] ++ ++xvori.b $xr0, $xr1, 256 ++# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 255] ++ ++xvxori.b $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 255] ++ ++xvxori.b $xr0, $xr1, 256 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 255] ++ ++xvnori.b $xr0, $xr1, -1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 
255] ++ ++xvnori.b $xr0, $xr1, 256 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 255] ++ ++## simm8 ++xvstelm.b $xr0, $a0, -129, 1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-128, 127] ++ ++xvstelm.b $xr0, $a0, 128, 1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-128, 127] ++ + ## simm8_lsl1 +-xvstelm.h $xr0, $a0, 255, 1 ++xvstelm.h $xr0, $a0, -258, 1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 2 in the range [-256, 254] ++ ++xvstelm.h $xr0, $a0, 256, 1 + # CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 2 in the range [-256, 254] + + ## simm8_lsl2 +-xvstelm.w $xr0, $a0, 512, 1 ++xvstelm.w $xr0, $a0, -516, 1 + # CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 4 in the range [-512, 508] + +-## simm10 +-xvrepli.b $xr0, 512 +-# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-512, 511] ++xvstelm.w $xr0, $a0, 512, 1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 4 in the range [-512, 508] + + ## simm8_lsl3 ++xvstelm.d $xr0, $a0, -1032, 1 ++# CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 8 in the range [-1024, 1016] ++ + xvstelm.d $xr0, $a0, 1024, 1 + # CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 8 in the range [-1024, 1016] + + ## simm9_lsl3 ++xvldrepl.d $xr0, $a0, -2056 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be a multiple of 8 in the range [-2048, 2040] ++ + xvldrepl.d $xr0, $a0, 2048 + # CHECK: :[[#@LINE-1]]:23: error: immediate must be a multiple of 8 in the range [-2048, 2040] + + ## simm10_lsl2 ++xvldrepl.w $xr0, $a0, -2052 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be a multiple of 4 in the range [-2048, 2044] ++ + xvldrepl.w $xr0, $a0, 2048 + # CHECK: :[[#@LINE-1]]:23: error: immediate must be a multiple of 4 in the range [-2048, 2044] + ++## simm10 ++xvrepli.b $xr0, -513 ++# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-512, 511] ++ ++xvrepli.b $xr0, 512 ++# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-512, 511] ++ ++xvrepli.h $xr0, -513 ++# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-512, 511] ++ ++xvrepli.h $xr0, 512 ++# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-512, 511] ++ ++xvrepli.w $xr0, -513 ++# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-512, 511] ++ ++xvrepli.w $xr0, 512 ++# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-512, 511] ++ ++xvrepli.d $xr0, -513 ++# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-512, 511] ++ ++xvrepli.d $xr0, 512 ++# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-512, 511] ++ + ## simm11_lsl1 ++xvldrepl.h $xr0, $a0, -2050 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be a multiple of 2 in the range [-2048, 2046] ++ + xvldrepl.h $xr0, $a0, 2048 + # CHECK: :[[#@LINE-1]]:23: error: immediate must be a multiple of 2 in the range [-2048, 2046] + ++## simm12 ++xvldrepl.b $xr0, $a0, -2049 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [-2048, 2047] ++ ++xvldrepl.b $xr0, $a0, 2048 ++# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [-2048, 2047] ++ ++xvst $xr0, $a0, -2049 ++# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-2048, 2047] ++ ++xvst $xr0, $a0, 2048 ++# 
CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-2048, 2047] ++ ++xvld $xr0, $a0, -2049 ++# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-2048, 2047] ++ ++xvld $xr0, $a0, 2048 ++# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-2048, 2047] ++ + ## simm13 ++xvldi $xr0, -4097 ++# CHECK: :[[#@LINE-1]]:13: error: immediate must be an integer in the range [-4096, 4095] ++ + xvldi $xr0, 4096 + # CHECK: :[[#@LINE-1]]:13: error: immediate must be an integer in the range [-4096, 4095] +-- +2.20.1 + diff --git a/0019-LoongArch-Add-testcases-of-LSX-intrinsics-with-immed.patch b/0019-LoongArch-Add-testcases-of-LSX-intrinsics-with-immed.patch new file mode 100644 index 0000000000000000000000000000000000000000..6a55056c890bc2162995237db6324c28a5e91b01 --- /dev/null +++ b/0019-LoongArch-Add-testcases-of-LSX-intrinsics-with-immed.patch @@ -0,0 +1,5695 @@ +From 155078666b57e519a22f2700eb7d1a595141954c Mon Sep 17 00:00:00 2001 +From: chenli +Date: Mon, 21 Aug 2023 11:03:49 +0800 +Subject: [PATCH 19/66] [LoongArch] Add testcases of LSX intrinsics with + immediates + +The testcases mainly cover three situations: +- the arguments which should be immediates are non immediates. +- the immediate is out of upper limit of the argument type. +- the immediate is out of lower limit of the argument type. + +Depends on D155829 + +Reviewed By: SixWeining + +Differential Revision: https://reviews.llvm.org/D157570 + +(cherry picked from commit 0c76f46ca676ebecbdf2c9f7e8b05421a234bbed) + +Change-Id: Ib4aab73a41f64b4500a833892e181124309f30aa +--- + .../lsx/intrinsic-addi-invalid-imm.ll | 65 +++++++++ + .../LoongArch/lsx/intrinsic-addi-non-imm.ll | 37 +++++ + .../lsx/intrinsic-andi-invalid-imm.ll | 17 +++ + .../LoongArch/lsx/intrinsic-andi-non-imm.ll | 10 ++ + .../lsx/intrinsic-bitclr-invalid-imm.ll | 65 +++++++++ + .../LoongArch/lsx/intrinsic-bitclr-non-imm.ll | 37 +++++ + .../lsx/intrinsic-bitrev-invalid-imm.ll | 65 +++++++++ + .../LoongArch/lsx/intrinsic-bitrev-non-imm.ll | 37 +++++ + .../lsx/intrinsic-bitseli-invalid-imm.ll | 17 +++ + .../lsx/intrinsic-bitseli-non-imm.ll | 10 ++ + .../lsx/intrinsic-bitset-invalid-imm.ll | 65 +++++++++ + .../LoongArch/lsx/intrinsic-bitset-non-imm.ll | 37 +++++ + .../lsx/intrinsic-bsll-invalid-imm.ll | 17 +++ + .../LoongArch/lsx/intrinsic-bsll-non-imm.ll | 10 ++ + .../lsx/intrinsic-bsrl-invalid-imm.ll | 17 +++ + .../LoongArch/lsx/intrinsic-bsrl-non-imm.ll | 10 ++ + .../lsx/intrinsic-extrins-invalid-imm.ll | 65 +++++++++ + .../lsx/intrinsic-extrins-non-imm.ll | 37 +++++ + .../lsx/intrinsic-frstp-invalid-imm.ll | 33 +++++ + .../LoongArch/lsx/intrinsic-frstp-non-imm.ll | 19 +++ + .../lsx/intrinsic-insgr2vr-invalid-imm.ll | 65 +++++++++ + .../lsx/intrinsic-insgr2vr-non-imm.ll | 37 +++++ + .../LoongArch/lsx/intrinsic-ld-invalid-imm.ll | 17 +++ + .../LoongArch/lsx/intrinsic-ld-non-imm.ll | 10 ++ + .../lsx/intrinsic-ldi-invalid-imm.ll | 81 +++++++++++ + .../LoongArch/lsx/intrinsic-ldi-non-imm.ll | 46 +++++++ + .../lsx/intrinsic-ldrepl-invalid-imm.ll | 65 +++++++++ + .../LoongArch/lsx/intrinsic-ldrepl-non-imm.ll | 37 +++++ + .../lsx/intrinsic-max-invalid-imm.ll | 129 ++++++++++++++++++ + .../LoongArch/lsx/intrinsic-max-non-imm.ll | 73 ++++++++++ + .../lsx/intrinsic-min-invalid-imm.ll | 129 ++++++++++++++++++ + .../LoongArch/lsx/intrinsic-min-non-imm.ll | 73 ++++++++++ + .../lsx/intrinsic-nori-invalid-imm.ll | 17 +++ + .../LoongArch/lsx/intrinsic-nori-non-imm.ll | 10 ++ + .../lsx/intrinsic-ori-invalid-imm.ll | 17 
+++ + .../LoongArch/lsx/intrinsic-ori-non-imm.ll | 10 ++ + .../lsx/intrinsic-permi-invalid-imm.ll | 17 +++ + .../LoongArch/lsx/intrinsic-permi-non-imm.ll | 10 ++ + .../lsx/intrinsic-pickve2gr-invalid-imm.ll | 129 ++++++++++++++++++ + .../lsx/intrinsic-pickve2gr-non-imm.ll | 73 ++++++++++ + .../lsx/intrinsic-replvei-invalid-imm.ll | 65 +++++++++ + .../lsx/intrinsic-replvei-non-imm.ll | 37 +++++ + .../lsx/intrinsic-rotr-invalid-imm.ll | 65 +++++++++ + .../LoongArch/lsx/intrinsic-rotr-non-imm.ll | 37 +++++ + .../lsx/intrinsic-sat-invalid-imm.ll | 129 ++++++++++++++++++ + .../LoongArch/lsx/intrinsic-sat-non-imm.ll | 73 ++++++++++ + .../lsx/intrinsic-seq-invalid-imm.ll | 65 +++++++++ + .../LoongArch/lsx/intrinsic-seq-non-imm.ll | 37 +++++ + .../lsx/intrinsic-shuf4i-invalid-imm.ll | 65 +++++++++ + .../LoongArch/lsx/intrinsic-shuf4i-non-imm.ll | 37 +++++ + .../lsx/intrinsic-sle-invalid-imm.ll | 129 ++++++++++++++++++ + .../LoongArch/lsx/intrinsic-sle-non-imm.ll | 73 ++++++++++ + .../lsx/intrinsic-sll-invalid-imm.ll | 65 +++++++++ + .../LoongArch/lsx/intrinsic-sll-non-imm.ll | 37 +++++ + .../lsx/intrinsic-sllwil-invalid-imm.ll | 97 +++++++++++++ + .../LoongArch/lsx/intrinsic-sllwil-non-imm.ll | 55 ++++++++ + .../lsx/intrinsic-slt-invalid-imm.ll | 129 ++++++++++++++++++ + .../LoongArch/lsx/intrinsic-slt-non-imm.ll | 73 ++++++++++ + .../lsx/intrinsic-sra-invalid-imm.ll | 65 +++++++++ + .../LoongArch/lsx/intrinsic-sra-non-imm.ll | 37 +++++ + .../lsx/intrinsic-srani-invalid-imm.ll | 65 +++++++++ + .../LoongArch/lsx/intrinsic-srani-non-imm.ll | 37 +++++ + .../lsx/intrinsic-srar-invalid-imm.ll | 65 +++++++++ + .../LoongArch/lsx/intrinsic-srar-non-imm.ll | 37 +++++ + .../lsx/intrinsic-srarni-invalid-imm.ll | 65 +++++++++ + .../LoongArch/lsx/intrinsic-srarni-non-imm.ll | 37 +++++ + .../lsx/intrinsic-srl-invalid-imm.ll | 65 +++++++++ + .../LoongArch/lsx/intrinsic-srl-non-imm.ll | 37 +++++ + .../lsx/intrinsic-srlni-invalid-imm.ll | 65 +++++++++ + .../LoongArch/lsx/intrinsic-srlni-non-imm.ll | 37 +++++ + .../lsx/intrinsic-srlr-invalid-imm.ll | 65 +++++++++ + .../LoongArch/lsx/intrinsic-srlr-non-imm.ll | 37 +++++ + .../lsx/intrinsic-srlrni-invalid-imm.ll | 65 +++++++++ + .../LoongArch/lsx/intrinsic-srlrni-non-imm.ll | 37 +++++ + .../lsx/intrinsic-ssrani-invalid-imm.ll | 129 ++++++++++++++++++ + .../LoongArch/lsx/intrinsic-ssrani-non-imm.ll | 73 ++++++++++ + .../lsx/intrinsic-ssrarni-invalid-imm.ll | 129 ++++++++++++++++++ + .../lsx/intrinsic-ssrarni-non-imm.ll | 73 ++++++++++ + .../lsx/intrinsic-ssrlni-invalid-imm.ll | 129 ++++++++++++++++++ + .../LoongArch/lsx/intrinsic-ssrlni-non-imm.ll | 73 ++++++++++ + .../lsx/intrinsic-ssrlrni-invalid-imm.ll | 129 ++++++++++++++++++ + .../lsx/intrinsic-ssrlrni-non-imm.ll | 73 ++++++++++ + .../LoongArch/lsx/intrinsic-st-invalid-imm.ll | 17 +++ + .../LoongArch/lsx/intrinsic-st-non-imm.ll | 10 ++ + .../lsx/intrinsic-stelm-invalid-imm.ll | 121 ++++++++++++++++ + .../LoongArch/lsx/intrinsic-stelm-non-imm.ll | 65 +++++++++ + .../lsx/intrinsic-subi-invalid-imm.ll | 65 +++++++++ + .../LoongArch/lsx/intrinsic-subi-non-imm.ll | 37 +++++ + .../lsx/intrinsic-xori-invalid-imm.ll | 17 +++ + .../LoongArch/lsx/intrinsic-xori-non-imm.ll | 10 ++ + 90 files changed, 4949 insertions(+) + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi-invalid-imm.ll + create mode 100644 
llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-max-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-max-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-min-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-min-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat-non-imm.ll + create mode 100644 
llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-st-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-st-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi-invalid-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi-non-imm.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori-invalid-imm.ll + create mode 100644 
llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori-non-imm.ll + +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi-invalid-imm.ll +new file mode 100644 +index 000000000000..6875872b6f83 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vaddi_bu_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vaddi.bu: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> %va, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vaddi_bu_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vaddi.bu: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> %va, i32 32) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vaddi_hu_lo(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vaddi.hu: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> %va, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vaddi_hu_hi(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vaddi.hu: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> %va, i32 32) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vaddi_wu_lo(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vaddi.wu: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32> %va, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vaddi_wu_hi(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vaddi.wu: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32> %va, i32 32) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vaddi_du_lo(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vaddi.du: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> %va, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vaddi_du_hi(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vaddi.du: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> %va, i32 32) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi-non-imm.ll +new file mode 100644 +index 000000000000..87d32b3ce02a +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vaddi_bu(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vaddi_hu(<8 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> 
%va, i32 %b) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vaddi_wu(<4 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32> %va, i32 %b) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vaddi_du(<2 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> %va, i32 %b) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi-invalid-imm.ll +new file mode 100644 +index 000000000000..82a117b2aba5 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi-invalid-imm.ll +@@ -0,0 +1,17 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vandi_b_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vandi.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> %va, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vandi_b_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vandi.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> %va, i32 256) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi-non-imm.ll +new file mode 100644 +index 000000000000..c0c35c775266 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi-non-imm.ll +@@ -0,0 +1,10 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vandi_b(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr-invalid-imm.ll +new file mode 100644 +index 000000000000..b020806cd86c +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vbitclri_b_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vbitclri.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8> %va, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vbitclri_b_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vbitclri.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8> %va, i32 8) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vbitclri_h_lo(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vbitclri.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> %va, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vbitclri_h_hi(<8 x i16> %va) nounwind { ++; 
CHECK: llvm.loongarch.lsx.vbitclri.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> %va, i32 16) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vbitclri_w_lo(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vbitclri.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> %va, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vbitclri_w_hi(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vbitclri.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> %va, i32 32) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vbitclri_d_lo(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vbitclri.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> %va, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vbitclri_d_hi(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vbitclri.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> %va, i32 64) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr-non-imm.ll +new file mode 100644 +index 000000000000..df6cdb99cdbc +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vbitclri_b(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vbitclri_h(<8 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> %va, i32 %b) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vbitclri_w(<4 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> %va, i32 %b) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vbitclri_d(<2 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> %va, i32 %b) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev-invalid-imm.ll +new file mode 100644 +index 000000000000..24b6ec3284cb +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vbitrevi_b_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vbitrevi.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 
x i8> %va, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vbitrevi_b_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vbitrevi.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8> %va, i32 8) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vbitrevi_lo(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vbitrevi.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> %va, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vbitrevi_h_hi(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vbitrevi.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> %va, i32 16) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vbitrevi_w_lo(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vbitrevi.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> %va, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vbitrevi_w_hi(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vbitrevi.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> %va, i32 32) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vbitrevi_d_lo(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vbitrevi.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> %va, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vbitrevi_d_hi(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vbitrevi.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> %va, i32 64) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev-non-imm.ll +new file mode 100644 +index 000000000000..3ffb494c9907 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vbitrevi_b(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vbitrevi_h(<8 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> %va, i32 %b) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vbitrevi_w(<4 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> %va, i32 %b) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vbitrevi_d(<2 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> %va, i32 %b) ++ 
ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli-invalid-imm.ll +new file mode 100644 +index 000000000000..bc63b40e9fca +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli-invalid-imm.ll +@@ -0,0 +1,17 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vbitseli_b_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vbitseli.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> %va, <16 x i8> %vb, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vbitseli_b_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vbitseli.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> %va, <16 x i8> %vb, i32 256) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli-non-imm.ll +new file mode 100644 +index 000000000000..52c1eb7d2024 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli-non-imm.ll +@@ -0,0 +1,10 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vbitseli_b(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> %va, <16 x i8> %vb, i32 %c) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset-invalid-imm.ll +new file mode 100644 +index 000000000000..e57e14d8cb07 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vbitseti_b_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vbitseti.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> %va, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vbitseti_b_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vbitseti.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> %va, i32 8) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vbitseti_h_lo(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vbitseti.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> %va, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vbitseti_h_hi(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vbitseti.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> %va, i32 16) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vbitseti_w_lo(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vbitseti.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32> %va, i32 -1) ++ 
ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vbitseti_w_hi(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vbitseti.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32> %va, i32 32) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vbitseti_d_lo(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vbitseti.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> %va, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vbitseti_d_hi(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vbitseti.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> %va, i32 64) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset-non-imm.ll +new file mode 100644 +index 000000000000..9b2bde015ed9 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vbitseti_b(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vbitseti_h(<8 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> %va, i32 %b) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vbitseti_w(<4 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32> %va, i32 %b) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vbitseti_d(<2 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> %va, i32 %b) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll-invalid-imm.ll +new file mode 100644 +index 000000000000..eb49af49c9be +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll-invalid-imm.ll +@@ -0,0 +1,17 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vbsll_v_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vbsll.v: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> %va, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vbsll_v_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vbsll.v: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> %va, i32 32) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll-non-imm.ll +new file mode 100644 +index 000000000000..5b10c9e91a4f +--- /dev/null 
++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll-non-imm.ll +@@ -0,0 +1,10 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vbsll_v(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl-invalid-imm.ll +new file mode 100644 +index 000000000000..bf56822e2ef5 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl-invalid-imm.ll +@@ -0,0 +1,17 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vbsrl_v_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vbsrl.v: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> %va, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vbsrl_v_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vbsrl.v: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> %va, i32 32) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl-non-imm.ll +new file mode 100644 +index 000000000000..0bc038c869ce +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl-non-imm.ll +@@ -0,0 +1,10 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vbsrl_v(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins-invalid-imm.ll +new file mode 100644 +index 000000000000..7f94234ed603 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vextrins_b_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vextrins.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8> %va, <16 x i8> %vb, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vextrins_b_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vextrins.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8> %va, <16 x i8> %vb, i32 256) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vextrins_h_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vextrins.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> %va, <8 x i16> %vb, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vextrins_h_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vextrins.h: argument out of 
range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> %va, <8 x i16> %vb, i32 256) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vextrins_w_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vextrins.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> %va, <4 x i32> %vb, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vextrins_w_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vextrins.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> %va, <4 x i32> %vb, i32 256) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vextrins_d_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vextrins.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64> %va, <2 x i64> %vb, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vextrins_d_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vextrins.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64> %va, <2 x i64> %vb, i32 256) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins-non-imm.ll +new file mode 100644 +index 000000000000..e834002bb60b +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vextrins_b(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8> %va, <16 x i8> %vb, i32 %c) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vextrins_h(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> %va, <8 x i16> %vb, i32 %c) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vextrins_w(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> %va, <4 x i32> %vb, i32 %c) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vextrins_d(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64> %va, <2 x i64> %vb, i32 %c) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp-invalid-imm.ll +new file mode 100644 +index 000000000000..0184c855c9c1 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp-invalid-imm.ll +@@ -0,0 +1,33 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s 
++ ++declare <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vfrstpi_b_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vfrstpi.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> %va, <16 x i8> %vb, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vfrstpi_b_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vfrstpi.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> %va, <16 x i8> %vb, i32 32) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vfrstpi_h_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vfrstpi.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> %va, <8 x i16> %vb, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vfrstpi_h_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vfrstpi.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> %va, <8 x i16> %vb, i32 32) ++ ret <8 x i16> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp-non-imm.ll +new file mode 100644 +index 000000000000..9583f672a305 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp-non-imm.ll +@@ -0,0 +1,19 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vfrstpi_b(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> %va, <16 x i8> %vb, i32 %c) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vfrstpi_h(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> %va, <8 x i16> %vb, i32 %c) ++ ret <8 x i16> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr-invalid-imm.ll +new file mode 100644 +index 000000000000..3d4f84fb6e03 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8>, i32, i32) ++ ++define <16 x i8> @lsx_vinsgr2vr_b_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vinsgr2vr.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> %va, i32 1, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vinsgr2vr_b_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vinsgr2vr.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> %va, i32 1, i32 16) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16>, i32, i32) ++ ++define <8 x i16> @lsx_vinsgr2vr_h_lo(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vinsgr2vr.h: argument out of range ++entry: ++ %res = call <8 x i16> 
@llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> %va, i32 1, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vinsgr2vr_h_hi(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vinsgr2vr.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> %va, i32 1, i32 8) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32>, i32, i32) ++ ++define <4 x i32> @lsx_vinsgr2vr_w_lo(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vinsgr2vr.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> %va, i32 1, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vinsgr2vr_w_hi(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vinsgr2vr.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> %va, i32 1, i32 4) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64>, i64, i32) ++ ++define <2 x i64> @lsx_vinsgr2vr_d_lo(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vinsgr2vr.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> %va, i64 1, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vinsgr2vr_d_hi(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vinsgr2vr.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> %va, i64 1, i32 2) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr-non-imm.ll +new file mode 100644 +index 000000000000..2a4c2218de8c +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8>, i32, i32) ++ ++define <16 x i8> @lsx_vinsgr2vr_b(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> %va, i32 1, i32 %b) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16>, i32, i32) ++ ++define <8 x i16> @lsx_vinsgr2vr_h(<8 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> %va, i32 1, i32 %b) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32>, i32, i32) ++ ++define <4 x i32> @lsx_vinsgr2vr_w(<4 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> %va, i32 1, i32 %b) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64>, i64, i32) ++ ++define <2 x i64> @lsx_vinsgr2vr_d(<2 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> %va, i64 1, i32 %b) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld-invalid-imm.ll +new file mode 100644 +index 000000000000..3aeb30ce66b4 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld-invalid-imm.ll +@@ -0,0 +1,17 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck 
%s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vld(i8*, i32) ++ ++define <16 x i8> @lsx_vld_lo(i8* %p) nounwind { ++; CHECK: llvm.loongarch.lsx.vld: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vld(i8* %p, i32 -2049) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vld_hi(i8* %p) nounwind { ++; CHECK: llvm.loongarch.lsx.vld: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vld(i8* %p, i32 2048) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld-non-imm.ll +new file mode 100644 +index 000000000000..db6a0318d87a +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld-non-imm.ll +@@ -0,0 +1,10 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vld(i8*, i32) ++ ++define <16 x i8> @lsx_vld(i8* %p, i32 %a) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vld(i8* %p, i32 %a) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi-invalid-imm.ll +new file mode 100644 +index 000000000000..57f6f8e81d91 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi-invalid-imm.ll +@@ -0,0 +1,81 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <2 x i64> @llvm.loongarch.lsx.vldi(i32) ++ ++define <2 x i64> @lsx_vldi_lo() nounwind { ++; CHECK: llvm.loongarch.lsx.vldi: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vldi(i32 -4097) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vldi_hi() nounwind { ++; CHECK: llvm.loongarch.lsx.vldi: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vldi(i32 4096) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32) ++ ++define <16 x i8> @lsx_vrepli_b_lo() nounwind { ++; CHECK: llvm.loongarch.lsx.vrepli.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32 -513) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vrepli_b_hi() nounwind { ++; CHECK: llvm.loongarch.lsx.vrepli.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32 512) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32) ++ ++define <8 x i16> @lsx_vrepli_h_lo() nounwind { ++; CHECK: llvm.loongarch.lsx.vrepli.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32 -513) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vrepli_h_hi() nounwind { ++; CHECK: llvm.loongarch.lsx.vrepli.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32 512) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32) ++ ++define <4 x i32> @lsx_vrepli_w_lo() nounwind { ++; CHECK: llvm.loongarch.lsx.vrepli.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32 -513) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vrepli_w_hi() nounwind { ++; CHECK: llvm.loongarch.lsx.vrepli.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32 512) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32) ++ ++define <2 x i64> @lsx_vrepli_d_lo() nounwind { ++; CHECK: llvm.loongarch.lsx.vrepli.d: argument out of 
range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32 -513) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vrepli_d_hi() nounwind { ++; CHECK: llvm.loongarch.lsx.vrepli.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32 512) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi-non-imm.ll +new file mode 100644 +index 000000000000..a8f8278f8097 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi-non-imm.ll +@@ -0,0 +1,46 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <2 x i64> @llvm.loongarch.lsx.vldi(i32) ++ ++define <2 x i64> @lsx_vldi(i32 %a) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vldi(i32 %a) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32) ++ ++define <16 x i8> @lsx_vrepli_b(i32 %a) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32 %a) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32) ++ ++define <8 x i16> @lsx_vrepli_h(i32 %a) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32 %a) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32) ++ ++define <4 x i32> @lsx_vrepli_w(i32 %a) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32 %a) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32) ++ ++define <2 x i64> @lsx_vrepli_d(i32 %a) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32 %a) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl-invalid-imm.ll +new file mode 100644 +index 000000000000..cb640e1245da +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vldrepl.b(i8*, i32) ++ ++define <16 x i8> @lsx_vldrepl_b_lo(i8* %p) nounwind { ++; CHECK: llvm.loongarch.lsx.vldrepl.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(i8* %p, i32 -2049) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vldrepl_b_hi(i8* %p) nounwind { ++; CHECK: llvm.loongarch.lsx.vldrepl.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(i8* %p, i32 2048) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vldrepl.h(i8*, i32) ++ ++define <8 x i16> @lsx_vldrepl_h_lo(i8* %p) nounwind { ++; CHECK: llvm.loongarch.lsx.vldrepl.h: argument out of range or not a multiple of 2. ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vldrepl.h(i8* %p, i32 -2050) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vldrepl_h_hi(i8* %p) nounwind { ++; CHECK: llvm.loongarch.lsx.vldrepl.h: argument out of range or not a multiple of 2. 
++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vldrepl.h(i8* %p, i32 2048) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vldrepl.w(i8*, i32) ++ ++define <4 x i32> @lsx_vldrepl_w_lo(i8* %p) nounwind { ++; CHECK: llvm.loongarch.lsx.vldrepl.w: argument out of range or not a multiple of 4. ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vldrepl.w(i8* %p, i32 -2052) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vldrepl_w_hi(i8* %p) nounwind { ++; CHECK: llvm.loongarch.lsx.vldrepl.w: argument out of range or not a multiple of 4. ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vldrepl.w(i8* %p, i32 2048) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vldrepl.d(i8*, i32) ++ ++define <2 x i64> @lsx_vldrepl_d_lo(i8* %p) nounwind { ++; CHECK: llvm.loongarch.lsx.vldrepl.d: argument out of range or not a multiple of 8. ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vldrepl.d(i8* %p, i32 -2056) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vldrepl_d_hi(i8* %p) nounwind { ++; CHECK: llvm.loongarch.lsx.vldrepl.d: argument out of range or not a multiple of 8. ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vldrepl.d(i8* %p, i32 2048) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl-non-imm.ll +new file mode 100644 +index 000000000000..e60b21913c69 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vldrepl.b(i8*, i32) ++ ++define <16 x i8> @lsx_vldrepl_b(i8* %p, i32 %a) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(i8* %p, i32 %a) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vldrepl.h(i8*, i32) ++ ++define <8 x i16> @lsx_vldrepl_h(i8* %p, i32 %a) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vldrepl.h(i8* %p, i32 %a) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vldrepl.w(i8*, i32) ++ ++define <4 x i32> @lsx_vldrepl_w(i8* %p, i32 %a) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vldrepl.w(i8* %p, i32 %a) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vldrepl.d(i8*, i32) ++ ++define <2 x i64> @lsx_vldrepl_d(i8* %p, i32 %a) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vldrepl.d(i8* %p, i32 %a) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max-invalid-imm.ll +new file mode 100644 +index 000000000000..667ba32723fc +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max-invalid-imm.ll +@@ -0,0 +1,129 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vmaxi_b_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmaxi.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> %va, i32 -17) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vmaxi_b_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmaxi.b: 
argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> %va, i32 16) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vmaxi_h_lo(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmaxi.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> %va, i32 -17) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vmaxi_h_hi(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmaxi.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> %va, i32 16) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vmaxi_w_lo(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmaxi.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32> %va, i32 -17) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vmaxi_w_hi(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmaxi.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32> %va, i32 16) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vmaxi_d_lo(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmaxi.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64> %va, i32 -17) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vmaxi_d_hi(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmaxi.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64> %va, i32 16) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vmaxi_bu_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmaxi.bu: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> %va, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vmaxi_bu_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmaxi.bu: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> %va, i32 32) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vmaxi_hu_lo(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmaxi.hu: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16> %va, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vmaxi_hu_hi(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmaxi.hu: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16> %va, i32 32) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vmaxi_wu_lo(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmaxi.wu: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> %va, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vmaxi_wu_hi(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmaxi.wu: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> %va, i32 32) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vmaxi_du_lo(<2 x i64> %va) nounwind { ++; CHECK: 
llvm.loongarch.lsx.vmaxi.du: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64> %va, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vmaxi_du_hi(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmaxi.du: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64> %va, i32 32) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max-non-imm.ll +new file mode 100644 +index 000000000000..34bbe3495670 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max-non-imm.ll +@@ -0,0 +1,73 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vmaxi_b(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vmaxi_h(<8 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> %va, i32 %b) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vmaxi_w(<4 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32> %va, i32 %b) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vmaxi_d(<2 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64> %va, i32 %b) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vmaxi_bu(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vmaxi_hu(<8 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16> %va, i32 %b) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vmaxi_wu(<4 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> %va, i32 %b) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vmaxi_du(<2 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64> %va, i32 %b) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min-invalid-imm.ll +new file mode 100644 +index 000000000000..b73bada4f06f +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min-invalid-imm.ll +@@ -0,0 +1,129 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s 
++ ++declare <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vmini_b_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmini.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> %va, i32 -17) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vmini_b_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmini.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> %va, i32 16) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vmini_h_lo(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmini.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> %va, i32 -17) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vmini_h_hi(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmini.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> %va, i32 16) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vmini_w_lo(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmini.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> %va, i32 -17) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vmini_w_hi(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmini.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> %va, i32 16) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vmini_d_lo(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmini.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> %va, i32 -17) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vmini_d_hi(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmini.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> %va, i32 16) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vmini_bu_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmini.bu: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> %va, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vmini_bu_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmini.bu: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> %va, i32 32) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vmini_hu_lo(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmini.hu: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> %va, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vmini_hu_hi(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmini.hu: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> %va, i32 32) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vmini_wu_lo(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmini.wu: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32> %va, i32 -1) ++ ret <4 x i32> %res 
++} ++ ++define <4 x i32> @lsx_vmini_wu_hi(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmini.wu: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32> %va, i32 32) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vmini_du_lo(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmini.du: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64> %va, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vmini_du_hi(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vmini.du: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64> %va, i32 32) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min-non-imm.ll +new file mode 100644 +index 000000000000..5d9b98cec4d0 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min-non-imm.ll +@@ -0,0 +1,73 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vmini_b(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vmini_h(<8 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> %va, i32 %b) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vmini_w(<4 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> %va, i32 %b) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vmini_d(<2 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> %va, i32 %b) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vmini_bu(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vmini_hu(<8 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> %va, i32 %b) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vmini_wu(<4 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32> %va, i32 %b) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vmini_du(<2 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64> %va, i32 %b) ++ ret <2 x 
i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori-invalid-imm.ll +new file mode 100644 +index 000000000000..8c59d8fb9fa5 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori-invalid-imm.ll +@@ -0,0 +1,17 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vnori_b_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vnori.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> %va, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vnori_b_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vnori.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> %va, i32 256) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori-non-imm.ll +new file mode 100644 +index 000000000000..322a39c106a6 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori-non-imm.ll +@@ -0,0 +1,10 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vnori_b(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori-invalid-imm.ll +new file mode 100644 +index 000000000000..4a7fc7e109d9 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori-invalid-imm.ll +@@ -0,0 +1,17 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vori_b_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vori.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> %va, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vori_b_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vori.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> %va, i32 256) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori-non-imm.ll +new file mode 100644 +index 000000000000..5644b8581dce +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori-non-imm.ll +@@ -0,0 +1,10 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vori_b(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi-invalid-imm.ll +new file mode 100644 +index 000000000000..e439bbae6130 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi-invalid-imm.ll +@@ -0,0 +1,17 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <4 x 
i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vpermi_w_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vpermi.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> %va, <4 x i32> %vb, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vpermi_w_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vpermi.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> %va, <4 x i32> %vb, i32 256) ++ ret <4 x i32> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi-non-imm.ll +new file mode 100644 +index 000000000000..bdfc08ed680a +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi-non-imm.ll +@@ -0,0 +1,10 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vpermi_w(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> %va, <4 x i32> %vb, i32 %c) ++ ret <4 x i32> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-invalid-imm.ll +new file mode 100644 +index 000000000000..3430c54d2194 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-invalid-imm.ll +@@ -0,0 +1,129 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8>, i32) ++ ++define i32 @lsx_vpickve2gr_b_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vpickve2gr.b: argument out of range ++entry: ++ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> %va, i32 -1) ++ ret i32 %res ++} ++ ++define i32 @lsx_vpickve2gr_b_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vpickve2gr.b: argument out of range ++entry: ++ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> %va, i32 16) ++ ret i32 %res ++} ++ ++declare i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16>, i32) ++ ++define i32 @lsx_vpickve2gr_h_lo(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vpickve2gr.h: argument out of range ++entry: ++ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> %va, i32 -1) ++ ret i32 %res ++} ++ ++define i32 @lsx_vpickve2gr_h_hi(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vpickve2gr.h: argument out of range ++entry: ++ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> %va, i32 8) ++ ret i32 %res ++} ++ ++declare i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32>, i32) ++ ++define i32 @lsx_vpickve2gr_w_lo(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vpickve2gr.w: argument out of range ++entry: ++ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32> %va, i32 -1) ++ ret i32 %res ++} ++ ++define i32 @lsx_vpickve2gr_w_hi(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vpickve2gr.w: argument out of range ++entry: ++ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32> %va, i32 4) ++ ret i32 %res ++} ++ ++declare i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64>, i32) ++ ++define i64 @lsx_vpickve2gr_d_lo(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vpickve2gr.d: argument out of range ++entry: ++ %res = call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 
x i64> %va, i32 -1) ++ ret i64 %res ++} ++ ++define i64 @lsx_vpickve2gr_d_hi(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vpickve2gr.d: argument out of range ++entry: ++ %res = call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> %va, i32 2) ++ ret i64 %res ++} ++ ++declare i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8>, i32) ++ ++define i32 @lsx_vpickve2gr_bu_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vpickve2gr.bu: argument out of range ++entry: ++ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> %va, i32 -1) ++ ret i32 %res ++} ++ ++define i32 @lsx_vpickve2gr_bu_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vpickve2gr.bu: argument out of range ++entry: ++ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> %va, i32 16) ++ ret i32 %res ++} ++ ++declare i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16>, i32) ++ ++define i32 @lsx_vpickve2gr_hu_lo(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vpickve2gr.hu: argument out of range ++entry: ++ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> %va, i32 -1) ++ ret i32 %res ++} ++ ++define i32 @lsx_vpickve2gr_hu_hi(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vpickve2gr.hu: argument out of range ++entry: ++ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> %va, i32 8) ++ ret i32 %res ++} ++ ++declare i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32>, i32) ++ ++define i32 @lsx_vpickve2gr_wu_lo(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vpickve2gr.wu: argument out of range ++entry: ++ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> %va, i32 -1) ++ ret i32 %res ++} ++ ++define i32 @lsx_vpickve2gr_wu_hi(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vpickve2gr.wu: argument out of range ++entry: ++ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> %va, i32 4) ++ ret i32 %res ++} ++ ++declare i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64>, i32) ++ ++define i64 @lsx_vpickve2gr_du_lo(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vpickve2gr.du: argument out of range ++entry: ++ %res = call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> %va, i32 -1) ++ ret i64 %res ++} ++ ++define i64 @lsx_vpickve2gr_du_hi(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vpickve2gr.du: argument out of range ++entry: ++ %res = call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> %va, i32 2) ++ ret i64 %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-non-imm.ll +new file mode 100644 +index 000000000000..6dd3c1f27a81 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-non-imm.ll +@@ -0,0 +1,73 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8>, i32) ++ ++define i32 @lsx_vpickve2gr_b(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> %va, i32 %b) ++ ret i32 %res ++} ++ ++declare i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16>, i32) ++ ++define i32 @lsx_vpickve2gr_h(<8 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> %va, i32 %b) ++ ret i32 %res ++} ++ ++declare i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32>, i32) ++ ++define i32 @lsx_vpickve2gr_w(<4 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has 
non-immediate parameter ++entry: ++ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32> %va, i32 %b) ++ ret i32 %res ++} ++ ++declare i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64>, i32) ++ ++define i64 @lsx_vpickve2gr_d(<2 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> %va, i32 %b) ++ ret i64 %res ++} ++ ++declare i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8>, i32) ++ ++define i32 @lsx_vpickve2gr_bu(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> %va, i32 %b) ++ ret i32 %res ++} ++ ++declare i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16>, i32) ++ ++define i32 @lsx_vpickve2gr_hu(<8 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> %va, i32 %b) ++ ret i32 %res ++} ++ ++declare i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32>, i32) ++ ++define i32 @lsx_vpickve2gr_wu(<4 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> %va, i32 %b) ++ ret i32 %res ++} ++ ++declare i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64>, i32) ++ ++define i64 @lsx_vpickve2gr_du(<2 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> %va, i32 %b) ++ ret i64 %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei-invalid-imm.ll +new file mode 100644 +index 000000000000..d625441122a6 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vreplvei_b_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vreplvei.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> %va, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vreplvei_b_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vreplvei.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> %va, i32 16) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vreplvei_h_lo(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vreplvei.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> %va, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vreplvei_h_hi(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vreplvei.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> %va, i32 8) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vreplvei_w_lo(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vreplvei.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> %va, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vreplvei_w_hi(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vreplvei.w: argument out of range 
++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> %va, i32 4) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vreplvei_d_lo(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vreplvei.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64> %va, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vreplvei_d_hi(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vreplvei.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64> %va, i32 2) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei-non-imm.ll +new file mode 100644 +index 000000000000..3d271bb2b307 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vreplvei_b(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vreplvei_h(<8 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> %va, i32 %b) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vreplvei_w(<4 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> %va, i32 %b) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vreplvei_d(<2 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64> %va, i32 %b) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr-invalid-imm.ll +new file mode 100644 +index 000000000000..3c53b36672ad +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vrotri_b_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vrotri.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> %va, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vrotri_b_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vrotri.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> %va, i32 8) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vrotri_h_lo(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vrotri.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> %va, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vrotri_h_hi(<8 x 
i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vrotri.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> %va, i32 16) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vrotri_w_lo(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vrotri.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> %va, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vrotri_w_hi(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vrotri.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> %va, i32 32) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vrotri_d_lo(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vrotri.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64> %va, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vrotri_d_hi(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vrotri.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64> %va, i32 64) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr-non-imm.ll +new file mode 100644 +index 000000000000..fd8ba3a1c633 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vrotri_b(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vrotri_h(<8 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> %va, i32 %b) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vrotri_w(<4 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> %va, i32 %b) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vrotri_d(<2 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64> %va, i32 %b) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat-invalid-imm.ll +new file mode 100644 +index 000000000000..45fa4e43be19 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat-invalid-imm.ll +@@ -0,0 +1,129 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsat_b_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsat.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> %va, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 
x i8> @lsx_vsat_b_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsat.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> %va, i32 8) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsat_h_lo(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsat.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> %va, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vsat_h_hi(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsat.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> %va, i32 16) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsat_w_lo(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsat.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> %va, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vsat_w_hi(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsat.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> %va, i32 32) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsat_d_lo(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsat.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> %va, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vsat_d_hi(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsat.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> %va, i32 64) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsat_bu_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsat.bu: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> %va, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vsat_bu_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsat.bu: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> %va, i32 8) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsat_hu_lo(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsat.hu: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16> %va, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vsat_hu_hi(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsat.hu: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16> %va, i32 16) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsat_wu_lo(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsat.wu: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> %va, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vsat_wu_hi(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsat.wu: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> %va, i32 32) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsat_du_lo(<2 x i64> %va) 
nounwind { ++; CHECK: llvm.loongarch.lsx.vsat.du: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> %va, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vsat_du_hi(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsat.du: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> %va, i32 64) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat-non-imm.ll +new file mode 100644 +index 000000000000..afdbe0c1ce0b +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat-non-imm.ll +@@ -0,0 +1,73 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsat_b(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsat_h(<8 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> %va, i32 %b) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsat_w(<4 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> %va, i32 %b) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsat_d(<2 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> %va, i32 %b) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsat_bu(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsat_hu(<8 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16> %va, i32 %b) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsat_wu(<4 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> %va, i32 %b) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsat_du(<2 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> %va, i32 %b) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq-invalid-imm.ll +new file mode 100644 +index 000000000000..220398ff28cd +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ 
++declare <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vseqi_b_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vseqi.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> %va, i32 -17) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vseqi_b_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vseqi.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> %va, i32 16) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vseqi_h_lo(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vseqi.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> %va, i32 -17) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vseqi_h_hi(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vseqi.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> %va, i32 16) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vseqi_w_lo(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vseqi.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> %va, i32 -17) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vseqi_w_hi(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vseqi.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> %va, i32 16) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vseqi_d_lo(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vseqi.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> %va, i32 -17) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vseqi_d_hi(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vseqi.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> %va, i32 16) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq-non-imm.ll +new file mode 100644 +index 000000000000..5fa1dd30475c +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vseqi_b(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vseqi_h(<8 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> %va, i32 %b) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vseqi_w(<4 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> %va, i32 %b) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vseqi_d(<2 x i64> %va, i32 %b) nounwind 
{ ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> %va, i32 %b) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i-invalid-imm.ll +new file mode 100644 +index 000000000000..4d6fadf08c26 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vshuf4i_b_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vshuf4i.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> %va, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vshuf4i_b_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vshuf4i.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> %va, i32 256) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vshuf4i_h_lo(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vshuf4i.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16> %va, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vshuf4i_h_hi(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vshuf4i.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16> %va, i32 256) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vshuf4i_w_lo(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vshuf4i.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> %va, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vshuf4i_w_hi(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vshuf4i.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> %va, i32 256) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vshuf4i_d_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vshuf4i.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64> %va, <2 x i64> %vb, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vshuf4i_d_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vshuf4i.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64> %va, <2 x i64> %vb, i32 256) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i-non-imm.ll +new file mode 100644 +index 000000000000..a7d138bcc00b +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vshuf4i_b(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> 
@llvm.loongarch.lsx.vshuf4i.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vshuf4i_h(<8 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16> %va, i32 %b) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vshuf4i_w(<4 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> %va, i32 %b) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vshuf4i_d(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64> %va, <2 x i64> %vb, i32 %c) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle-invalid-imm.ll +new file mode 100644 +index 000000000000..4c945e296711 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle-invalid-imm.ll +@@ -0,0 +1,129 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vslei_b_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslei.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> %va, i32 -17) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vslei_b_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslei.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> %va, i32 16) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vslei_h_lo(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslei.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> %va, i32 -17) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vslei_h_hi(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslei.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> %va, i32 16) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vslei_w_lo(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslei.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> %va, i32 -17) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vslei_w_hi(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslei.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> %va, i32 16) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vslei_d_lo(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslei.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64> %va, i32 -17) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vslei_d_hi(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslei.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64> %va, i32 16) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8>, i32) ++ ++define 
<16 x i8> @lsx_vslei_bu_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslei.bu: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> %va, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vslei_bu_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslei.bu: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> %va, i32 32) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vslei_hu_lo(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslei.hu: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> %va, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vslei_hu_hi(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslei.hu: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> %va, i32 32) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vslei_wu_lo(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslei.wu: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> %va, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vslei_wu_hi(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslei.wu: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> %va, i32 32) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vslei_du_lo(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslei.du: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64> %va, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vslei_du_hi(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslei.du: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64> %va, i32 32) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle-non-imm.ll +new file mode 100644 +index 000000000000..0fc137bf0549 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle-non-imm.ll +@@ -0,0 +1,73 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vslei_b(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vslei_h(<8 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> %va, i32 %b) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vslei_w(<4 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> %va, i32 %b) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vslei_d(<2 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate 
parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64> %va, i32 %b) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vslei_bu(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vslei_hu(<8 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> %va, i32 %b) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vslei_wu(<4 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> %va, i32 %b) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vslei_du(<2 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64> %va, i32 %b) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll-invalid-imm.ll +new file mode 100644 +index 000000000000..75406f94887c +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vslli_b_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslli.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> %va, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vslli_b_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslli.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> %va, i32 8) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vslli_h_lo(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslli.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> %va, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vslli_h_hi(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslli.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> %va, i32 16) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vslli_w_lo(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslli.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> %va, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vslli_w_hi(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslli.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> %va, i32 32) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vslli_d_lo(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslli.d: argument out of range ++entry: ++ %res = call <2 x i64> 
@llvm.loongarch.lsx.vslli.d(<2 x i64> %va, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vslli_d_hi(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslli.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64> %va, i32 64) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll-non-imm.ll +new file mode 100644 +index 000000000000..7474b5e29734 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vslli_b(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vslli_h(<8 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> %va, i32 %b) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vslli_w(<4 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> %va, i32 %b) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vslli_d(<2 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64> %va, i32 %b) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil-invalid-imm.ll +new file mode 100644 +index 000000000000..bda3523a0b5c +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil-invalid-imm.ll +@@ -0,0 +1,97 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8>, i32) ++ ++define <8 x i16> @lsx_vsllwil_h_b_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsllwil.h.b: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> %va, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vsllwil_h_b_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsllwil.h.b: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> %va, i32 8) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16>, i32) ++ ++define <4 x i32> @lsx_vsllwil_w_h_lo(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsllwil.w.h: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> %va, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vsllwil_w_h_hi(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsllwil.w.h: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> %va, i32 16) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32>, i32) ++ ++define <2 x i64> @lsx_vsllwil_d_w_lo(<4 x i32> %va) nounwind { ++; CHECK: 
llvm.loongarch.lsx.vsllwil.d.w: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> %va, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vsllwil_d_w_hi(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsllwil.d.w: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> %va, i32 32) ++ ret <2 x i64> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8>, i32) ++ ++define <8 x i16> @lsx_vsllwil_hu_bu_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsllwil.hu.bu: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> %va, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vsllwil_hu_bu_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsllwil.hu.bu: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> %va, i32 8) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16>, i32) ++ ++define <4 x i32> @lsx_vsllwil_wu_hu_lo(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsllwil.wu.hu: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> %va, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vsllwil_wu_hu_hi(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsllwil.wu.hu: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> %va, i32 16) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32>, i32) ++ ++define <2 x i64> @lsx_vsllwil_du_wu_lo(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsllwil.du.wu: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> %va, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vsllwil_du_wu_hi(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsllwil.du.wu: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> %va, i32 32) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil-non-imm.ll +new file mode 100644 +index 000000000000..a03656d5ca07 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil-non-imm.ll +@@ -0,0 +1,55 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8>, i32) ++ ++define <8 x i16> @lsx_vsllwil_h_b(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> %va, i32 %b) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16>, i32) ++ ++define <4 x i32> @lsx_vsllwil_w_h(<8 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> %va, i32 %b) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32>, i32) ++ ++define <2 x i64> @lsx_vsllwil_d_w(<4 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> %va, i32 %b) ++ ret <2 x i64> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8>, i32) ++ ++define <8 
x i16> @lsx_vsllwil_hu_bu(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> %va, i32 %b) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16>, i32) ++ ++define <4 x i32> @lsx_vsllwil_wu_hu(<8 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> %va, i32 %b) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32>, i32) ++ ++define <2 x i64> @lsx_vsllwil_du_wu(<4 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> %va, i32 %b) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt-invalid-imm.ll +new file mode 100644 +index 000000000000..f6d014b19d6c +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt-invalid-imm.ll +@@ -0,0 +1,129 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vslti_b_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslti.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8> %va, i32 -17) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vslti_b_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslti.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8> %va, i32 16) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vslti_h_lo(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslti.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> %va, i32 -17) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vslti_h_hi(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslti.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> %va, i32 16) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vslti_w_lo(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslti.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> %va, i32 -17) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vslti_w_hi(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslti.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> %va, i32 16) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vslti_d_lo(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslti.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64> %va, i32 -17) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vslti_d_hi(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslti.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64> %va, i32 16) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vslti_bu_lo(<16 x i8> %va) nounwind { ++; CHECK: 
llvm.loongarch.lsx.vslti.bu: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> %va, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vslti_bu_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslti.bu: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> %va, i32 32) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vslti_hu_lo(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslti.hu: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> %va, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vslti_hu_hi(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslti.hu: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> %va, i32 32) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vslti_wu_lo(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslti.wu: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32> %va, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vslti_wu_hi(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslti.wu: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32> %va, i32 32) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vslti_du_lo(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslti.du: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> %va, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vslti_du_hi(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vslti.du: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> %va, i32 32) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt-non-imm.ll +new file mode 100644 +index 000000000000..9a8b757dab4e +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt-non-imm.ll +@@ -0,0 +1,73 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vslti_b(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vslti_h(<8 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> %va, i32 %b) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vslti_w(<4 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> %va, i32 %b) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vslti_d(<2 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> 
@llvm.loongarch.lsx.vslti.d(<2 x i64> %va, i32 %b) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vslti_bu(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vslti_hu(<8 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> %va, i32 %b) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vslti_wu(<4 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32> %va, i32 %b) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vslti_du(<2 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> %va, i32 %b) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra-invalid-imm.ll +new file mode 100644 +index 000000000000..2a033a21b565 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsrai_b_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrai.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> %va, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vsrai_b_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrai.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> %va, i32 8) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsrai_h_lo(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrai.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> %va, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vsrai_h_hi(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrai.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> %va, i32 16) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsrai_w_lo(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrai.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> %va, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vsrai_w_hi(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrai.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> %va, i32 32) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsrai_d_lo(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrai.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> %va, i32 
-1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vsrai_d_hi(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrai.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> %va, i32 64) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra-non-imm.ll +new file mode 100644 +index 000000000000..c3b328145864 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsrai_b(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsrai_h(<8 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> %va, i32 %b) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsrai_w(<4 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> %va, i32 %b) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsrai_d(<2 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> %va, i32 %b) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani-invalid-imm.ll +new file mode 100644 +index 000000000000..d68064e9b902 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsrani_b_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrani.b.h: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vsrani_b_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrani.b.h: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8> %va, <16 x i8> %vb, i32 16) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsrani_h_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrani.h.w: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vsrani_h_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrani.h.w: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> %va, <8 x i16> %vb, i32 32) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32>, <4 x i32>, i32) ++ 
++define <4 x i32> @lsx_vsrani_w_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrani.w.d: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vsrani_w_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrani.w.d: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> %va, <4 x i32> %vb, i32 64) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsrani_d_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrani.d.q: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vsrani_d_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrani.d.q: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64> %va, <2 x i64> %vb, i32 128) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani-non-imm.ll +new file mode 100644 +index 000000000000..38cfde214dc1 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsrani_b_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsrani_h_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsrani_w_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsrani_d_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar-invalid-imm.ll +new file mode 100644 +index 000000000000..b6c2d70cebbc +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsrari_b_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrari.b: argument out of range ++entry: ++ %res = call <16 x i8> 
@llvm.loongarch.lsx.vsrari.b(<16 x i8> %va, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vsrari_b_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrari.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8> %va, i32 8) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsrari_h_lo(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrari.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> %va, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vsrari_h_hi(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrari.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> %va, i32 16) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsrari_w_lo(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrari.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32> %va, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vsrari_w_hi(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrari.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32> %va, i32 32) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsrari_d_lo(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrari.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64> %va, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vsrari_d_hi(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrari.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64> %va, i32 64) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar-non-imm.ll +new file mode 100644 +index 000000000000..2ad8adcd823b +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsrari_b(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsrari_h(<8 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> %va, i32 %b) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsrari_w(<4 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32> %va, i32 %b) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsrari_d(<2 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64> %va, i32 %b) ++ ret <2 x i64> %res ++} +diff --git 
a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni-invalid-imm.ll +new file mode 100644 +index 000000000000..d24cf92a0392 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsrarni_b_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrarni.b.h: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vsrarni_b_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrarni.b.h: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 16) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsrarni_h_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrarni.h.w: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vsrarni_h_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrarni.h.w: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 32) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsrarni_w_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrarni.w.d: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vsrarni_w_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrarni.w.d: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 64) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsrarni_d_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrarni.d.q: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vsrarni_d_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrarni.d.q: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 128) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni-non-imm.ll +new file mode 100644 +index 000000000000..19de7445cba1 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsrarni_b_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> 
@llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsrarni_h_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsrarni_w_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsrarni_d_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl-invalid-imm.ll +new file mode 100644 +index 000000000000..3beff790afab +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsrli_b_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrli.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> %va, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vsrli_b_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrli.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> %va, i32 8) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsrli_h_lo(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrli.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> %va, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vsrli_h_hi(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrli.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> %va, i32 16) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsrli_w_lo(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrli.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32> %va, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vsrli_w_hi(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrli.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32> %va, i32 32) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsrli_d_lo(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrli.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> %va, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vsrli_d_hi(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrli.d: 
argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> %va, i32 64) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl-non-imm.ll +new file mode 100644 +index 000000000000..98652aca0d62 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsrli_b(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsrli_h(<8 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> %va, i32 %b) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsrli_w(<4 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32> %va, i32 %b) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsrli_d(<2 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> %va, i32 %b) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni-invalid-imm.ll +new file mode 100644 +index 000000000000..054c4f393548 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsrlni_b_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrlni.b.h: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vsrlni_b_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrlni.b.h: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 16) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsrlni_h_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrlni.h.w: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vsrlni_h_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrlni.h.w: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 32) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsrlni_w_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrlni.w.d: argument out 
of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vsrlni_w_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrlni.w.d: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 64) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsrlni_d_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrlni.d.q: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vsrlni_d_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrlni.d.q: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 128) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni-non-imm.ll +new file mode 100644 +index 000000000000..76341df197fd +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsrlni_b_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsrlni_h_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsrlni_w_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsrlni_d_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr-invalid-imm.ll +new file mode 100644 +index 000000000000..bcbd38e26e5f +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsrlri_b_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrlri.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8> %va, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vsrlri_b_hi(<16 x i8> %va) 
nounwind { ++; CHECK: llvm.loongarch.lsx.vsrlri.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8> %va, i32 8) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsrlri_h_lo(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrlri.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16> %va, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vsrlri_h_hi(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrlri.h: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16> %va, i32 16) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsrlri_w_lo(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrlri.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> %va, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vsrlri_w_hi(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrlri.w: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> %va, i32 32) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsrlri_d_lo(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrlri.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> %va, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vsrlri_d_hi(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrlri.d: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> %va, i32 64) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr-non-imm.ll +new file mode 100644 +index 000000000000..4862b1546ccf +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsrlri_b(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsrlri_h(<8 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16> %va, i32 %b) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsrlri_w(<4 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> %va, i32 %b) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsrlri_d(<2 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> %va, i32 %b) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni-invalid-imm.ll +new file 
mode 100644 +index 000000000000..8988ae88f9eb +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsrlrni_b_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrlrni.b.h: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vsrlrni_b_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrlrni.b.h: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 16) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsrlrni_h_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrlrni.h.w: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vsrlrni_h_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrlrni.h.w: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 32) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsrlrni_w_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrlrni.w.d: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vsrlrni_w_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrlrni.w.d: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 64) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsrlrni_d_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrlrni.d.q: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vsrlrni_d_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vsrlrni.d.q: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 128) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni-non-imm.ll +new file mode 100644 +index 000000000000..e5530db56fed +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsrlrni_b_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> 
@llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsrlrni_h_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsrlrni_w_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsrlrni_d_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani-invalid-imm.ll +new file mode 100644 +index 000000000000..f7817921ebeb +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani-invalid-imm.ll +@@ -0,0 +1,129 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vssrani_b_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrani.b.h: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vssrani_b_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrani.b.h: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8> %va, <16 x i8> %vb, i32 16) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vssrani_h_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrani.h.w: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vssrani_h_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrani.h.w: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> %va, <8 x i16> %vb, i32 32) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vssrani_w_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrani.w.d: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vssrani_w_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrani.w.d: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32> %va, <4 x i32> %vb, i32 64) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vssrani_d_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrani.d.q: argument 
out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vssrani_d_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrani.d.q: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> %va, <2 x i64> %vb, i32 128) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vssrani_bu_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrani.bu.h: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vssrani_bu_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrani.bu.h: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 16) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vssrani_hu_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrani.hu.w: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vssrani_hu_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrani.hu.w: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 32) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vssrani_wu_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrani.wu.d: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vssrani_wu_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrani.wu.d: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 64) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vssrani_du_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrani.du.q: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vssrani_du_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrani.du.q: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> %va, <2 x i64> %vb, i32 128) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani-non-imm.ll +new file mode 100644 +index 000000000000..a80ede9c5243 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani-non-imm.ll +@@ -0,0 +1,73 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vssrani_b_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { ++; CHECK: 
immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vssrani_h_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vssrani_w_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vssrani_d_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vssrani_bu_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vssrani_hu_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vssrani_wu_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vssrani_du_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni-invalid-imm.ll +new file mode 100644 +index 000000000000..4edda8c0a24a +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni-invalid-imm.ll +@@ -0,0 +1,129 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vssrarni_b_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrarni.b.h: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vssrarni_b_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrarni.b.h: argument out 
of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 16) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vssrarni_h_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrarni.h.w: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vssrarni_h_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrarni.h.w: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 32) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vssrarni_w_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrarni.w.d: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vssrarni_w_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrarni.w.d: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 64) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vssrarni_d_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrarni.d.q: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vssrarni_d_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrarni.d.q: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 128) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vssrarni_bu_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrarni.bu.h: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vssrarni_bu_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrarni.bu.h: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 16) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vssrarni_hu_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrarni.hu.w: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vssrarni_hu_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrarni.hu.w: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 32) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vssrarni_wu_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; 
CHECK: llvm.loongarch.lsx.vssrarni.wu.d: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vssrarni_wu_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrarni.wu.d: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 64) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vssrarni_du_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrarni.du.q: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vssrarni_du_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrarni.du.q: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 128) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni-non-imm.ll +new file mode 100644 +index 000000000000..a77e6e764c9d +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni-non-imm.ll +@@ -0,0 +1,73 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vssrarni_b_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vssrarni_h_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vssrarni_w_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vssrarni_d_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vssrarni_bu_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vssrarni_hu_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> 
@llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vssrarni_wu_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vssrarni_du_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni-invalid-imm.ll +new file mode 100644 +index 000000000000..6218af1fa773 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni-invalid-imm.ll +@@ -0,0 +1,129 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vssrlni_b_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlni.b.h: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vssrlni_b_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlni.b.h: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 16) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vssrlni_h_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlni.h.w: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vssrlni_h_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlni.h.w: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 32) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vssrlni_w_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlni.w.d: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vssrlni_w_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlni.w.d: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 64) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vssrlni_d_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlni.d.q: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vssrlni_d_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind 
{ ++; CHECK: llvm.loongarch.lsx.vssrlni.d.q: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 128) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vssrlni_bu_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlni.bu.h: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vssrlni_bu_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlni.bu.h: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 16) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vssrlni_hu_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlni.hu.w: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vssrlni_hu_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlni.hu.w: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 32) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vssrlni_wu_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlni.wu.d: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vssrlni_wu_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlni.wu.d: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 64) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vssrlni_du_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlni.du.q: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vssrlni_du_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlni.du.q: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 128) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni-non-imm.ll +new file mode 100644 +index 000000000000..688be826f467 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni-non-imm.ll +@@ -0,0 +1,73 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vssrlni_b_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> 
@llvm.loongarch.lsx.vssrlni.h.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vssrlni_h_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vssrlni_w_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vssrlni_d_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vssrlni_bu_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vssrlni_hu_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vssrlni_wu_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vssrlni_du_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni-invalid-imm.ll +new file mode 100644 +index 000000000000..98a0c5b3cd28 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni-invalid-imm.ll +@@ -0,0 +1,129 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vssrlrni_b_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlrni.b.h: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vssrlrni_b_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlrni.b.h: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 16) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> 
@llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vssrlrni_h_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlrni.h.w: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vssrlrni_h_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlrni.h.w: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 32) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vssrlrni_w_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlrni.w.d: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vssrlrni_w_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlrni.w.d: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 64) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vssrlrni_d_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlrni.d.q: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vssrlrni_d_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlrni.d.q: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 128) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vssrlrni_bu_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlrni.bu.h: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vssrlrni_bu_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlrni.bu.h: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 16) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vssrlrni_hu_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlrni.hu.w: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vssrlrni_hu_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlrni.hu.w: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 32) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vssrlrni_wu_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlrni.wu.d: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 
-1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vssrlrni_wu_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlrni.wu.d: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 64) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vssrlrni_du_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlrni.du.q: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vssrlrni_du_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { ++; CHECK: llvm.loongarch.lsx.vssrlrni.du.q: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 128) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni-non-imm.ll +new file mode 100644 +index 000000000000..c389b4fd6023 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni-non-imm.ll +@@ -0,0 +1,73 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vssrlrni_b_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vssrlrni_h_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32>, <4 x i32>, i32) ++ ++define <4 x i32> @lsx_vssrlrni_w_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vssrlrni_d_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) ++ ret <2 x i64> %res ++} ++ ++declare <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8>, <16 x i8>, i32) ++ ++define <16 x i8> @lsx_vssrlrni_bu_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16>, <8 x i16>, i32) ++ ++define <8 x i16> @lsx_vssrlrni_hu_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32>, 
<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vssrlrni_wu_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64>, <2 x i64>, i32) ++ ++define <2 x i64> @lsx_vssrlrni_du_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st-invalid-imm.ll +new file mode 100644 +index 000000000000..64518380964b +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st-invalid-imm.ll +@@ -0,0 +1,17 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare void @llvm.loongarch.lsx.vst(<16 x i8>, i8*, i32) ++ ++define void @lsx_vst_lo(<16 x i8> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lsx.vst: argument out of range ++entry: ++ call void @llvm.loongarch.lsx.vst(<16 x i8> %va, i8* %p, i32 -2049) ++ ret void ++} ++ ++define void @lsx_vst_hi(<16 x i8> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lsx.vst: argument out of range ++entry: ++ call void @llvm.loongarch.lsx.vst(<16 x i8> %va, i8* %p, i32 2048) ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st-non-imm.ll +new file mode 100644 +index 000000000000..119ed9b78658 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st-non-imm.ll +@@ -0,0 +1,10 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare void @llvm.loongarch.lsx.vst(<16 x i8>, i8*, i32) ++ ++define void @lsx_vst(<16 x i8> %va, i8* %p, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ call void @llvm.loongarch.lsx.vst(<16 x i8> %va, i8* %p, i32 %b) ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm-invalid-imm.ll +new file mode 100644 +index 000000000000..277abcbd34cc +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm-invalid-imm.ll +@@ -0,0 +1,121 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare void @llvm.loongarch.lsx.vstelm.b(<16 x i8>, i8*, i32, i32) ++ ++define void @lsx_vstelm_b_lo(<16 x i8> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lsx.vstelm.b: argument out of range ++entry: ++ call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> %va, i8* %p, i32 -129, i32 15) ++ ret void ++} ++ ++define void @lsx_vstelm_b_hi(<16 x i8> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lsx.vstelm.b: argument out of range ++entry: ++ call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> %va, i8* %p, i32 128, i32 15) ++ ret void ++} ++ ++define void @lsx_vstelm_b_idx_lo(<16 x i8> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lsx.vstelm.b: argument out of range ++entry: ++ call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> %va, i8* %p, i32 1, i32 -1) ++ ret void ++} ++ ++define void @lsx_vstelm_b_idx_hi(<16 x i8> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lsx.vstelm.b: argument out of range ++entry: ++ call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> %va, i8* %p, i32 1, i32 16) ++ ret void ++} 
++ ++declare void @llvm.loongarch.lsx.vstelm.h(<8 x i16>, i8*, i32, i32) ++ ++define void @lsx_vstelm_h_lo(<8 x i16> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lsx.vstelm.h: argument out of range or not a multiple of 2. ++entry: ++ call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> %va, i8* %p, i32 -258, i32 7) ++ ret void ++} ++ ++define void @lsx_vstelm_h_hi(<8 x i16> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lsx.vstelm.h: argument out of range or not a multiple of 2. ++entry: ++ call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> %va, i8* %p, i32 256, i32 7) ++ ret void ++} ++ ++define void @lsx_vstelm_h_idx_lo(<8 x i16> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lsx.vstelm.h: argument out of range or not a multiple of 2. ++entry: ++ call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> %va, i8* %p, i32 2, i32 -1) ++ ret void ++} ++ ++define void @lsx_vstelm_h_idx_hi(<8 x i16> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lsx.vstelm.h: argument out of range or not a multiple of 2. ++entry: ++ call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> %va, i8* %p, i32 2, i32 8) ++ ret void ++} ++ ++declare void @llvm.loongarch.lsx.vstelm.w(<4 x i32>, i8*, i32, i32) ++ ++define void @lsx_vstelm_w_lo(<4 x i32> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lsx.vstelm.w: argument out of range or not a multiple of 4. ++entry: ++ call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> %va, i8* %p, i32 -516, i32 3) ++ ret void ++} ++ ++define void @lsx_vstelm_w_hi(<4 x i32> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lsx.vstelm.w: argument out of range or not a multiple of 4. ++entry: ++ call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> %va, i8* %p, i32 512, i32 3) ++ ret void ++} ++ ++define void @lsx_vstelm_w_idx_lo(<4 x i32> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lsx.vstelm.w: argument out of range or not a multiple of 4. ++entry: ++ call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> %va, i8* %p, i32 4, i32 -1) ++ ret void ++} ++ ++define void @lsx_vstelm_w_idx_hi(<4 x i32> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lsx.vstelm.w: argument out of range or not a multiple of 4. ++entry: ++ call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> %va, i8* %p, i32 4, i32 4) ++ ret void ++} ++ ++declare void @llvm.loongarch.lsx.vstelm.d(<2 x i64>, i8*, i32, i32) ++ ++define void @lsx_vstelm_d_lo(<2 x i64> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lsx.vstelm.d: argument out of range or not a multiple of 8. ++entry: ++ call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> %va, i8* %p, i32 -1032, i32 1) ++ ret void ++} ++ ++define void @lsx_vstelm_d_hi(<2 x i64> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lsx.vstelm.d: argument out of range or not a multiple of 8. ++entry: ++ call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> %va, i8* %p, i32 1024, i32 1) ++ ret void ++} ++ ++define void @lsx_vstelm_d_idx_lo(<2 x i64> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lsx.vstelm.d: argument out of range or not a multiple of 8. ++entry: ++ call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> %va, i8* %p, i32 8, i32 -1) ++ ret void ++} ++ ++define void @lsx_vstelm_d_idx_hi(<2 x i64> %va, i8* %p) nounwind { ++; CHECK: llvm.loongarch.lsx.vstelm.d: argument out of range or not a multiple of 8. 
++entry: ++ call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> %va, i8* %p, i32 8, i32 2) ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm-non-imm.ll +new file mode 100644 +index 000000000000..f53932f79035 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm-non-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare void @llvm.loongarch.lsx.vstelm.b(<16 x i8>, i8*, i32, i32) ++ ++define void @lsx_vstelm_b(<16 x i8> %va, i8* %p, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> %va, i8* %p, i32 %b, i32 1) ++ ret void ++} ++ ++define void @lsx_vstelm_b_idx(<16 x i8> %va, i8* %p, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> %va, i8* %p, i32 1, i32 %b) ++ ret void ++} ++ ++declare void @llvm.loongarch.lsx.vstelm.h(<8 x i16>, i8*, i32, i32) ++ ++define void @lsx_vstelm_h(<8 x i16> %va, i8* %p, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> %va, i8* %p, i32 %b, i32 1) ++ ret void ++} ++ ++define void @lsx_vstelm_h_idx(<8 x i16> %va, i8* %p, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> %va, i8* %p, i32 2, i32 %b) ++ ret void ++} ++ ++declare void @llvm.loongarch.lsx.vstelm.w(<4 x i32>, i8*, i32, i32) ++ ++define void @lsx_vstelm_w(<4 x i32> %va, i8* %p, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> %va, i8* %p, i32 %b, i32 1) ++ ret void ++} ++ ++define void @lsx_vstelm_w_idx(<4 x i32> %va, i8* %p, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> %va, i8* %p, i32 4, i32 %b) ++ ret void ++} ++ ++declare void @llvm.loongarch.lsx.vstelm.d(<2 x i64>, i8*, i32, i32) ++ ++define void @lsx_vstelm_d(<2 x i64> %va, i8* %p, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> %va, i8* %p, i32 %b, i32 1) ++ ret void ++} ++ ++define void @lsx_vstelm_d_idx(<2 x i64> %va, i8* %p, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> %va, i8* %p, i32 8, i32 %b) ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi-invalid-imm.ll +new file mode 100644 +index 000000000000..96cc1241fbf3 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi-invalid-imm.ll +@@ -0,0 +1,65 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsubi_bu_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsubi.bu: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8> %va, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vsubi_bu_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsubi.bu: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8> %va, i32 32) ++ ret <16 x i8> %res ++} 
++ ++declare <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsubi_hu_lo(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsubi.hu: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> %va, i32 -1) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @lsx_vsubi_hu_hi(<8 x i16> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsubi.hu: argument out of range ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> %va, i32 32) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsubi_wu_lo(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsubi.wu: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> %va, i32 -1) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @lsx_vsubi_wu_hi(<4 x i32> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsubi.wu: argument out of range ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> %va, i32 32) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsubi_du_lo(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsubi.du: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> %va, i32 -1) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @lsx_vsubi_du_hi(<2 x i64> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vsubi.du: argument out of range ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> %va, i32 32) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi-non-imm.ll +new file mode 100644 +index 000000000000..162f9ad131c7 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi-non-imm.ll +@@ -0,0 +1,37 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vsubi_bu(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} ++ ++declare <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16>, i32) ++ ++define <8 x i16> @lsx_vsubi_hu(<8 x i16> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> %va, i32 %b) ++ ret <8 x i16> %res ++} ++ ++declare <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32>, i32) ++ ++define <4 x i32> @lsx_vsubi_wu(<4 x i32> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> %va, i32 %b) ++ ret <4 x i32> %res ++} ++ ++declare <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64>, i32) ++ ++define <2 x i64> @lsx_vsubi_du(<2 x i64> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> %va, i32 %b) ++ ret <2 x i64> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori-invalid-imm.ll +new file mode 100644 +index 000000000000..5f5613189ac8 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori-invalid-imm.ll +@@ -0,0 +1,17 @@ ++; RUN: not llc --mtriple=loongarch64 
--mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vxori_b_lo(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vxori.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> %va, i32 -1) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @lsx_vxori_b_hi(<16 x i8> %va) nounwind { ++; CHECK: llvm.loongarch.lsx.vxori.b: argument out of range ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> %va, i32 256) ++ ret <16 x i8> %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori-non-imm.ll +new file mode 100644 +index 000000000000..4238d89120f1 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori-non-imm.ll +@@ -0,0 +1,10 @@ ++; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s ++ ++declare <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8>, i32) ++ ++define <16 x i8> @lsx_vxori_b(<16 x i8> %va, i32 %b) nounwind { ++; CHECK: immarg operand has non-immediate parameter ++entry: ++ %res = call <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> %va, i32 %b) ++ ret <16 x i8> %res ++} +-- +2.20.1 + diff --git a/0020-LoongArch-Reorder-LoongArchTargetLowering-.-NFC.patch b/0020-LoongArch-Reorder-LoongArchTargetLowering-.-NFC.patch new file mode 100644 index 0000000000000000000000000000000000000000..df01c951489c74588a9052e7711a68b29339c35c --- /dev/null +++ b/0020-LoongArch-Reorder-LoongArchTargetLowering-.-NFC.patch @@ -0,0 +1,282 @@ +From 9869322d067e8cec3d9a6417b4978fde7f4e71e4 Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Wed, 23 Aug 2023 15:28:00 +0800 +Subject: [PATCH 20/66] [LoongArch] Reorder LoongArchTargetLowering(). NFC + +(cherry picked from commit 3693909ca47f1fafc97b441c91f5656acdd3907c) + +Change-Id: I4682465ce4c2b0cca7cfcc8cef5dc4d3406b10c1 + +[LoongArch] Fix Subtarget.is64Bit + +(cherry picked from commit 749f36dae311000e1d69351707f4f24a72090c94) + +Change-Id: Ibf180c0aa3ee644044fcc71fa4a2282ce1314a02 +--- + .../LoongArch/LoongArchISelLowering.cpp | 152 ++++++++++-------- + 1 file changed, 82 insertions(+), 70 deletions(-) + +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +index 3a40cd06a3eb..2f8ce57d3f5f 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +@@ -47,20 +47,14 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + : TargetLowering(TM), Subtarget(STI) { + + MVT GRLenVT = Subtarget.getGRLenVT(); ++ + // Set up the register classes. 
++ + addRegisterClass(GRLenVT, &LoongArch::GPRRegClass); + if (Subtarget.hasBasicF()) + addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass); + if (Subtarget.hasBasicD()) + addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass); +- if (Subtarget.hasExtLSX()) +- for (auto VT : {MVT::v4f32, MVT::v2f64, MVT::v16i8, MVT::v8i16, MVT::v4i32, +- MVT::v2i64}) +- addRegisterClass(VT, &LoongArch::LSX128RegClass); +- if (Subtarget.hasExtLASX()) +- for (auto VT : {MVT::v8f32, MVT::v4f64, MVT::v32i8, MVT::v16i16, MVT::v8i32, +- MVT::v4i64}) +- addRegisterClass(VT, &LoongArch::LASX256RegClass); + + static const MVT::SimpleValueType LSXVTs[] = { + MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64}; +@@ -75,38 +69,57 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + for (MVT VT : LASXVTs) + addRegisterClass(VT, &LoongArch::LASX256RegClass); + ++ // Set operations for LA32 and LA64. ++ + setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, GRLenVT, + MVT::i1, Promote); + +- // TODO: add necessary setOperationAction calls later. + setOperationAction(ISD::SHL_PARTS, GRLenVT, Custom); + setOperationAction(ISD::SRA_PARTS, GRLenVT, Custom); + setOperationAction(ISD::SRL_PARTS, GRLenVT, Custom); + setOperationAction(ISD::FP_TO_SINT, GRLenVT, Custom); + setOperationAction(ISD::ROTL, GRLenVT, Expand); + setOperationAction(ISD::CTPOP, GRLenVT, Expand); +- setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal); +- setOperationAction(ISD::TRAP, MVT::Other, Legal); +- setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); +- setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); + + setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool, +- ISD::JumpTable}, ++ ISD::JumpTable, ISD::GlobalTLSAddress}, + GRLenVT, Custom); + +- setOperationAction(ISD::GlobalTLSAddress, GRLenVT, Custom); +- +- setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); +- +- setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom); +- if (Subtarget.is64Bit()) +- setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom); ++ setOperationAction(ISD::EH_DWARF_CFA, GRLenVT, Custom); + + setOperationAction(ISD::DYNAMIC_STACKALLOC, GRLenVT, Expand); + setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand); + setOperationAction(ISD::VASTART, MVT::Other, Custom); + setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand); + ++ setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal); ++ setOperationAction(ISD::TRAP, MVT::Other, Legal); ++ ++ setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); ++ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); ++ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); ++ ++ // Expand bitreverse.i16 with native-width bitrev and shift for now, before ++ // we get to know which of sll and revb.2h is faster. ++ setOperationAction(ISD::BITREVERSE, MVT::i8, Custom); ++ setOperationAction(ISD::BITREVERSE, GRLenVT, Legal); ++ ++ // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and ++ // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16 ++ // and i32 could still be byte-swapped relatively cheaply. 
++ setOperationAction(ISD::BSWAP, MVT::i16, Custom); ++ ++ setOperationAction(ISD::BR_JT, MVT::Other, Expand); ++ setOperationAction(ISD::BR_CC, GRLenVT, Expand); ++ setOperationAction(ISD::SELECT_CC, GRLenVT, Expand); ++ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); ++ setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, GRLenVT, Expand); ++ ++ setOperationAction(ISD::FP_TO_UINT, GRLenVT, Custom); ++ setOperationAction(ISD::UINT_TO_FP, GRLenVT, Expand); ++ ++ // Set operations for LA64 only. ++ + if (Subtarget.is64Bit()) { + setOperationAction(ISD::SHL, MVT::i32, Custom); + setOperationAction(ISD::SRA, MVT::i32, Custom); +@@ -117,50 +130,39 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + setOperationAction(ISD::ROTL, MVT::i32, Custom); + setOperationAction(ISD::CTTZ, MVT::i32, Custom); + setOperationAction(ISD::CTLZ, MVT::i32, Custom); +- setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom); +- setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom); +- setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); ++ setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom); + setOperationAction(ISD::READ_REGISTER, MVT::i32, Custom); + setOperationAction(ISD::WRITE_REGISTER, MVT::i32, Custom); ++ setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom); +- if (Subtarget.hasBasicF() && !Subtarget.hasBasicD()) +- setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); +- if (Subtarget.hasBasicF()) +- setOperationAction(ISD::FRINT, MVT::f32, Legal); +- if (Subtarget.hasBasicD()) +- setOperationAction(ISD::FRINT, MVT::f64, Legal); +- } ++ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom); + +- // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and +- // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16 +- // and i32 could still be byte-swapped relatively cheaply. +- setOperationAction(ISD::BSWAP, MVT::i16, Custom); +- if (Subtarget.is64Bit()) { ++ setOperationAction(ISD::BITREVERSE, MVT::i32, Custom); + setOperationAction(ISD::BSWAP, MVT::i32, Custom); + } + +- // Expand bitreverse.i16 with native-width bitrev and shift for now, before +- // we get to know which of sll and revb.2h is faster. +- setOperationAction(ISD::BITREVERSE, MVT::i8, Custom); +- if (Subtarget.is64Bit()) { +- setOperationAction(ISD::BITREVERSE, MVT::i32, Custom); +- setOperationAction(ISD::BITREVERSE, MVT::i64, Legal); +- } else { +- setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); +- setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom); ++ // Set operations for LA32 only. ++ ++ if (!Subtarget.is64Bit()) { + setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom); + setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom); +- setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); + setOperationAction(ISD::INTRINSIC_VOID, MVT::i64, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); ++ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom); ++ ++ // Set libcalls. ++ setLibcallName(RTLIB::MUL_I128, nullptr); + } + + static const ISD::CondCode FPCCToExpand[] = { + ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE, + ISD::SETGE, ISD::SETNE, ISD::SETGT}; + ++ // Set operations for 'F' feature. 
++ + if (Subtarget.hasBasicF()) { + setCondCodeAction(FPCCToExpand, MVT::f32, Expand); ++ + setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); + setOperationAction(ISD::BR_CC, MVT::f32, Expand); + setOperationAction(ISD::FMA, MVT::f32, Legal); +@@ -173,14 +175,30 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + setOperationAction(ISD::FSINCOS, MVT::f32, Expand); + setOperationAction(ISD::FPOW, MVT::f32, Expand); + setOperationAction(ISD::FREM, MVT::f32, Expand); ++ ++ if (Subtarget.is64Bit()) ++ setOperationAction(ISD::FRINT, MVT::f32, Legal); ++ ++ if (!Subtarget.hasBasicD()) { ++ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); ++ if (Subtarget.is64Bit()) { ++ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); ++ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); ++ } ++ } + } ++ ++ // Set operations for 'D' feature. ++ + if (Subtarget.hasBasicD()) { ++ setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); ++ setTruncStoreAction(MVT::f64, MVT::f32, Expand); + setCondCodeAction(FPCCToExpand, MVT::f64, Expand); ++ + setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); + setOperationAction(ISD::BR_CC, MVT::f64, Expand); + setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal); + setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal); +- setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); + setOperationAction(ISD::FMA, MVT::f64, Legal); + setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal); + setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal); +@@ -189,35 +207,35 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + setOperationAction(ISD::FSINCOS, MVT::f64, Expand); + setOperationAction(ISD::FPOW, MVT::f64, Expand); + setOperationAction(ISD::FREM, MVT::f64, Expand); +- setTruncStoreAction(MVT::f64, MVT::f32, Expand); +- } +- +- setOperationAction(ISD::BR_JT, MVT::Other, Expand); + +- setOperationAction(ISD::BR_CC, GRLenVT, Expand); +- setOperationAction(ISD::SELECT_CC, GRLenVT, Expand); +- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); +- setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, GRLenVT, Expand); +- if (!Subtarget.is64Bit()) +- setLibcallName(RTLIB::MUL_I128, nullptr); +- +- setOperationAction(ISD::FP_TO_UINT, GRLenVT, Custom); +- setOperationAction(ISD::UINT_TO_FP, GRLenVT, Expand); +- if ((Subtarget.is64Bit() && Subtarget.hasBasicF() && +- !Subtarget.hasBasicD())) { +- setOperationAction(ISD::SINT_TO_FP, GRLenVT, Custom); +- setOperationAction(ISD::UINT_TO_FP, GRLenVT, Custom); ++ if (Subtarget.is64Bit()) ++ setOperationAction(ISD::FRINT, MVT::f64, Legal); + } + ++ // Set operations for 'LSX' feature. ++ + if (Subtarget.hasExtLSX()) + setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, + {MVT::v2i64, MVT::v4i32, MVT::v8i16, MVT::v16i8}, Legal); + ++ // Set operations for 'LASX' feature. ++ + if (Subtarget.hasExtLASX()) + setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, + {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}, + Legal); + ++ // Set DAG combine for LA32 and LA64. ++ ++ setTargetDAGCombine(ISD::AND); ++ setTargetDAGCombine(ISD::OR); ++ setTargetDAGCombine(ISD::SRL); ++ ++ // Set DAG combine for 'LSX' feature. ++ ++ if (Subtarget.hasExtLSX()) ++ setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); ++ + // Compute derived properties from the register classes. 
+ computeRegisterProperties(Subtarget.getRegisterInfo()); + +@@ -235,12 +253,6 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment()); + setPrefLoopAlignment(Subtarget.getPrefLoopAlignment()); + setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment()); +- +- setTargetDAGCombine(ISD::AND); +- setTargetDAGCombine(ISD::OR); +- setTargetDAGCombine(ISD::SRL); +- if (Subtarget.hasExtLSX()) +- setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); + } + + bool LoongArchTargetLowering::isOffsetFoldingLegal( +-- +2.20.1 + diff --git a/0021-LoongArch-Fix-typos.-NFC.patch b/0021-LoongArch-Fix-typos.-NFC.patch new file mode 100644 index 0000000000000000000000000000000000000000..21ab98cfae02514b5e1008896bbad3e0115c663b --- /dev/null +++ b/0021-LoongArch-Fix-typos.-NFC.patch @@ -0,0 +1,301 @@ +From 7768b478443d9706b7ac3a0897471ef0951ad3d6 Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Tue, 29 Aug 2023 19:16:20 +0800 +Subject: [PATCH 21/66] [LoongArch] Fix typos. NFC + +(cherry picked from commit 30b6b27385f8ddc550df54a097434a121ae56d12) + +Change-Id: I8bdec6b4c04359a4e7c4f28d3fb019f1e4a187c5 +--- + .../LoongArch/LoongArchLASXInstrInfo.td | 52 +++++++++---------- + .../Target/LoongArch/LoongArchLSXInstrInfo.td | 50 +++++++++--------- + 2 files changed, 51 insertions(+), 51 deletions(-) + +diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +index a3afd4789dfc..947950be2b8f 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +@@ -1545,10 +1545,10 @@ foreach Inst = ["XVEXTH_Q_D", "XVEXTH_QU_DU", "XVMSKLTZ_D", + // Pat<(Intrinsic timm:$imm) + // (LAInst timm:$imm)>; + def : Pat<(int_loongarch_lasx_xvldi timm:$imm), +- (XVLDI (to_valide_timm timm:$imm))>; ++ (XVLDI (to_valid_timm timm:$imm))>; + foreach Inst = ["XVREPLI_B", "XVREPLI_H", "XVREPLI_W", "XVREPLI_D"] in + def : Pat<(deriveLASXIntrinsic.ret timm:$imm), +- (!cast("Pseudo"#Inst) (to_valide_timm timm:$imm))>; ++ (!cast("Pseudo"#Inst) (to_valid_timm timm:$imm))>; + + // vty: v32i8/v16i16/v8i32/v4i64 + // Pat<(Intrinsic vty:$xj, timm:$imm) +@@ -1558,25 +1558,25 @@ foreach Inst = ["XVSAT_B", "XVSAT_BU", "XVNORI_B", "XVROTRI_B", "XVSLLWIL_H_B", + "XVSEQI_B", "XVSLEI_B", "XVSLEI_BU", "XVSLTI_B", "XVSLTI_BU", + "XVREPL128VEI_B", "XVBSLL_V", "XVBSRL_V", "XVSHUF4I_B"] in + def : Pat<(deriveLASXIntrinsic.ret (v32i8 LASX256:$xj), timm:$imm), +- (!cast(Inst) LASX256:$xj, (to_valide_timm timm:$imm))>; ++ (!cast(Inst) LASX256:$xj, (to_valid_timm timm:$imm))>; + foreach Inst = ["XVSAT_H", "XVSAT_HU", "XVROTRI_H", "XVSLLWIL_W_H", + "XVSLLWIL_WU_HU", "XVSRLRI_H", "XVSRARI_H", + "XVSEQI_H", "XVSLEI_H", "XVSLEI_HU", "XVSLTI_H", "XVSLTI_HU", + "XVREPL128VEI_H", "XVSHUF4I_H"] in + def : Pat<(deriveLASXIntrinsic.ret (v16i16 LASX256:$xj), timm:$imm), +- (!cast(Inst) LASX256:$xj, (to_valide_timm timm:$imm))>; ++ (!cast(Inst) LASX256:$xj, (to_valid_timm timm:$imm))>; + foreach Inst = ["XVSAT_W", "XVSAT_WU", "XVROTRI_W", "XVSLLWIL_D_W", + "XVSLLWIL_DU_WU", "XVSRLRI_W", "XVSRARI_W", + "XVSEQI_W", "XVSLEI_W", "XVSLEI_WU", "XVSLTI_W", "XVSLTI_WU", + "XVREPL128VEI_W", "XVSHUF4I_W", "XVPICKVE_W"] in + def : Pat<(deriveLASXIntrinsic.ret (v8i32 LASX256:$xj), timm:$imm), +- (!cast(Inst) LASX256:$xj, (to_valide_timm timm:$imm))>; ++ (!cast(Inst) LASX256:$xj, (to_valid_timm timm:$imm))>; + foreach Inst = ["XVSAT_D", "XVSAT_DU", "XVROTRI_D", "XVSRLRI_D", "XVSRARI_D", 
+ "XVSEQI_D", "XVSLEI_D", "XVSLEI_DU", "XVSLTI_D", "XVSLTI_DU", + "XVPICKVE2GR_D", "XVPICKVE2GR_DU", + "XVREPL128VEI_D", "XVPERMI_D", "XVPICKVE_D"] in + def : Pat<(deriveLASXIntrinsic.ret (v4i64 LASX256:$xj), timm:$imm), +- (!cast(Inst) LASX256:$xj, (to_valide_timm timm:$imm))>; ++ (!cast(Inst) LASX256:$xj, (to_valid_timm timm:$imm))>; + + // vty: v32i8/v16i16/v8i32/v4i64 + // Pat<(Intrinsic vty:$xd, vty:$xj, timm:$imm) +@@ -1588,7 +1588,7 @@ foreach Inst = ["XVSRLNI_B_H", "XVSRANI_B_H", "XVSRLRNI_B_H", "XVSRARNI_B_H", + def : Pat<(deriveLASXIntrinsic.ret + (v32i8 LASX256:$xd), (v32i8 LASX256:$xj), timm:$imm), + (!cast(Inst) LASX256:$xd, LASX256:$xj, +- (to_valide_timm timm:$imm))>; ++ (to_valid_timm timm:$imm))>; + foreach Inst = ["XVSRLNI_H_W", "XVSRANI_H_W", "XVSRLRNI_H_W", "XVSRARNI_H_W", + "XVSSRLNI_H_W", "XVSSRANI_H_W", "XVSSRLNI_HU_W", "XVSSRANI_HU_W", + "XVSSRLRNI_H_W", "XVSSRARNI_H_W", "XVSSRLRNI_HU_W", "XVSSRARNI_HU_W", +@@ -1596,7 +1596,7 @@ foreach Inst = ["XVSRLNI_H_W", "XVSRANI_H_W", "XVSRLRNI_H_W", "XVSRARNI_H_W", + def : Pat<(deriveLASXIntrinsic.ret + (v16i16 LASX256:$xd), (v16i16 LASX256:$xj), timm:$imm), + (!cast(Inst) LASX256:$xd, LASX256:$xj, +- (to_valide_timm timm:$imm))>; ++ (to_valid_timm timm:$imm))>; + foreach Inst = ["XVSRLNI_W_D", "XVSRANI_W_D", "XVSRLRNI_W_D", "XVSRARNI_W_D", + "XVSSRLNI_W_D", "XVSSRANI_W_D", "XVSSRLNI_WU_D", "XVSSRANI_WU_D", + "XVSSRLRNI_W_D", "XVSSRARNI_W_D", "XVSSRLRNI_WU_D", "XVSSRARNI_WU_D", +@@ -1604,7 +1604,7 @@ foreach Inst = ["XVSRLNI_W_D", "XVSRANI_W_D", "XVSRLRNI_W_D", "XVSRARNI_W_D", + def : Pat<(deriveLASXIntrinsic.ret + (v8i32 LASX256:$xd), (v8i32 LASX256:$xj), timm:$imm), + (!cast(Inst) LASX256:$xd, LASX256:$xj, +- (to_valide_timm timm:$imm))>; ++ (to_valid_timm timm:$imm))>; + foreach Inst = ["XVSRLNI_D_Q", "XVSRANI_D_Q", "XVSRLRNI_D_Q", "XVSRARNI_D_Q", + "XVSSRLNI_D_Q", "XVSSRANI_D_Q", "XVSSRLNI_DU_Q", "XVSSRANI_DU_Q", + "XVSSRLRNI_D_Q", "XVSSRARNI_D_Q", "XVSSRLRNI_DU_Q", "XVSSRARNI_DU_Q", +@@ -1612,7 +1612,7 @@ foreach Inst = ["XVSRLNI_D_Q", "XVSRANI_D_Q", "XVSRLRNI_D_Q", "XVSRARNI_D_Q", + def : Pat<(deriveLASXIntrinsic.ret + (v4i64 LASX256:$xd), (v4i64 LASX256:$xj), timm:$imm), + (!cast(Inst) LASX256:$xd, LASX256:$xj, +- (to_valide_timm timm:$imm))>; ++ (to_valid_timm timm:$imm))>; + + // vty: v32i8/v16i16/v8i32/v4i64 + // Pat<(Intrinsic vty:$xd, vty:$xj, vty:$xk), +@@ -1693,42 +1693,42 @@ foreach Inst = ["XVFLOGB_D", "XVFCLASS_D", "XVFSQRT_D", "XVFRECIP_D", "XVFRSQRT_ + (!cast(Inst) LASX256:$xj)>; + + def : Pat<(int_loongarch_lasx_xvpickve_w_f v8f32:$xj, timm:$imm), +- (XVPICKVE_W v8f32:$xj, (to_valide_timm timm:$imm))>; ++ (XVPICKVE_W v8f32:$xj, (to_valid_timm timm:$imm))>; + def : Pat<(int_loongarch_lasx_xvpickve_d_f v4f64:$xj, timm:$imm), +- (XVPICKVE_D v4f64:$xj, (to_valide_timm timm:$imm))>; ++ (XVPICKVE_D v4f64:$xj, (to_valid_timm timm:$imm))>; + + // load + def : Pat<(int_loongarch_lasx_xvld GPR:$rj, timm:$imm), +- (XVLD GPR:$rj, (to_valide_timm timm:$imm))>; ++ (XVLD GPR:$rj, (to_valid_timm timm:$imm))>; + def : Pat<(int_loongarch_lasx_xvldx GPR:$rj, GPR:$rk), + (XVLDX GPR:$rj, GPR:$rk)>; + + def : Pat<(int_loongarch_lasx_xvldrepl_b GPR:$rj, timm:$imm), +- (XVLDREPL_B GPR:$rj, (to_valide_timm timm:$imm))>; ++ (XVLDREPL_B GPR:$rj, (to_valid_timm timm:$imm))>; + def : Pat<(int_loongarch_lasx_xvldrepl_h GPR:$rj, timm:$imm), +- (XVLDREPL_H GPR:$rj, (to_valide_timm timm:$imm))>; ++ (XVLDREPL_H GPR:$rj, (to_valid_timm timm:$imm))>; + def : Pat<(int_loongarch_lasx_xvldrepl_w GPR:$rj, timm:$imm), +- (XVLDREPL_W GPR:$rj, 
(to_valide_timm timm:$imm))>; ++ (XVLDREPL_W GPR:$rj, (to_valid_timm timm:$imm))>; + def : Pat<(int_loongarch_lasx_xvldrepl_d GPR:$rj, timm:$imm), +- (XVLDREPL_D GPR:$rj, (to_valide_timm timm:$imm))>; ++ (XVLDREPL_D GPR:$rj, (to_valid_timm timm:$imm))>; + + // store + def : Pat<(int_loongarch_lasx_xvst LASX256:$xd, GPR:$rj, timm:$imm), +- (XVST LASX256:$xd, GPR:$rj, (to_valide_timm timm:$imm))>; ++ (XVST LASX256:$xd, GPR:$rj, (to_valid_timm timm:$imm))>; + def : Pat<(int_loongarch_lasx_xvstx LASX256:$xd, GPR:$rj, GPR:$rk), + (XVSTX LASX256:$xd, GPR:$rj, GPR:$rk)>; + + def : Pat<(int_loongarch_lasx_xvstelm_b v32i8:$xd, GPR:$rj, timm:$imm, timm:$idx), +- (XVSTELM_B v32i8:$xd, GPR:$rj, (to_valide_timm timm:$imm), +- (to_valide_timm timm:$idx))>; ++ (XVSTELM_B v32i8:$xd, GPR:$rj, (to_valid_timm timm:$imm), ++ (to_valid_timm timm:$idx))>; + def : Pat<(int_loongarch_lasx_xvstelm_h v16i16:$xd, GPR:$rj, timm:$imm, timm:$idx), +- (XVSTELM_H v16i16:$xd, GPR:$rj, (to_valide_timm timm:$imm), +- (to_valide_timm timm:$idx))>; ++ (XVSTELM_H v16i16:$xd, GPR:$rj, (to_valid_timm timm:$imm), ++ (to_valid_timm timm:$idx))>; + def : Pat<(int_loongarch_lasx_xvstelm_w v8i32:$xd, GPR:$rj, timm:$imm, timm:$idx), +- (XVSTELM_W v8i32:$xd, GPR:$rj, (to_valide_timm timm:$imm), +- (to_valide_timm timm:$idx))>; ++ (XVSTELM_W v8i32:$xd, GPR:$rj, (to_valid_timm timm:$imm), ++ (to_valid_timm timm:$idx))>; + def : Pat<(int_loongarch_lasx_xvstelm_d v4i64:$xd, GPR:$rj, timm:$imm, timm:$idx), +- (XVSTELM_D v4i64:$xd, GPR:$rj, (to_valide_timm timm:$imm), +- (to_valide_timm timm:$idx))>; ++ (XVSTELM_D v4i64:$xd, GPR:$rj, (to_valid_timm timm:$imm), ++ (to_valid_timm timm:$idx))>; + + } // Predicates = [HasExtLASX] +diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +index 13332be0bc38..e021adcecf4d 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +@@ -145,7 +145,7 @@ def lsxsplati32 : PatFrag<(ops node:$e0), + def lsxsplati64 : PatFrag<(ops node:$e0), + (v2i64 (build_vector node:$e0, node:$e0))>; + +-def to_valide_timm : SDNodeXForm(N); + return CurDAG->getTargetConstant(CN->getSExtValue(), SDLoc(N), Subtarget->getGRLenVT()); + }]>; +@@ -1639,10 +1639,10 @@ foreach Inst = ["VEXTH_Q_D", "VEXTH_QU_DU", "VMSKLTZ_D", + // Pat<(Intrinsic timm:$imm) + // (LAInst timm:$imm)>; + def : Pat<(int_loongarch_lsx_vldi timm:$imm), +- (VLDI (to_valide_timm timm:$imm))>; ++ (VLDI (to_valid_timm timm:$imm))>; + foreach Inst = ["VREPLI_B", "VREPLI_H", "VREPLI_W", "VREPLI_D"] in + def : Pat<(deriveLSXIntrinsic.ret timm:$imm), +- (!cast("Pseudo"#Inst) (to_valide_timm timm:$imm))>; ++ (!cast("Pseudo"#Inst) (to_valid_timm timm:$imm))>; + + // vty: v16i8/v8i16/v4i32/v2i64 + // Pat<(Intrinsic vty:$vj, timm:$imm) +@@ -1652,25 +1652,25 @@ foreach Inst = ["VSAT_B", "VSAT_BU", "VNORI_B", "VROTRI_B", "VSLLWIL_H_B", + "VSEQI_B", "VSLEI_B", "VSLEI_BU", "VSLTI_B", "VSLTI_BU", + "VREPLVEI_B", "VBSLL_V", "VBSRL_V", "VSHUF4I_B"] in + def : Pat<(deriveLSXIntrinsic.ret (v16i8 LSX128:$vj), timm:$imm), +- (!cast(Inst) LSX128:$vj, (to_valide_timm timm:$imm))>; ++ (!cast(Inst) LSX128:$vj, (to_valid_timm timm:$imm))>; + foreach Inst = ["VSAT_H", "VSAT_HU", "VROTRI_H", "VSLLWIL_W_H", + "VSLLWIL_WU_HU", "VSRLRI_H", "VSRARI_H", + "VSEQI_H", "VSLEI_H", "VSLEI_HU", "VSLTI_H", "VSLTI_HU", + "VREPLVEI_H", "VSHUF4I_H"] in + def : Pat<(deriveLSXIntrinsic.ret (v8i16 LSX128:$vj), timm:$imm), +- (!cast(Inst) LSX128:$vj, (to_valide_timm 
timm:$imm))>; ++ (!cast(Inst) LSX128:$vj, (to_valid_timm timm:$imm))>; + foreach Inst = ["VSAT_W", "VSAT_WU", "VROTRI_W", "VSLLWIL_D_W", + "VSLLWIL_DU_WU", "VSRLRI_W", "VSRARI_W", + "VSEQI_W", "VSLEI_W", "VSLEI_WU", "VSLTI_W", "VSLTI_WU", + "VREPLVEI_W", "VSHUF4I_W"] in + def : Pat<(deriveLSXIntrinsic.ret (v4i32 LSX128:$vj), timm:$imm), +- (!cast(Inst) LSX128:$vj, (to_valide_timm timm:$imm))>; ++ (!cast(Inst) LSX128:$vj, (to_valid_timm timm:$imm))>; + foreach Inst = ["VSAT_D", "VSAT_DU", "VROTRI_D", "VSRLRI_D", "VSRARI_D", + "VSEQI_D", "VSLEI_D", "VSLEI_DU", "VSLTI_D", "VSLTI_DU", + "VPICKVE2GR_D", "VPICKVE2GR_DU", + "VREPLVEI_D"] in + def : Pat<(deriveLSXIntrinsic.ret (v2i64 LSX128:$vj), timm:$imm), +- (!cast(Inst) LSX128:$vj, (to_valide_timm timm:$imm))>; ++ (!cast(Inst) LSX128:$vj, (to_valid_timm timm:$imm))>; + + // vty: v16i8/v8i16/v4i32/v2i64 + // Pat<(Intrinsic vty:$vd, vty:$vj, timm:$imm) +@@ -1682,7 +1682,7 @@ foreach Inst = ["VSRLNI_B_H", "VSRANI_B_H", "VSRLRNI_B_H", "VSRARNI_B_H", + def : Pat<(deriveLSXIntrinsic.ret + (v16i8 LSX128:$vd), (v16i8 LSX128:$vj), timm:$imm), + (!cast(Inst) LSX128:$vd, LSX128:$vj, +- (to_valide_timm timm:$imm))>; ++ (to_valid_timm timm:$imm))>; + foreach Inst = ["VSRLNI_H_W", "VSRANI_H_W", "VSRLRNI_H_W", "VSRARNI_H_W", + "VSSRLNI_H_W", "VSSRANI_H_W", "VSSRLNI_HU_W", "VSSRANI_HU_W", + "VSSRLRNI_H_W", "VSSRARNI_H_W", "VSSRLRNI_HU_W", "VSSRARNI_HU_W", +@@ -1690,7 +1690,7 @@ foreach Inst = ["VSRLNI_H_W", "VSRANI_H_W", "VSRLRNI_H_W", "VSRARNI_H_W", + def : Pat<(deriveLSXIntrinsic.ret + (v8i16 LSX128:$vd), (v8i16 LSX128:$vj), timm:$imm), + (!cast(Inst) LSX128:$vd, LSX128:$vj, +- (to_valide_timm timm:$imm))>; ++ (to_valid_timm timm:$imm))>; + foreach Inst = ["VSRLNI_W_D", "VSRANI_W_D", "VSRLRNI_W_D", "VSRARNI_W_D", + "VSSRLNI_W_D", "VSSRANI_W_D", "VSSRLNI_WU_D", "VSSRANI_WU_D", + "VSSRLRNI_W_D", "VSSRARNI_W_D", "VSSRLRNI_WU_D", "VSSRARNI_WU_D", +@@ -1698,7 +1698,7 @@ foreach Inst = ["VSRLNI_W_D", "VSRANI_W_D", "VSRLRNI_W_D", "VSRARNI_W_D", + def : Pat<(deriveLSXIntrinsic.ret + (v4i32 LSX128:$vd), (v4i32 LSX128:$vj), timm:$imm), + (!cast(Inst) LSX128:$vd, LSX128:$vj, +- (to_valide_timm timm:$imm))>; ++ (to_valid_timm timm:$imm))>; + foreach Inst = ["VSRLNI_D_Q", "VSRANI_D_Q", "VSRLRNI_D_Q", "VSRARNI_D_Q", + "VSSRLNI_D_Q", "VSSRANI_D_Q", "VSSRLNI_DU_Q", "VSSRANI_DU_Q", + "VSSRLRNI_D_Q", "VSSRARNI_D_Q", "VSSRLRNI_DU_Q", "VSSRARNI_DU_Q", +@@ -1706,7 +1706,7 @@ foreach Inst = ["VSRLNI_D_Q", "VSRANI_D_Q", "VSRLRNI_D_Q", "VSRARNI_D_Q", + def : Pat<(deriveLSXIntrinsic.ret + (v2i64 LSX128:$vd), (v2i64 LSX128:$vj), timm:$imm), + (!cast(Inst) LSX128:$vd, LSX128:$vj, +- (to_valide_timm timm:$imm))>; ++ (to_valid_timm timm:$imm))>; + + // vty: v16i8/v8i16/v4i32/v2i64 + // Pat<(Intrinsic vty:$vd, vty:$vj, vty:$vk), +@@ -1788,36 +1788,36 @@ foreach Inst = ["VFLOGB_D", "VFCLASS_D", "VFSQRT_D", "VFRECIP_D", "VFRSQRT_D", + + // load + def : Pat<(int_loongarch_lsx_vld GPR:$rj, timm:$imm), +- (VLD GPR:$rj, (to_valide_timm timm:$imm))>; ++ (VLD GPR:$rj, (to_valid_timm timm:$imm))>; + def : Pat<(int_loongarch_lsx_vldx GPR:$rj, GPR:$rk), + (VLDX GPR:$rj, GPR:$rk)>; + + def : Pat<(int_loongarch_lsx_vldrepl_b GPR:$rj, timm:$imm), +- (VLDREPL_B GPR:$rj, (to_valide_timm timm:$imm))>; ++ (VLDREPL_B GPR:$rj, (to_valid_timm timm:$imm))>; + def : Pat<(int_loongarch_lsx_vldrepl_h GPR:$rj, timm:$imm), +- (VLDREPL_H GPR:$rj, (to_valide_timm timm:$imm))>; ++ (VLDREPL_H GPR:$rj, (to_valid_timm timm:$imm))>; + def : Pat<(int_loongarch_lsx_vldrepl_w GPR:$rj, timm:$imm), +- (VLDREPL_W GPR:$rj, 
(to_valide_timm timm:$imm))>; ++ (VLDREPL_W GPR:$rj, (to_valid_timm timm:$imm))>; + def : Pat<(int_loongarch_lsx_vldrepl_d GPR:$rj, timm:$imm), +- (VLDREPL_D GPR:$rj, (to_valide_timm timm:$imm))>; ++ (VLDREPL_D GPR:$rj, (to_valid_timm timm:$imm))>; + + // store + def : Pat<(int_loongarch_lsx_vst LSX128:$vd, GPR:$rj, timm:$imm), +- (VST LSX128:$vd, GPR:$rj, (to_valide_timm timm:$imm))>; ++ (VST LSX128:$vd, GPR:$rj, (to_valid_timm timm:$imm))>; + def : Pat<(int_loongarch_lsx_vstx LSX128:$vd, GPR:$rj, GPR:$rk), + (VSTX LSX128:$vd, GPR:$rj, GPR:$rk)>; + + def : Pat<(int_loongarch_lsx_vstelm_b v16i8:$vd, GPR:$rj, timm:$imm, timm:$idx), +- (VSTELM_B v16i8:$vd, GPR:$rj, (to_valide_timm timm:$imm), +- (to_valide_timm timm:$idx))>; ++ (VSTELM_B v16i8:$vd, GPR:$rj, (to_valid_timm timm:$imm), ++ (to_valid_timm timm:$idx))>; + def : Pat<(int_loongarch_lsx_vstelm_h v8i16:$vd, GPR:$rj, timm:$imm, timm:$idx), +- (VSTELM_H v8i16:$vd, GPR:$rj, (to_valide_timm timm:$imm), +- (to_valide_timm timm:$idx))>; ++ (VSTELM_H v8i16:$vd, GPR:$rj, (to_valid_timm timm:$imm), ++ (to_valid_timm timm:$idx))>; + def : Pat<(int_loongarch_lsx_vstelm_w v4i32:$vd, GPR:$rj, timm:$imm, timm:$idx), +- (VSTELM_W v4i32:$vd, GPR:$rj, (to_valide_timm timm:$imm), +- (to_valide_timm timm:$idx))>; ++ (VSTELM_W v4i32:$vd, GPR:$rj, (to_valid_timm timm:$imm), ++ (to_valid_timm timm:$idx))>; + def : Pat<(int_loongarch_lsx_vstelm_d v2i64:$vd, GPR:$rj, timm:$imm, timm:$idx), +- (VSTELM_D v2i64:$vd, GPR:$rj, (to_valide_timm timm:$imm), +- (to_valide_timm timm:$idx))>; ++ (VSTELM_D v2i64:$vd, GPR:$rj, (to_valid_timm timm:$imm), ++ (to_valid_timm timm:$idx))>; + + } // Predicates = [HasExtLSX] +-- +2.20.1 + diff --git a/0022-LoongArch-Set-some-operations-action-for-LSX-and-LAS.patch b/0022-LoongArch-Set-some-operations-action-for-LSX-and-LAS.patch new file mode 100644 index 0000000000000000000000000000000000000000..76ed112dfcccb19ba402781722948a8c9ad465df --- /dev/null +++ b/0022-LoongArch-Set-some-operations-action-for-LSX-and-LAS.patch @@ -0,0 +1,139 @@ +From c847fcdb11c17aa60940258c6d70c69c3f133c28 Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Tue, 24 Oct 2023 15:46:56 +0800 +Subject: [PATCH 22/66] [LoongArch] Set some operations action for LSX and LASX + +First, expand all truncationg stores and extending loads. Second, +expand everything for `fixedlen_vector_valuetypes`. Finally, we +selectively turn on ones that can be effectively codegen'd. + +Simultaneously, this patch adds floating-point vector types to +load/store patterns. Additional test cases will be included in the IR +instruction test patchs. + +(cherry picked from commit f2441a06c609cedbb7e11303907f07bf0ca5cb2f) + +Change-Id: I473c19a718f4582c166609fda5d1fa3fe0e602dc +--- + .../LoongArch/LoongArchISelLowering.cpp | 74 +++++++++++++++++-- + .../LoongArch/LoongArchLASXInstrInfo.td | 2 +- + .../Target/LoongArch/LoongArchLSXInstrInfo.td | 2 +- + 3 files changed, 69 insertions(+), 9 deletions(-) + +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +index 2f8ce57d3f5f..d3627cec2e8c 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +@@ -214,16 +214,76 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + + // Set operations for 'LSX' feature. 
+ +- if (Subtarget.hasExtLSX()) +- setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, +- {MVT::v2i64, MVT::v4i32, MVT::v8i16, MVT::v16i8}, Legal); ++ if (Subtarget.hasExtLSX()) { ++ for (MVT VT : MVT::fixedlen_vector_valuetypes()) { ++ // Expand all truncating stores and extending loads. ++ for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) { ++ setTruncStoreAction(VT, InnerVT, Expand); ++ setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand); ++ setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand); ++ setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand); ++ } ++ // By default everything must be expanded. Then we will selectively turn ++ // on ones that can be effectively codegen'd. ++ for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) ++ setOperationAction(Op, VT, Expand); ++ } ++ ++ for (MVT VT : LSXVTs) { ++ setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal); ++ setOperationAction(ISD::BITCAST, VT, Legal); ++ setOperationAction(ISD::UNDEF, VT, Legal); ++ ++ // FIXME: For BUILD_VECTOR, it is temporarily set to `Legal` here, and it ++ // will be `Custom` handled in the future. ++ setOperationAction(ISD::BUILD_VECTOR, VT, Legal); ++ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal); ++ } ++ for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) { ++ setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal); ++ setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT, ++ Legal); ++ setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM}, ++ VT, Legal); ++ setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal); ++ setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal); ++ setOperationAction(ISD::CTPOP, VT, Legal); ++ } ++ for (MVT VT : {MVT::v4f32, MVT::v2f64}) { ++ setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); ++ setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal); ++ setOperationAction(ISD::FMA, VT, Legal); ++ } ++ } + + // Set operations for 'LASX' feature. + +- if (Subtarget.hasExtLASX()) +- setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, +- {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}, +- Legal); ++ if (Subtarget.hasExtLASX()) { ++ for (MVT VT : LASXVTs) { ++ setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal); ++ setOperationAction(ISD::BITCAST, VT, Legal); ++ setOperationAction(ISD::UNDEF, VT, Legal); ++ ++ // FIXME: Same as above. ++ setOperationAction(ISD::BUILD_VECTOR, VT, Legal); ++ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal); ++ } ++ for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) { ++ setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal); ++ setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT, ++ Legal); ++ setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM}, ++ VT, Legal); ++ setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal); ++ setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal); ++ setOperationAction(ISD::CTPOP, VT, Legal); ++ } ++ for (MVT VT : {MVT::v8f32, MVT::v4f64}) { ++ setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); ++ setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal); ++ setOperationAction(ISD::FMA, VT, Legal); ++ } ++ } + + // Set DAG combine for LA32 and LA64. 
+ +diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +index 947950be2b8f..e19aa92266b1 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +@@ -1394,7 +1394,7 @@ def : Pat<(loongarch_vreplve v4i64:$xj, GRLenVT:$rk), + (XVREPLVE_D v4i64:$xj, GRLenVT:$rk)>; + + // Loads/Stores +-foreach vt = [v32i8, v16i16, v8i32, v4i64] in { ++foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in { + defm : LdPat; + def : RegRegLdPat; + defm : StPat; +diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +index e021adcecf4d..9391b1a8a20c 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +@@ -1494,7 +1494,7 @@ def : Pat<(loongarch_vreplve v2i64:$vj, GRLenVT:$rk), + (VREPLVE_D v2i64:$vj, GRLenVT:$rk)>; + + // Loads/Stores +-foreach vt = [v16i8, v8i16, v4i32, v2i64] in { ++foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in { + defm : LdPat; + def : RegRegLdPat; + defm : StPat; +-- +2.20.1 + diff --git a/0023-LoongArch-Add-codegen-support-for-extractelement-737.patch b/0023-LoongArch-Add-codegen-support-for-extractelement-737.patch new file mode 100644 index 0000000000000000000000000000000000000000..e85431424fb05bda8d30beabc28dbb644f2e54fa --- /dev/null +++ b/0023-LoongArch-Add-codegen-support-for-extractelement-737.patch @@ -0,0 +1,517 @@ +From 5755e6dc93999099ef4825971cd613cf1754ab6c Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Thu, 30 Nov 2023 17:29:18 +0800 +Subject: [PATCH 23/66] [LoongArch] Add codegen support for extractelement + (#73759) + +Add codegen support for extractelement when enable `lsx` or `lasx` +feature. + +(cherry picked from commit b72456120f1db38ed7068fb592fcf768c6d5cce2) + +Change-Id: If45075ec3b5f8fc8388b81c4da71e31c98d444ff +--- + .../LoongArch/LoongArchISelLowering.cpp | 2 + + .../Target/LoongArch/LoongArchInstrInfo.cpp | 8 + + .../LoongArch/LoongArchLASXInstrInfo.td | 38 ++++ + .../Target/LoongArch/LoongArchLSXInstrInfo.td | 38 ++++ + .../lasx/ir-instruction/extractelement.ll | 172 ++++++++++++++++++ + .../lsx/ir-instruction/extractelement.ll | 170 +++++++++++++++++ + 6 files changed, 428 insertions(+) + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll + +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +index d3627cec2e8c..26e94a53b344 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +@@ -238,6 +238,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + // will be `Custom` handled in the future. + setOperationAction(ISD::BUILD_VECTOR, VT, Legal); + setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal); ++ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal); + } + for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) { + setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal); +@@ -267,6 +268,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + // FIXME: Same as above. 
+ setOperationAction(ISD::BUILD_VECTOR, VT, Legal); + setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal); ++ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal); + } + for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) { + setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal); +diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp +index ddd1c9943fac..6576100d3b32 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp +@@ -90,6 +90,14 @@ void LoongArchInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + Opc = LoongArch::FMOV_S; + } else if (LoongArch::FPR64RegClass.contains(DstReg, SrcReg)) { + Opc = LoongArch::FMOV_D; ++ } else if (LoongArch::GPRRegClass.contains(DstReg) && ++ LoongArch::FPR32RegClass.contains(SrcReg)) { ++ // FPR32 -> GPR copies ++ Opc = LoongArch::MOVFR2GR_S; ++ } else if (LoongArch::GPRRegClass.contains(DstReg) && ++ LoongArch::FPR64RegClass.contains(SrcReg)) { ++ // FPR64 -> GPR copies ++ Opc = LoongArch::MOVFR2GR_D; + } else { + // TODO: support other copies. + llvm_unreachable("Impossible reg-to-reg copy"); +diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +index e19aa92266b1..380206ddcf10 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +@@ -1401,6 +1401,44 @@ foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in { + def : RegRegStPat; + } + ++// Vector extraction with constant index. ++def : Pat<(i64 (vector_extract v32i8:$xj, uimm4:$imm)), ++ (VPICKVE2GR_B (EXTRACT_SUBREG v32i8:$xj, sub_128), uimm4:$imm)>; ++def : Pat<(i64 (vector_extract v16i16:$xj, uimm3:$imm)), ++ (VPICKVE2GR_H (EXTRACT_SUBREG v16i16:$xj, sub_128), uimm3:$imm)>; ++def : Pat<(i64 (vector_extract v8i32:$xj, uimm2:$imm)), ++ (VPICKVE2GR_W (EXTRACT_SUBREG v8i32:$xj, sub_128), uimm2:$imm)>; ++def : Pat<(i64 (vector_extract v4i64:$xj, uimm1:$imm)), ++ (VPICKVE2GR_D (EXTRACT_SUBREG v4i64:$xj, sub_128), uimm1:$imm)>; ++def : Pat<(f32 (vector_extract v8f32:$xj, uimm2:$imm)), ++ (f32 (EXTRACT_SUBREG (XVREPL128VEI_W v8f32:$xj, uimm2:$imm), sub_32))>; ++def : Pat<(f64 (vector_extract v4f64:$xj, uimm1:$imm)), ++ (f64 (EXTRACT_SUBREG (XVREPL128VEI_D v4f64:$xj, uimm1:$imm), sub_64))>; ++ ++// Vector extraction with variable index. 
++def : Pat<(i64 (vector_extract v32i8:$xj, i64:$rk)), ++ (SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (XVREPLVE_B v32i8:$xj, ++ i64:$rk), ++ sub_32)), ++ GPR), (i64 24))>; ++def : Pat<(i64 (vector_extract v16i16:$xj, i64:$rk)), ++ (SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (XVREPLVE_H v16i16:$xj, ++ i64:$rk), ++ sub_32)), ++ GPR), (i64 16))>; ++def : Pat<(i64 (vector_extract v8i32:$xj, i64:$rk)), ++ (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (XVREPLVE_W v8i32:$xj, i64:$rk), ++ sub_32)), ++ GPR)>; ++def : Pat<(i64 (vector_extract v4i64:$xj, i64:$rk)), ++ (COPY_TO_REGCLASS (f64 (EXTRACT_SUBREG (XVREPLVE_D v4i64:$xj, i64:$rk), ++ sub_64)), ++ GPR)>; ++def : Pat<(f32 (vector_extract v8f32:$xj, i64:$rk)), ++ (f32 (EXTRACT_SUBREG (XVREPLVE_W v8f32:$xj, i64:$rk), sub_32))>; ++def : Pat<(f64 (vector_extract v4f64:$xj, i64:$rk)), ++ (f64 (EXTRACT_SUBREG (XVREPLVE_D v4f64:$xj, i64:$rk), sub_64))>; ++ + } // Predicates = [HasExtLASX] + + /// Intrinsic pattern +diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +index 9391b1a8a20c..980870e34503 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +@@ -1501,6 +1501,44 @@ foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in { + def : RegRegStPat; + } + ++// Vector extraction with constant index. ++def : Pat<(i64 (vector_extract v16i8:$vj, uimm4:$imm)), ++ (VPICKVE2GR_B v16i8:$vj, uimm4:$imm)>; ++def : Pat<(i64 (vector_extract v8i16:$vj, uimm3:$imm)), ++ (VPICKVE2GR_H v8i16:$vj, uimm3:$imm)>; ++def : Pat<(i64 (vector_extract v4i32:$vj, uimm2:$imm)), ++ (VPICKVE2GR_W v4i32:$vj, uimm2:$imm)>; ++def : Pat<(i64 (vector_extract v2i64:$vj, uimm1:$imm)), ++ (VPICKVE2GR_D v2i64:$vj, uimm1:$imm)>; ++def : Pat<(f32 (vector_extract v4f32:$vj, uimm2:$imm)), ++ (f32 (EXTRACT_SUBREG (VREPLVEI_W v4f32:$vj, uimm2:$imm), sub_32))>; ++def : Pat<(f64 (vector_extract v2f64:$vj, uimm1:$imm)), ++ (f64 (EXTRACT_SUBREG (VREPLVEI_D v2f64:$vj, uimm1:$imm), sub_64))>; ++ ++// Vector extraction with variable index. 
++def : Pat<(i64 (vector_extract v16i8:$vj, i64:$rk)), ++ (SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (VREPLVE_B v16i8:$vj, ++ i64:$rk), ++ sub_32)), ++ GPR), (i64 24))>; ++def : Pat<(i64 (vector_extract v8i16:$vj, i64:$rk)), ++ (SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (VREPLVE_H v8i16:$vj, ++ i64:$rk), ++ sub_32)), ++ GPR), (i64 16))>; ++def : Pat<(i64 (vector_extract v4i32:$vj, i64:$rk)), ++ (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (VREPLVE_W v4i32:$vj, i64:$rk), ++ sub_32)), ++ GPR)>; ++def : Pat<(i64 (vector_extract v2i64:$vj, i64:$rk)), ++ (COPY_TO_REGCLASS (f64 (EXTRACT_SUBREG (VREPLVE_D v2i64:$vj, i64:$rk), ++ sub_64)), ++ GPR)>; ++def : Pat<(f32 (vector_extract v4f32:$vj, i64:$rk)), ++ (f32 (EXTRACT_SUBREG (VREPLVE_W v4f32:$vj, i64:$rk), sub_32))>; ++def : Pat<(f64 (vector_extract v2f64:$vj, i64:$rk)), ++ (f64 (EXTRACT_SUBREG (VREPLVE_D v2f64:$vj, i64:$rk), sub_64))>; ++ + } // Predicates = [HasExtLSX] + + /// Intrinsic pattern +diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll +new file mode 100644 +index 000000000000..78f584cd09a8 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll +@@ -0,0 +1,172 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++define void @extract_32xi8(ptr %src, ptr %dst) nounwind { ++; CHECK-LABEL: extract_32xi8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1 ++; CHECK-NEXT: st.b $a0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <32 x i8>, ptr %src ++ %e = extractelement <32 x i8> %v, i32 1 ++ store i8 %e, ptr %dst ++ ret void ++} ++ ++define void @extract_16xi16(ptr %src, ptr %dst) nounwind { ++; CHECK-LABEL: extract_16xi16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 1 ++; CHECK-NEXT: st.h $a0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <16 x i16>, ptr %src ++ %e = extractelement <16 x i16> %v, i32 1 ++ store i16 %e, ptr %dst ++ ret void ++} ++ ++define void @extract_8xi32(ptr %src, ptr %dst) nounwind { ++; CHECK-LABEL: extract_8xi32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 1 ++; CHECK-NEXT: st.w $a0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <8 x i32>, ptr %src ++ %e = extractelement <8 x i32> %v, i32 1 ++ store i32 %e, ptr %dst ++ ret void ++} ++ ++define void @extract_4xi64(ptr %src, ptr %dst) nounwind { ++; CHECK-LABEL: extract_4xi64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 1 ++; CHECK-NEXT: st.d $a0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <4 x i64>, ptr %src ++ %e = extractelement <4 x i64> %v, i32 1 ++ store i64 %e, ptr %dst ++ ret void ++} ++ ++define void @extract_8xfloat(ptr %src, ptr %dst) nounwind { ++; CHECK-LABEL: extract_8xfloat: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: ori $a0, $zero, 7 ++; CHECK-NEXT: xvreplve.w $xr0, $xr0, $a0 ++; CHECK-NEXT: fst.s $fa0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <8 x float>, ptr %src ++ %e = extractelement <8 x float> %v, i32 7 ++ store float %e, ptr %dst ++ ret void ++} ++ ++define void @extract_4xdouble(ptr %src, ptr %dst) nounwind { ++; CHECK-LABEL: extract_4xdouble: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: ori $a0, $zero, 3 ++; CHECK-NEXT: 
xvreplve.d $xr0, $xr0, $a0 ++; CHECK-NEXT: fst.d $fa0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <4 x double>, ptr %src ++ %e = extractelement <4 x double> %v, i32 3 ++ store double %e, ptr %dst ++ ret void ++} ++ ++define void @extract_32xi8_idx(ptr %src, ptr %dst, i32 %idx) nounwind { ++; CHECK-LABEL: extract_32xi8_idx: ++; CHECK: # %bb.0: ++; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: xvreplve.b $xr0, $xr0, $a2 ++; CHECK-NEXT: movfr2gr.s $a0, $fa0 ++; CHECK-NEXT: srai.w $a0, $a0, 24 ++; CHECK-NEXT: st.b $a0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <32 x i8>, ptr %src ++ %e = extractelement <32 x i8> %v, i32 %idx ++ store i8 %e, ptr %dst ++ ret void ++} ++ ++define void @extract_16xi16_idx(ptr %src, ptr %dst, i32 %idx) nounwind { ++; CHECK-LABEL: extract_16xi16_idx: ++; CHECK: # %bb.0: ++; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: xvreplve.h $xr0, $xr0, $a2 ++; CHECK-NEXT: movfr2gr.s $a0, $fa0 ++; CHECK-NEXT: srai.w $a0, $a0, 16 ++; CHECK-NEXT: st.h $a0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <16 x i16>, ptr %src ++ %e = extractelement <16 x i16> %v, i32 %idx ++ store i16 %e, ptr %dst ++ ret void ++} ++ ++define void @extract_8xi32_idx(ptr %src, ptr %dst, i32 %idx) nounwind { ++; CHECK-LABEL: extract_8xi32_idx: ++; CHECK: # %bb.0: ++; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: xvreplve.w $xr0, $xr0, $a2 ++; CHECK-NEXT: movfr2gr.s $a0, $fa0 ++; CHECK-NEXT: st.w $a0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <8 x i32>, ptr %src ++ %e = extractelement <8 x i32> %v, i32 %idx ++ store i32 %e, ptr %dst ++ ret void ++} ++ ++define void @extract_4xi64_idx(ptr %src, ptr %dst, i32 %idx) nounwind { ++; CHECK-LABEL: extract_4xi64_idx: ++; CHECK: # %bb.0: ++; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: xvreplve.d $xr0, $xr0, $a2 ++; CHECK-NEXT: movfr2gr.d $a0, $fa0 ++; CHECK-NEXT: st.d $a0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <4 x i64>, ptr %src ++ %e = extractelement <4 x i64> %v, i32 %idx ++ store i64 %e, ptr %dst ++ ret void ++} ++ ++define void @extract_8xfloat_idx(ptr %src, ptr %dst, i32 %idx) nounwind { ++; CHECK-LABEL: extract_8xfloat_idx: ++; CHECK: # %bb.0: ++; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: xvreplve.w $xr0, $xr0, $a2 ++; CHECK-NEXT: fst.s $fa0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <8 x float>, ptr %src ++ %e = extractelement <8 x float> %v, i32 %idx ++ store float %e, ptr %dst ++ ret void ++} ++ ++define void @extract_4xdouble_idx(ptr %src, ptr %dst, i32 %idx) nounwind { ++; CHECK-LABEL: extract_4xdouble_idx: ++; CHECK: # %bb.0: ++; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: xvreplve.d $xr0, $xr0, $a2 ++; CHECK-NEXT: fst.d $fa0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <4 x double>, ptr %src ++ %e = extractelement <4 x double> %v, i32 %idx ++ store double %e, ptr %dst ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll +new file mode 100644 +index 000000000000..b8798c97861e +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll +@@ -0,0 +1,170 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | 
FileCheck %s ++ ++define void @extract_16xi8(ptr %src, ptr %dst) nounwind { ++; CHECK-LABEL: extract_16xi8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1 ++; CHECK-NEXT: st.b $a0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <16 x i8>, ptr %src ++ %e = extractelement <16 x i8> %v, i32 1 ++ store i8 %e, ptr %dst ++ ret void ++} ++ ++define void @extract_8xi16(ptr %src, ptr %dst) nounwind { ++; CHECK-LABEL: extract_8xi16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 1 ++; CHECK-NEXT: st.h $a0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <8 x i16>, ptr %src ++ %e = extractelement <8 x i16> %v, i32 1 ++ store i16 %e, ptr %dst ++ ret void ++} ++ ++define void @extract_4xi32(ptr %src, ptr %dst) nounwind { ++; CHECK-LABEL: extract_4xi32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 1 ++; CHECK-NEXT: st.w $a0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <4 x i32>, ptr %src ++ %e = extractelement <4 x i32> %v, i32 1 ++ store i32 %e, ptr %dst ++ ret void ++} ++ ++define void @extract_2xi64(ptr %src, ptr %dst) nounwind { ++; CHECK-LABEL: extract_2xi64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 1 ++; CHECK-NEXT: st.d $a0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <2 x i64>, ptr %src ++ %e = extractelement <2 x i64> %v, i32 1 ++ store i64 %e, ptr %dst ++ ret void ++} ++ ++define void @extract_4xfloat(ptr %src, ptr %dst) nounwind { ++; CHECK-LABEL: extract_4xfloat: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vreplvei.w $vr0, $vr0, 1 ++; CHECK-NEXT: fst.s $fa0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <4 x float>, ptr %src ++ %e = extractelement <4 x float> %v, i32 1 ++ store float %e, ptr %dst ++ ret void ++} ++ ++define void @extract_2xdouble(ptr %src, ptr %dst) nounwind { ++; CHECK-LABEL: extract_2xdouble: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vreplvei.d $vr0, $vr0, 1 ++; CHECK-NEXT: fst.d $fa0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <2 x double>, ptr %src ++ %e = extractelement <2 x double> %v, i32 1 ++ store double %e, ptr %dst ++ ret void ++} ++ ++define void @extract_16xi8_idx(ptr %src, ptr %dst, i32 %idx) nounwind { ++; CHECK-LABEL: extract_16xi8_idx: ++; CHECK: # %bb.0: ++; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vreplve.b $vr0, $vr0, $a2 ++; CHECK-NEXT: movfr2gr.s $a0, $fa0 ++; CHECK-NEXT: srai.w $a0, $a0, 24 ++; CHECK-NEXT: st.b $a0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <16 x i8>, ptr %src ++ %e = extractelement <16 x i8> %v, i32 %idx ++ store i8 %e, ptr %dst ++ ret void ++} ++ ++define void @extract_8xi16_idx(ptr %src, ptr %dst, i32 %idx) nounwind { ++; CHECK-LABEL: extract_8xi16_idx: ++; CHECK: # %bb.0: ++; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vreplve.h $vr0, $vr0, $a2 ++; CHECK-NEXT: movfr2gr.s $a0, $fa0 ++; CHECK-NEXT: srai.w $a0, $a0, 16 ++; CHECK-NEXT: st.h $a0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <8 x i16>, ptr %src ++ %e = extractelement <8 x i16> %v, i32 %idx ++ store i16 %e, ptr %dst ++ ret void ++} ++ ++define void @extract_4xi32_idx(ptr %src, ptr %dst, i32 %idx) nounwind { ++; CHECK-LABEL: extract_4xi32_idx: ++; CHECK: # %bb.0: ++; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vreplve.w $vr0, $vr0, $a2 ++; CHECK-NEXT: movfr2gr.s $a0, 
$fa0 ++; CHECK-NEXT: st.w $a0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <4 x i32>, ptr %src ++ %e = extractelement <4 x i32> %v, i32 %idx ++ store i32 %e, ptr %dst ++ ret void ++} ++ ++define void @extract_2xi64_idx(ptr %src, ptr %dst, i32 %idx) nounwind { ++; CHECK-LABEL: extract_2xi64_idx: ++; CHECK: # %bb.0: ++; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vreplve.d $vr0, $vr0, $a2 ++; CHECK-NEXT: movfr2gr.d $a0, $fa0 ++; CHECK-NEXT: st.d $a0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <2 x i64>, ptr %src ++ %e = extractelement <2 x i64> %v, i32 %idx ++ store i64 %e, ptr %dst ++ ret void ++} ++ ++define void @extract_4xfloat_idx(ptr %src, ptr %dst, i32 %idx) nounwind { ++; CHECK-LABEL: extract_4xfloat_idx: ++; CHECK: # %bb.0: ++; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vreplve.w $vr0, $vr0, $a2 ++; CHECK-NEXT: fst.s $fa0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <4 x float>, ptr %src ++ %e = extractelement <4 x float> %v, i32 %idx ++ store float %e, ptr %dst ++ ret void ++} ++ ++define void @extract_2xdouble_idx(ptr %src, ptr %dst, i32 %idx) nounwind { ++; CHECK-LABEL: extract_2xdouble_idx: ++; CHECK: # %bb.0: ++; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vreplve.d $vr0, $vr0, $a2 ++; CHECK-NEXT: fst.d $fa0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <2 x double>, ptr %src ++ %e = extractelement <2 x double> %v, i32 %idx ++ store double %e, ptr %dst ++ ret void ++} +-- +2.20.1 + diff --git a/0024-LoongArch-Add-some-binary-IR-instructions-testcases-.patch b/0024-LoongArch-Add-some-binary-IR-instructions-testcases-.patch new file mode 100644 index 0000000000000000000000000000000000000000..592446d46daca1c65ad464e98d2a05765aa12525 --- /dev/null +++ b/0024-LoongArch-Add-some-binary-IR-instructions-testcases-.patch @@ -0,0 +1,1527 @@ +From cb9eb018760fee3f3767b4b5dc0f56ef7afd7d1c Mon Sep 17 00:00:00 2001 +From: leecheechen +Date: Thu, 30 Nov 2023 21:41:18 +0800 +Subject: [PATCH 24/66] [LoongArch] Add some binary IR instructions testcases + for LSX (#73929) + +The IR instructions include: +- Binary Operations: add fadd sub fsub mul fmul udiv sdiv fdiv +- Bitwise Binary Operations: shl lshr ashr + +(cherry picked from commit 29a0f3ec2b47630ce229953fe7250e741b6c10b6) + +Change-Id: Iee7835772eff7d05046ef7c771b41a80585fb12c +--- + .../LoongArch/lsx/ir-instruction/add.ll | 122 +++++++++ + .../LoongArch/lsx/ir-instruction/ashr.ll | 178 +++++++++++++ + .../LoongArch/lsx/ir-instruction/fadd.ll | 34 +++ + .../LoongArch/lsx/ir-instruction/fdiv.ll | 34 +++ + .../LoongArch/lsx/ir-instruction/fmul.ll | 34 +++ + .../LoongArch/lsx/ir-instruction/fsub.ll | 34 +++ + .../LoongArch/lsx/ir-instruction/lshr.ll | 178 +++++++++++++ + .../LoongArch/lsx/ir-instruction/mul.ll | 242 ++++++++++++++++++ + .../LoongArch/lsx/ir-instruction/sdiv.ll | 134 ++++++++++ + .../LoongArch/lsx/ir-instruction/shl.ll | 178 +++++++++++++ + .../LoongArch/lsx/ir-instruction/sub.ll | 122 +++++++++ + .../LoongArch/lsx/ir-instruction/udiv.ll | 122 +++++++++ + 12 files changed, 1412 insertions(+) + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/add.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/ashr.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fadd.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fmul.ll + create 
mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fsub.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/lshr.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mul.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sdiv.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shl.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sub.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/udiv.ll + +diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/add.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/add.ll +new file mode 100644 +index 000000000000..2a7c37c2ae34 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/add.ll +@@ -0,0 +1,122 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++define void @add_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: add_v16i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vadd.b $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = load <16 x i8>, ptr %a1 ++ %v2 = add <16 x i8> %v0, %v1 ++ store <16 x i8> %v2, ptr %res ++ ret void ++} ++ ++define void @add_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: add_v8i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vadd.h $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = load <8 x i16>, ptr %a1 ++ %v2 = add <8 x i16> %v0, %v1 ++ store <8 x i16> %v2, ptr %res ++ ret void ++} ++ ++define void @add_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: add_v4i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vadd.w $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = load <4 x i32>, ptr %a1 ++ %v2 = add <4 x i32> %v0, %v1 ++ store <4 x i32> %v2, ptr %res ++ ret void ++} ++ ++define void @add_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: add_v2i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vadd.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = load <2 x i64>, ptr %a1 ++ %v2 = add <2 x i64> %v0, %v1 ++ store <2 x i64> %v2, ptr %res ++ ret void ++} ++ ++define void @add_v16i8_31(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: add_v16i8_31: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vaddi.bu $vr0, $vr0, 31 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = add <16 x i8> %v0, ++ store <16 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @add_v8i16_31(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: add_v8i16_31: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vaddi.hu $vr0, $vr0, 31 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = add <8 x i16> %v0, ++ store <8 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @add_v4i32_31(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: add_v4i32_31: ++; 
CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vaddi.wu $vr0, $vr0, 31 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = add <4 x i32> %v0, ++ store <4 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @add_v2i64_31(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: add_v2i64_31: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vaddi.du $vr0, $vr0, 31 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = add <2 x i64> %v0, ++ store <2 x i64> %v1, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/ashr.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/ashr.ll +new file mode 100644 +index 000000000000..fbc570d77ba8 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/ashr.ll +@@ -0,0 +1,178 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++define void @ashr_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: ashr_v16i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vsra.b $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = load <16 x i8>, ptr %a1 ++ %v2 = ashr <16 x i8> %v0, %v1 ++ store <16 x i8> %v2, ptr %res ++ ret void ++} ++ ++define void @ashr_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: ashr_v8i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vsra.h $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = load <8 x i16>, ptr %a1 ++ %v2 = ashr <8 x i16> %v0, %v1 ++ store <8 x i16> %v2, ptr %res ++ ret void ++} ++ ++define void @ashr_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: ashr_v4i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vsra.w $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = load <4 x i32>, ptr %a1 ++ %v2 = ashr <4 x i32> %v0, %v1 ++ store <4 x i32> %v2, ptr %res ++ ret void ++} ++ ++define void @ashr_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: ashr_v2i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vsra.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = load <2 x i64>, ptr %a1 ++ %v2 = ashr <2 x i64> %v0, %v1 ++ store <2 x i64> %v2, ptr %res ++ ret void ++} ++ ++define void @ashr_v16i8_1(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: ashr_v16i8_1: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vsrai.b $vr0, $vr0, 1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = ashr <16 x i8> %v0, ++ store <16 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @ashr_v16i8_7(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: ashr_v16i8_7: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vsrai.b $vr0, $vr0, 7 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = ashr <16 x i8> %v0, ++ store <16 x i8> 
%v1, ptr %res ++ ret void ++} ++ ++define void @ashr_v8i16_1(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: ashr_v8i16_1: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vsrai.h $vr0, $vr0, 1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = ashr <8 x i16> %v0, ++ store <8 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @ashr_v8i16_15(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: ashr_v8i16_15: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vsrai.h $vr0, $vr0, 15 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = ashr <8 x i16> %v0, ++ store <8 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @ashr_v4i32_1(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: ashr_v4i32_1: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vsrai.w $vr0, $vr0, 1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = ashr <4 x i32> %v0, ++ store <4 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @ashr_v4i32_31(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: ashr_v4i32_31: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vsrai.w $vr0, $vr0, 31 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = ashr <4 x i32> %v0, ++ store <4 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @ashr_v2i64_1(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: ashr_v2i64_1: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vsrai.d $vr0, $vr0, 1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = ashr <2 x i64> %v0, ++ store <2 x i64> %v1, ptr %res ++ ret void ++} ++ ++define void @ashr_v2i64_63(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: ashr_v2i64_63: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vsrai.d $vr0, $vr0, 63 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = ashr <2 x i64> %v0, ++ store <2 x i64> %v1, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fadd.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fadd.ll +new file mode 100644 +index 000000000000..1fa1f611c4a3 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fadd.ll +@@ -0,0 +1,34 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++define void @fadd_v4f32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: fadd_v4f32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfadd.s $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %v2 = fadd <4 x float> %v0, %v1 ++ store <4 x float> %v2, ptr %res ++ ret void ++} ++ ++define void @fadd_v2f64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: fadd_v2f64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfadd.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %v2 = fadd <2 x double> %v0, %v1 ++ store <2 x double> 
%v2, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll +new file mode 100644 +index 000000000000..eb7c8bd9616e +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll +@@ -0,0 +1,34 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++define void @fdiv_v4f32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: fdiv_v4f32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfdiv.s $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %v2 = fdiv <4 x float> %v0, %v1 ++ store <4 x float> %v2, ptr %res ++ ret void ++} ++ ++define void @fdiv_v2f64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: fdiv_v2f64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfdiv.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %v2 = fdiv <2 x double> %v0, %v1 ++ store <2 x double> %v2, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fmul.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fmul.ll +new file mode 100644 +index 000000000000..e7fb527f7805 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fmul.ll +@@ -0,0 +1,34 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++define void @fmul_v4f32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: fmul_v4f32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfmul.s $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %v2 = fmul <4 x float> %v0, %v1 ++ store <4 x float> %v2, ptr %res ++ ret void ++} ++ ++define void @fmul_v2f64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: fmul_v2f64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfmul.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %v2 = fmul <2 x double> %v0, %v1 ++ store <2 x double> %v2, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fsub.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fsub.ll +new file mode 100644 +index 000000000000..df98182321da +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fsub.ll +@@ -0,0 +1,34 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++define void @fsub_v4f32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: fsub_v4f32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfsub.s $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %v2 
= fsub <4 x float> %v0, %v1 ++ store <4 x float> %v2, ptr %res ++ ret void ++} ++ ++define void @fsub_v2f64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: fsub_v2f64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfsub.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %v2 = fsub <2 x double> %v0, %v1 ++ store <2 x double> %v2, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/lshr.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/lshr.ll +new file mode 100644 +index 000000000000..dada52f93060 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/lshr.ll +@@ -0,0 +1,178 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++define void @lshr_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: lshr_v16i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vsrl.b $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = load <16 x i8>, ptr %a1 ++ %v2 = lshr <16 x i8> %v0, %v1 ++ store <16 x i8> %v2, ptr %res ++ ret void ++} ++ ++define void @lshr_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: lshr_v8i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vsrl.h $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = load <8 x i16>, ptr %a1 ++ %v2 = lshr <8 x i16> %v0, %v1 ++ store <8 x i16> %v2, ptr %res ++ ret void ++} ++ ++define void @lshr_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: lshr_v4i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vsrl.w $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = load <4 x i32>, ptr %a1 ++ %v2 = lshr <4 x i32> %v0, %v1 ++ store <4 x i32> %v2, ptr %res ++ ret void ++} ++ ++define void @lshr_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: lshr_v2i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vsrl.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = load <2 x i64>, ptr %a1 ++ %v2 = lshr <2 x i64> %v0, %v1 ++ store <2 x i64> %v2, ptr %res ++ ret void ++} ++ ++define void @lshr_v16i8_1(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: lshr_v16i8_1: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vsrli.b $vr0, $vr0, 1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = lshr <16 x i8> %v0, ++ store <16 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @lshr_v16i8_7(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: lshr_v16i8_7: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vsrli.b $vr0, $vr0, 7 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = lshr <16 x i8> %v0, ++ store <16 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @lshr_v8i16_1(ptr %res, ptr %a0) nounwind { ++; 
CHECK-LABEL: lshr_v8i16_1: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vsrli.h $vr0, $vr0, 1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = lshr <8 x i16> %v0, ++ store <8 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @lshr_v8i16_15(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: lshr_v8i16_15: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vsrli.h $vr0, $vr0, 15 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = lshr <8 x i16> %v0, ++ store <8 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @lshr_v4i32_1(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: lshr_v4i32_1: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vsrli.w $vr0, $vr0, 1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = lshr <4 x i32> %v0, ++ store <4 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @lshr_v4i32_31(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: lshr_v4i32_31: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vsrli.w $vr0, $vr0, 31 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = lshr <4 x i32> %v0, ++ store <4 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @lshr_v2i64_1(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: lshr_v2i64_1: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vsrli.d $vr0, $vr0, 1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = lshr <2 x i64> %v0, ++ store <2 x i64> %v1, ptr %res ++ ret void ++} ++ ++define void @lshr_v2i64_63(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: lshr_v2i64_63: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vsrli.d $vr0, $vr0, 63 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = lshr <2 x i64> %v0, ++ store <2 x i64> %v1, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mul.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mul.ll +new file mode 100644 +index 000000000000..5060240cd8b1 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mul.ll +@@ -0,0 +1,242 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++define void @mul_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: mul_v16i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vmul.b $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = load <16 x i8>, ptr %a1 ++ %v2 = mul <16 x i8> %v0, %v1 ++ store <16 x i8> %v2, ptr %res ++ ret void ++} ++ ++define void @mul_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: mul_v8i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vmul.h $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = load <8 x i16>, ptr %a1 ++ %v2 = mul <8 x i16> %v0, %v1 ++ store <8 x i16> %v2, ptr %res ++ ret void ++} ++ ++define void @mul_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: mul_v4i32: 
++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vmul.w $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = load <4 x i32>, ptr %a1 ++ %v2 = mul <4 x i32> %v0, %v1 ++ store <4 x i32> %v2, ptr %res ++ ret void ++} ++ ++define void @mul_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: mul_v2i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vmul.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = load <2 x i64>, ptr %a1 ++ %v2 = mul <2 x i64> %v0, %v1 ++ store <2 x i64> %v2, ptr %res ++ ret void ++} ++ ++define void @mul_square_v16i8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: mul_square_v16i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vmul.b $vr0, $vr0, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = mul <16 x i8> %v0, %v0 ++ store <16 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @mul_square_v8i16(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: mul_square_v8i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vmul.h $vr0, $vr0, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = mul <8 x i16> %v0, %v0 ++ store <8 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @mul_square_v4i32(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: mul_square_v4i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vmul.w $vr0, $vr0, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = mul <4 x i32> %v0, %v0 ++ store <4 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @mul_square_v2i64(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: mul_square_v2i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vmul.d $vr0, $vr0, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = mul <2 x i64> %v0, %v0 ++ store <2 x i64> %v1, ptr %res ++ ret void ++} ++ ++define void @mul_v16i8_8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: mul_v16i8_8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vslli.b $vr0, $vr0, 3 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = mul <16 x i8> %v0, ++ store <16 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @mul_v8i16_8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: mul_v8i16_8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vslli.h $vr0, $vr0, 3 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = mul <8 x i16> %v0, ++ store <8 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @mul_v4i32_8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: mul_v4i32_8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vslli.w $vr0, $vr0, 3 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = mul <4 x i32> %v0, ++ store <4 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @mul_v2i64_8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: mul_v2i64_8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: 
vslli.d $vr0, $vr0, 3 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = mul <2 x i64> %v0, ++ store <2 x i64> %v1, ptr %res ++ ret void ++} ++ ++define void @mul_v16i8_17(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: mul_v16i8_17: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: ori $a2, $zero, 17 ++; CHECK-NEXT: vreplgr2vr.b $vr0, $a2 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vmul.b $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = mul <16 x i8> %v0, ++ store <16 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @mul_v8i16_17(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: mul_v8i16_17: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: ori $a2, $zero, 17 ++; CHECK-NEXT: vreplgr2vr.h $vr0, $a2 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vmul.h $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = mul <8 x i16> %v0, ++ store <8 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @mul_v4i32_17(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: mul_v4i32_17: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: ori $a2, $zero, 17 ++; CHECK-NEXT: vreplgr2vr.w $vr0, $a2 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vmul.w $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = mul <4 x i32> %v0, ++ store <4 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @mul_v2i64_17(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: mul_v2i64_17: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: ori $a2, $zero, 17 ++; CHECK-NEXT: vreplgr2vr.d $vr0, $a2 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vmul.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = mul <2 x i64> %v0, ++ store <2 x i64> %v1, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sdiv.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sdiv.ll +new file mode 100644 +index 000000000000..b68f73a74913 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sdiv.ll +@@ -0,0 +1,134 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++define void @sdiv_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: sdiv_v16i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vdiv.b $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = load <16 x i8>, ptr %a1 ++ %v2 = sdiv <16 x i8> %v0, %v1 ++ store <16 x i8> %v2, ptr %res ++ ret void ++} ++ ++define void @sdiv_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: sdiv_v8i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vdiv.h $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = load <8 x i16>, ptr %a1 ++ %v2 = sdiv <8 x i16> %v0, %v1 ++ store <8 x i16> %v2, ptr %res ++ ret void ++} ++ ++define void @sdiv_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: sdiv_v4i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vdiv.w $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; 
CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = load <4 x i32>, ptr %a1 ++ %v2 = sdiv <4 x i32> %v0, %v1 ++ store <4 x i32> %v2, ptr %res ++ ret void ++} ++ ++define void @sdiv_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: sdiv_v2i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vdiv.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = load <2 x i64>, ptr %a1 ++ %v2 = sdiv <2 x i64> %v0, %v1 ++ store <2 x i64> %v2, ptr %res ++ ret void ++} ++ ++define void @sdiv_v16i8_8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: sdiv_v16i8_8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vsrai.b $vr1, $vr0, 7 ++; CHECK-NEXT: vsrli.b $vr1, $vr1, 5 ++; CHECK-NEXT: vadd.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: vsrai.b $vr0, $vr0, 3 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = sdiv <16 x i8> %v0, ++ store <16 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @sdiv_v8i16_8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: sdiv_v8i16_8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vsrai.h $vr1, $vr0, 15 ++; CHECK-NEXT: vsrli.h $vr1, $vr1, 13 ++; CHECK-NEXT: vadd.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: vsrai.h $vr0, $vr0, 3 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = sdiv <8 x i16> %v0, ++ store <8 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @sdiv_v4i32_8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: sdiv_v4i32_8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vsrai.w $vr1, $vr0, 31 ++; CHECK-NEXT: vsrli.w $vr1, $vr1, 29 ++; CHECK-NEXT: vadd.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: vsrai.w $vr0, $vr0, 3 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = sdiv <4 x i32> %v0, ++ store <4 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @sdiv_v2i64_8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: sdiv_v2i64_8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vsrai.d $vr1, $vr0, 63 ++; CHECK-NEXT: vsrli.d $vr1, $vr1, 61 ++; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: vsrai.d $vr0, $vr0, 3 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = sdiv <2 x i64> %v0, ++ store <2 x i64> %v1, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shl.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shl.ll +new file mode 100644 +index 000000000000..fa0aebaf28b3 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shl.ll +@@ -0,0 +1,178 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++define void @shl_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: shl_v16i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vsll.b $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = load <16 x i8>, ptr %a1 ++ %v2 = shl <16 x i8> %v0, %v1 ++ store <16 x i8> %v2, ptr %res ++ ret void ++} ++ ++define void @shl_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: shl_v8i16: ++; CHECK: # %bb.0: # %entry 
++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vsll.h $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = load <8 x i16>, ptr %a1 ++ %v2 = shl <8 x i16> %v0, %v1 ++ store <8 x i16> %v2, ptr %res ++ ret void ++} ++ ++define void @shl_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: shl_v4i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vsll.w $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = load <4 x i32>, ptr %a1 ++ %v2 = shl <4 x i32> %v0, %v1 ++ store <4 x i32> %v2, ptr %res ++ ret void ++} ++ ++define void @shl_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: shl_v2i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vsll.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = load <2 x i64>, ptr %a1 ++ %v2 = shl <2 x i64> %v0, %v1 ++ store <2 x i64> %v2, ptr %res ++ ret void ++} ++ ++define void @shl_v16i8_1(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: shl_v16i8_1: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vslli.b $vr0, $vr0, 1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = shl <16 x i8> %v0, ++ store <16 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @shl_v16i8_7(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: shl_v16i8_7: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vslli.b $vr0, $vr0, 7 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = shl <16 x i8> %v0, ++ store <16 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @shl_v8i16_1(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: shl_v8i16_1: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vslli.h $vr0, $vr0, 1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = shl <8 x i16> %v0, ++ store <8 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @shl_v8i16_15(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: shl_v8i16_15: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vslli.h $vr0, $vr0, 15 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = shl <8 x i16> %v0, ++ store <8 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @shl_v4i32_1(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: shl_v4i32_1: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vslli.w $vr0, $vr0, 1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = shl <4 x i32> %v0, ++ store <4 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @shl_v4i32_31(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: shl_v4i32_31: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vslli.w $vr0, $vr0, 31 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = shl <4 x i32> %v0, ++ store <4 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @shl_v2i64_1(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: shl_v2i64_1: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vslli.d $vr0, 
$vr0, 1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = shl <2 x i64> %v0, ++ store <2 x i64> %v1, ptr %res ++ ret void ++} ++ ++define void @shl_v2i64_63(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: shl_v2i64_63: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vslli.d $vr0, $vr0, 63 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = shl <2 x i64> %v0, ++ store <2 x i64> %v1, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sub.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sub.ll +new file mode 100644 +index 000000000000..25b4623a47d1 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sub.ll +@@ -0,0 +1,122 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++define void @sub_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: sub_v16i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vsub.b $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = load <16 x i8>, ptr %a1 ++ %v2 = sub <16 x i8> %v0, %v1 ++ store <16 x i8> %v2, ptr %res ++ ret void ++} ++ ++define void @sub_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: sub_v8i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vsub.h $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = load <8 x i16>, ptr %a1 ++ %v2 = sub <8 x i16> %v0, %v1 ++ store <8 x i16> %v2, ptr %res ++ ret void ++} ++ ++define void @sub_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: sub_v4i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vsub.w $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = load <4 x i32>, ptr %a1 ++ %v2 = sub <4 x i32> %v0, %v1 ++ store <4 x i32> %v2, ptr %res ++ ret void ++} ++ ++define void @sub_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: sub_v2i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vsub.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = load <2 x i64>, ptr %a1 ++ %v2 = sub <2 x i64> %v0, %v1 ++ store <2 x i64> %v2, ptr %res ++ ret void ++} ++ ++define void @sub_v16i8_31(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: sub_v16i8_31: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vsubi.bu $vr0, $vr0, 31 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = sub <16 x i8> %v0, ++ store <16 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @sub_v8i16_31(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: sub_v8i16_31: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vsubi.hu $vr0, $vr0, 31 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = sub <8 x i16> %v0, ++ store <8 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @sub_v4i32_31(ptr %res, ptr %a0) nounwind { ++; 
CHECK-LABEL: sub_v4i32_31: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vsubi.wu $vr0, $vr0, 31 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = sub <4 x i32> %v0, ++ store <4 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @sub_v2i64_31(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: sub_v2i64_31: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vsubi.du $vr0, $vr0, 31 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = sub <2 x i64> %v0, ++ store <2 x i64> %v1, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/udiv.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/udiv.ll +new file mode 100644 +index 000000000000..abb60b91dd48 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/udiv.ll +@@ -0,0 +1,122 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++define void @udiv_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: udiv_v16i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vdiv.bu $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = load <16 x i8>, ptr %a1 ++ %v2 = udiv <16 x i8> %v0, %v1 ++ store <16 x i8> %v2, ptr %res ++ ret void ++} ++ ++define void @udiv_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: udiv_v8i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vdiv.hu $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = load <8 x i16>, ptr %a1 ++ %v2 = udiv <8 x i16> %v0, %v1 ++ store <8 x i16> %v2, ptr %res ++ ret void ++} ++ ++define void @udiv_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: udiv_v4i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vdiv.wu $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = load <4 x i32>, ptr %a1 ++ %v2 = udiv <4 x i32> %v0, %v1 ++ store <4 x i32> %v2, ptr %res ++ ret void ++} ++ ++define void @udiv_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: udiv_v2i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vdiv.du $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = load <2 x i64>, ptr %a1 ++ %v2 = udiv <2 x i64> %v0, %v1 ++ store <2 x i64> %v2, ptr %res ++ ret void ++} ++ ++define void @udiv_v16i8_8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: udiv_v16i8_8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vsrli.b $vr0, $vr0, 3 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = udiv <16 x i8> %v0, ++ store <16 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @udiv_v8i16_8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: udiv_v8i16_8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vsrli.h $vr0, $vr0, 3 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = udiv 
<8 x i16> %v0, ++ store <8 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @udiv_v4i32_8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: udiv_v4i32_8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vsrli.w $vr0, $vr0, 3 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = udiv <4 x i32> %v0, ++ store <4 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @udiv_v2i64_8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: udiv_v2i64_8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vsrli.d $vr0, $vr0, 3 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = udiv <2 x i64> %v0, ++ store <2 x i64> %v1, ptr %res ++ ret void ++} +-- +2.20.1 + diff --git a/0025-LoongArch-Add-codegen-support-for-insertelement.patch b/0025-LoongArch-Add-codegen-support-for-insertelement.patch new file mode 100644 index 0000000000000000000000000000000000000000..22cb39e8131a9c1d850b5535c80f8eb40d5fadd3 --- /dev/null +++ b/0025-LoongArch-Add-codegen-support-for-insertelement.patch @@ -0,0 +1,701 @@ +From bae7130f659fb0d052523d6e667684f178a9515b Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Thu, 16 Nov 2023 20:05:01 +0800 +Subject: [PATCH 25/66] [LoongArch] Add codegen support for insertelement + +(cherry picked from commit f2cbd1fdf702afe31d0198c9185e08dc2b104252) + +Change-Id: I29ad0969265aa1db39219a40b5a02dd53057a6c1 +--- + .../LoongArch/LoongArchISelLowering.cpp | 82 +++++- + .../Target/LoongArch/LoongArchISelLowering.h | 1 + + .../LoongArch/LoongArchLASXInstrInfo.td | 18 ++ + .../Target/LoongArch/LoongArchLSXInstrInfo.td | 5 + + .../lasx/ir-instruction/insertelement.ll | 276 ++++++++++++++++++ + .../lsx/ir-instruction/insertelement.ll | 196 +++++++++++++ + 6 files changed, 576 insertions(+), 2 deletions(-) + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll + +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +index 26e94a53b344..492339ce2151 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +@@ -237,7 +237,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + // FIXME: For BUILD_VECTOR, it is temporarily set to `Legal` here, and it + // will be `Custom` handled in the future. + setOperationAction(ISD::BUILD_VECTOR, VT, Legal); +- setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal); ++ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal); + } + for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) { +@@ -267,7 +267,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + + // FIXME: Same as above. 
+   setOperationAction(ISD::BUILD_VECTOR, VT, Legal);
+-    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal);
++    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
+     setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
+   }
+   for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
+@@ -369,10 +369,20 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
+     return lowerRETURNADDR(Op, DAG);
+   case ISD::WRITE_REGISTER:
+     return lowerWRITE_REGISTER(Op, DAG);
++  case ISD::INSERT_VECTOR_ELT:
++    return lowerINSERT_VECTOR_ELT(Op, DAG);
+   }
+   return SDValue();
+ }
+ 
++SDValue
++LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
++                                                SelectionDAG &DAG) const {
++  if (isa<ConstantSDNode>(Op->getOperand(2)))
++    return Op;
++  return SDValue();
++}
++
+ SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
+                                                      SelectionDAG &DAG) const {
+ 
+@@ -3040,6 +3050,71 @@ emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB,
+   return SinkBB;
+ }
+ 
++static MachineBasicBlock *
++emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB,
++                     const LoongArchSubtarget &Subtarget) {
++  unsigned InsOp;
++  unsigned HalfSize;
++  switch (MI.getOpcode()) {
++  default:
++    llvm_unreachable("Unexpected opcode");
++  case LoongArch::PseudoXVINSGR2VR_B:
++    HalfSize = 16;
++    InsOp = LoongArch::VINSGR2VR_B;
++    break;
++  case LoongArch::PseudoXVINSGR2VR_H:
++    HalfSize = 8;
++    InsOp = LoongArch::VINSGR2VR_H;
++    break;
++  }
++  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
++  const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
++  const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
++  DebugLoc DL = MI.getDebugLoc();
++  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
++  // XDst = vector_insert XSrc, Elt, Idx
++  Register XDst = MI.getOperand(0).getReg();
++  Register XSrc = MI.getOperand(1).getReg();
++  Register Elt = MI.getOperand(2).getReg();
++  unsigned Idx = MI.getOperand(3).getImm();
++
++  Register ScratchReg1 = XSrc;
++  if (Idx >= HalfSize) {
++    ScratchReg1 = MRI.createVirtualRegister(RC);
++    BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg1)
++        .addReg(XSrc)
++        .addReg(XSrc)
++        .addImm(1);
++  }
++
++  Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
++  Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);
++  BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
++      .addReg(ScratchReg1, 0, LoongArch::sub_128);
++  BuildMI(*BB, MI, DL, TII->get(InsOp), ScratchSubReg2)
++      .addReg(ScratchSubReg1)
++      .addReg(Elt)
++      .addImm(Idx >= HalfSize ?
Idx - HalfSize : Idx); ++ ++ Register ScratchReg2 = XDst; ++ if (Idx >= HalfSize) ++ ScratchReg2 = MRI.createVirtualRegister(RC); ++ ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), ScratchReg2) ++ .addImm(0) ++ .addReg(ScratchSubReg2) ++ .addImm(LoongArch::sub_128); ++ ++ if (Idx >= HalfSize) ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), XDst) ++ .addReg(XSrc) ++ .addReg(ScratchReg2) ++ .addImm(2); ++ ++ MI.eraseFromParent(); ++ return BB; ++} ++ + MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter( + MachineInstr &MI, MachineBasicBlock *BB) const { + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); +@@ -3095,6 +3170,9 @@ MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter( + case LoongArch::PseudoXVBNZ_W: + case LoongArch::PseudoXVBNZ_D: + return emitVecCondBranchPseudo(MI, BB, Subtarget); ++ case LoongArch::PseudoXVINSGR2VR_B: ++ case LoongArch::PseudoXVINSGR2VR_H: ++ return emitPseudoXVINSGR2VR(MI, BB, Subtarget); + } + } + +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +index 7765057ebffb..29028ff963d0 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +@@ -275,6 +275,7 @@ private: + SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; + + bool isFPImmLegal(const APFloat &Imm, EVT VT, + bool ForCodeSize) const override; +diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +index 380206ddcf10..475565db15c9 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +@@ -1065,6 +1065,13 @@ def PseudoXVBZ_W : VecCond; + def PseudoXVBZ_D : VecCond; + def PseudoXVBZ : VecCond; + ++let usesCustomInserter = 1, Constraints = "$xd = $dst" in { ++def PseudoXVINSGR2VR_B ++ : Pseudo<(outs LASX256:$dst), (ins LASX256:$xd, GPR:$rj, uimm5:$imm)>; ++def PseudoXVINSGR2VR_H ++ : Pseudo<(outs LASX256:$dst), (ins LASX256:$xd, GPR:$rj, uimm4:$imm)>; ++} // usesCustomInserter = 1, Constraints = "$xd = $dst" ++ + } // Predicates = [HasExtLASX] + + multiclass PatXr { +@@ -1365,12 +1372,23 @@ def : Pat<(fma v8f32:$xj, v8f32:$xk, v8f32:$xa), + def : Pat<(fma v4f64:$xj, v4f64:$xk, v4f64:$xa), + (XVFMADD_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>; + ++// PseudoXVINSGR2VR_{B/H} ++def : Pat<(vector_insert v32i8:$xd, GRLenVT:$rj, uimm5:$imm), ++ (PseudoXVINSGR2VR_B v32i8:$xd, GRLenVT:$rj, uimm5:$imm)>; ++def : Pat<(vector_insert v16i16:$xd, GRLenVT:$rj, uimm4:$imm), ++ (PseudoXVINSGR2VR_H v16i16:$xd, GRLenVT:$rj, uimm4:$imm)>; ++ + // XVINSGR2VR_{W/D} + def : Pat<(vector_insert v8i32:$xd, GRLenVT:$rj, uimm3:$imm), + (XVINSGR2VR_W v8i32:$xd, GRLenVT:$rj, uimm3:$imm)>; + def : Pat<(vector_insert v4i64:$xd, GRLenVT:$rj, uimm2:$imm), + (XVINSGR2VR_D v4i64:$xd, GRLenVT:$rj, uimm2:$imm)>; + ++def : Pat<(vector_insert v8f32:$vd, FPR32:$fj, uimm3:$imm), ++ (XVINSGR2VR_W $vd, (COPY_TO_REGCLASS FPR32:$fj, GPR), uimm3:$imm)>; ++def : Pat<(vector_insert v4f64:$vd, FPR64:$fj, uimm2:$imm), ++ (XVINSGR2VR_D $vd, (COPY_TO_REGCLASS FPR64:$fj, GPR), uimm2:$imm)>; ++ + // XVPICKVE2GR_W[U] + def : Pat<(loongarch_vpick_sext_elt v8i32:$xd, uimm3:$imm, i32), + (XVPICKVE2GR_W v8i32:$xd, uimm3:$imm)>; 
+diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +index 980870e34503..d8fd132a1c59 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +@@ -1462,6 +1462,11 @@ def : Pat<(vector_insert v4i32:$vd, GRLenVT:$rj, uimm2:$imm), + def : Pat<(vector_insert v2i64:$vd, GRLenVT:$rj, uimm1:$imm), + (VINSGR2VR_D v2i64:$vd, GRLenVT:$rj, uimm1:$imm)>; + ++def : Pat<(vector_insert v4f32:$vd, FPR32:$fj, uimm2:$imm), ++ (VINSGR2VR_W $vd, (COPY_TO_REGCLASS FPR32:$fj, GPR), uimm2:$imm)>; ++def : Pat<(vector_insert v2f64:$vd, FPR64:$fj, uimm1:$imm), ++ (VINSGR2VR_D $vd, (COPY_TO_REGCLASS FPR64:$fj, GPR), uimm1:$imm)>; ++ + // VPICKVE2GR_{B/H/W}[U] + def : Pat<(loongarch_vpick_sext_elt v16i8:$vd, uimm4:$imm, i8), + (VPICKVE2GR_B v16i8:$vd, uimm4:$imm)>; +diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll +new file mode 100644 +index 000000000000..e571a5d2e4cf +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll +@@ -0,0 +1,276 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++define void @insert_32xi8(ptr %src, ptr %dst, i8 %in) nounwind { ++; CHECK-LABEL: insert_32xi8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a2, 1 ++; CHECK-NEXT: xvst $xr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <32 x i8>, ptr %src ++ %v_new = insertelement <32 x i8> %v, i8 %in, i32 1 ++ store <32 x i8> %v_new, ptr %dst ++ ret void ++} ++ ++define void @insert_32xi8_upper(ptr %src, ptr %dst, i8 %in) nounwind { ++; CHECK-LABEL: insert_32xi8_upper: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: xvori.b $xr1, $xr0, 0 ++; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 ++; CHECK-NEXT: vinsgr2vr.b $vr1, $a2, 0 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 ++; CHECK-NEXT: xvst $xr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <32 x i8>, ptr %src ++ %v_new = insertelement <32 x i8> %v, i8 %in, i32 16 ++ store <32 x i8> %v_new, ptr %dst ++ ret void ++} ++ ++define void @insert_16xi16(ptr %src, ptr %dst, i16 %in) nounwind { ++; CHECK-LABEL: insert_16xi16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: vinsgr2vr.h $vr0, $a2, 1 ++; CHECK-NEXT: xvst $xr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <16 x i16>, ptr %src ++ %v_new = insertelement <16 x i16> %v, i16 %in, i32 1 ++ store <16 x i16> %v_new, ptr %dst ++ ret void ++} ++ ++define void @insert_16xi16_upper(ptr %src, ptr %dst, i16 %in) nounwind { ++; CHECK-LABEL: insert_16xi16_upper: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: xvori.b $xr1, $xr0, 0 ++; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 ++; CHECK-NEXT: vinsgr2vr.h $vr1, $a2, 0 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 ++; CHECK-NEXT: xvst $xr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <16 x i16>, ptr %src ++ %v_new = insertelement <16 x i16> %v, i16 %in, i32 8 ++ store <16 x i16> %v_new, ptr %dst ++ ret void ++} ++ ++define void @insert_8xi32(ptr %src, ptr %dst, i32 %in) nounwind { ++; CHECK-LABEL: insert_8xi32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: xvinsgr2vr.w $xr0, $a2, 1 ++; CHECK-NEXT: xvst $xr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <8 x i32>, ptr %src ++ %v_new = insertelement <8 x 
i32> %v, i32 %in, i32 1 ++ store <8 x i32> %v_new, ptr %dst ++ ret void ++} ++ ++define void @insert_4xi64(ptr %src, ptr %dst, i64 %in) nounwind { ++; CHECK-LABEL: insert_4xi64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: xvinsgr2vr.d $xr0, $a2, 1 ++; CHECK-NEXT: xvst $xr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <4 x i64>, ptr %src ++ %v_new = insertelement <4 x i64> %v, i64 %in, i32 1 ++ store <4 x i64> %v_new, ptr %dst ++ ret void ++} ++ ++define void @insert_8xfloat(ptr %src, ptr %dst, float %in) nounwind { ++; CHECK-LABEL: insert_8xfloat: ++; CHECK: # %bb.0: ++; CHECK-NEXT: movfr2gr.s $a2, $fa0 ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: xvinsgr2vr.w $xr0, $a2, 1 ++; CHECK-NEXT: xvst $xr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <8 x float>, ptr %src ++ %v_new = insertelement <8 x float> %v, float %in, i32 1 ++ store <8 x float> %v_new, ptr %dst ++ ret void ++} ++ ++define void @insert_4xdouble(ptr %src, ptr %dst, double %in) nounwind { ++; CHECK-LABEL: insert_4xdouble: ++; CHECK: # %bb.0: ++; CHECK-NEXT: movfr2gr.d $a2, $fa0 ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: xvinsgr2vr.d $xr0, $a2, 1 ++; CHECK-NEXT: xvst $xr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <4 x double>, ptr %src ++ %v_new = insertelement <4 x double> %v, double %in, i32 1 ++ store <4 x double> %v_new, ptr %dst ++ ret void ++} ++ ++define void @insert_32xi8_idx(ptr %src, ptr %dst, i8 %in, i32 %idx) nounwind { ++; CHECK-LABEL: insert_32xi8_idx: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -64 ++; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: addi.d $fp, $sp, 64 ++; CHECK-NEXT: srli.d $a4, $sp, 5 ++; CHECK-NEXT: slli.d $sp, $a4, 5 ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: xvst $xr0, $sp, 0 ++; CHECK-NEXT: addi.d $a0, $sp, 0 ++; CHECK-NEXT: bstrins.d $a0, $a3, 4, 0 ++; CHECK-NEXT: st.b $a2, $a0, 0 ++; CHECK-NEXT: xvld $xr0, $sp, 0 ++; CHECK-NEXT: xvst $xr0, $a1, 0 ++; CHECK-NEXT: addi.d $sp, $fp, -64 ++; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 64 ++; CHECK-NEXT: ret ++ %v = load volatile <32 x i8>, ptr %src ++ %v_new = insertelement <32 x i8> %v, i8 %in, i32 %idx ++ store <32 x i8> %v_new, ptr %dst ++ ret void ++} ++ ++define void @insert_16xi16_idx(ptr %src, ptr %dst, i16 %in, i32 %idx) nounwind { ++; CHECK-LABEL: insert_16xi16_idx: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -64 ++; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: addi.d $fp, $sp, 64 ++; CHECK-NEXT: srli.d $a4, $sp, 5 ++; CHECK-NEXT: slli.d $sp, $a4, 5 ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: xvst $xr0, $sp, 0 ++; CHECK-NEXT: addi.d $a0, $sp, 0 ++; CHECK-NEXT: bstrins.d $a0, $a3, 4, 1 ++; CHECK-NEXT: st.h $a2, $a0, 0 ++; CHECK-NEXT: xvld $xr0, $sp, 0 ++; CHECK-NEXT: xvst $xr0, $a1, 0 ++; CHECK-NEXT: addi.d $sp, $fp, -64 ++; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 64 ++; CHECK-NEXT: ret ++ %v = load volatile <16 x i16>, ptr %src ++ %v_new = insertelement <16 x i16> %v, i16 %in, i32 %idx ++ store <16 x i16> %v_new, ptr %dst ++ ret void ++} ++ ++define void @insert_8xi32_idx(ptr %src, ptr %dst, i32 %in, i32 %idx) nounwind { ++; CHECK-LABEL: insert_8xi32_idx: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d 
$sp, $sp, -64 ++; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: addi.d $fp, $sp, 64 ++; CHECK-NEXT: srli.d $a4, $sp, 5 ++; CHECK-NEXT: slli.d $sp, $a4, 5 ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: xvst $xr0, $sp, 0 ++; CHECK-NEXT: addi.d $a0, $sp, 0 ++; CHECK-NEXT: bstrins.d $a0, $a3, 4, 2 ++; CHECK-NEXT: st.w $a2, $a0, 0 ++; CHECK-NEXT: xvld $xr0, $sp, 0 ++; CHECK-NEXT: xvst $xr0, $a1, 0 ++; CHECK-NEXT: addi.d $sp, $fp, -64 ++; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 64 ++; CHECK-NEXT: ret ++ %v = load volatile <8 x i32>, ptr %src ++ %v_new = insertelement <8 x i32> %v, i32 %in, i32 %idx ++ store <8 x i32> %v_new, ptr %dst ++ ret void ++} ++ ++define void @insert_4xi64_idx(ptr %src, ptr %dst, i64 %in, i32 %idx) nounwind { ++; CHECK-LABEL: insert_4xi64_idx: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -64 ++; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: addi.d $fp, $sp, 64 ++; CHECK-NEXT: srli.d $a4, $sp, 5 ++; CHECK-NEXT: slli.d $sp, $a4, 5 ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: xvst $xr0, $sp, 0 ++; CHECK-NEXT: addi.d $a0, $sp, 0 ++; CHECK-NEXT: bstrins.d $a0, $a3, 4, 3 ++; CHECK-NEXT: st.d $a2, $a0, 0 ++; CHECK-NEXT: xvld $xr0, $sp, 0 ++; CHECK-NEXT: xvst $xr0, $a1, 0 ++; CHECK-NEXT: addi.d $sp, $fp, -64 ++; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 64 ++; CHECK-NEXT: ret ++ %v = load volatile <4 x i64>, ptr %src ++ %v_new = insertelement <4 x i64> %v, i64 %in, i32 %idx ++ store <4 x i64> %v_new, ptr %dst ++ ret void ++} ++ ++define void @insert_8xfloat_idx(ptr %src, ptr %dst, float %in, i32 %idx) nounwind { ++; CHECK-LABEL: insert_8xfloat_idx: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -64 ++; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: addi.d $fp, $sp, 64 ++; CHECK-NEXT: srli.d $a3, $sp, 5 ++; CHECK-NEXT: slli.d $sp, $a3, 5 ++; CHECK-NEXT: xvld $xr1, $a0, 0 ++; CHECK-NEXT: xvst $xr1, $sp, 0 ++; CHECK-NEXT: addi.d $a0, $sp, 0 ++; CHECK-NEXT: bstrins.d $a0, $a2, 4, 2 ++; CHECK-NEXT: fst.s $fa0, $a0, 0 ++; CHECK-NEXT: xvld $xr0, $sp, 0 ++; CHECK-NEXT: xvst $xr0, $a1, 0 ++; CHECK-NEXT: addi.d $sp, $fp, -64 ++; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 64 ++; CHECK-NEXT: ret ++ %v = load volatile <8 x float>, ptr %src ++ %v_new = insertelement <8 x float> %v, float %in, i32 %idx ++ store <8 x float> %v_new, ptr %dst ++ ret void ++} ++ ++define void @insert_4xdouble_idx(ptr %src, ptr %dst, double %in, i32 %idx) nounwind { ++; CHECK-LABEL: insert_4xdouble_idx: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -64 ++; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: addi.d $fp, $sp, 64 ++; CHECK-NEXT: srli.d $a3, $sp, 5 ++; CHECK-NEXT: slli.d $sp, $a3, 5 ++; CHECK-NEXT: xvld $xr1, $a0, 0 ++; CHECK-NEXT: xvst $xr1, $sp, 0 ++; CHECK-NEXT: addi.d $a0, $sp, 0 ++; CHECK-NEXT: bstrins.d $a0, $a2, 4, 3 ++; CHECK-NEXT: fst.d $fa0, $a0, 0 ++; CHECK-NEXT: xvld $xr0, $sp, 0 ++; CHECK-NEXT: xvst $xr0, $a1, 0 ++; CHECK-NEXT: addi.d $sp, $fp, -64 ++; CHECK-NEXT: 
ld.d $fp, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 64 ++; CHECK-NEXT: ret ++ %v = load volatile <4 x double>, ptr %src ++ %v_new = insertelement <4 x double> %v, double %in, i32 %idx ++ store <4 x double> %v_new, ptr %dst ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll +new file mode 100644 +index 000000000000..a9834591aa0e +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll +@@ -0,0 +1,196 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++define void @insert_16xi8(ptr %src, ptr %dst, i8 %ins) nounwind { ++; CHECK-LABEL: insert_16xi8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a2, 1 ++; CHECK-NEXT: vst $vr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <16 x i8>, ptr %src ++ %v_new = insertelement <16 x i8> %v, i8 %ins, i32 1 ++ store <16 x i8> %v_new, ptr %dst ++ ret void ++} ++ ++define void @insert_8xi16(ptr %src, ptr %dst, i16 %ins) nounwind { ++; CHECK-LABEL: insert_8xi16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vinsgr2vr.h $vr0, $a2, 1 ++; CHECK-NEXT: vst $vr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <8 x i16>, ptr %src ++ %v_new = insertelement <8 x i16> %v, i16 %ins, i32 1 ++ store <8 x i16> %v_new, ptr %dst ++ ret void ++} ++ ++define void @insert_4xi32(ptr %src, ptr %dst, i32 %ins) nounwind { ++; CHECK-LABEL: insert_4xi32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vinsgr2vr.w $vr0, $a2, 1 ++; CHECK-NEXT: vst $vr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <4 x i32>, ptr %src ++ %v_new = insertelement <4 x i32> %v, i32 %ins, i32 1 ++ store <4 x i32> %v_new, ptr %dst ++ ret void ++} ++ ++define void @insert_2xi64(ptr %src, ptr %dst, i64 %ins) nounwind { ++; CHECK-LABEL: insert_2xi64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vinsgr2vr.d $vr0, $a2, 1 ++; CHECK-NEXT: vst $vr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <2 x i64>, ptr %src ++ %v_new = insertelement <2 x i64> %v, i64 %ins, i32 1 ++ store <2 x i64> %v_new, ptr %dst ++ ret void ++} ++ ++define void @insert_4xfloat(ptr %src, ptr %dst, float %ins) nounwind { ++; CHECK-LABEL: insert_4xfloat: ++; CHECK: # %bb.0: ++; CHECK-NEXT: movfr2gr.s $a2, $fa0 ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vinsgr2vr.w $vr0, $a2, 1 ++; CHECK-NEXT: vst $vr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <4 x float>, ptr %src ++ %v_new = insertelement <4 x float> %v, float %ins, i32 1 ++ store <4 x float> %v_new, ptr %dst ++ ret void ++} ++ ++define void @insert_2xdouble(ptr %src, ptr %dst, double %ins) nounwind { ++; CHECK-LABEL: insert_2xdouble: ++; CHECK: # %bb.0: ++; CHECK-NEXT: movfr2gr.d $a2, $fa0 ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vinsgr2vr.d $vr0, $a2, 1 ++; CHECK-NEXT: vst $vr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load volatile <2 x double>, ptr %src ++ %v_new = insertelement <2 x double> %v, double %ins, i32 1 ++ store <2 x double> %v_new, ptr %dst ++ ret void ++} ++ ++define void @insert_16xi8_idx(ptr %src, ptr %dst, i8 %ins, i32 %idx) nounwind { ++; CHECK-LABEL: insert_16xi8_idx: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -16 ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vst $vr0, $sp, 0 ++; 
CHECK-NEXT: addi.d $a0, $sp, 0 ++; CHECK-NEXT: bstrins.d $a0, $a3, 3, 0 ++; CHECK-NEXT: st.b $a2, $a0, 0 ++; CHECK-NEXT: vld $vr0, $sp, 0 ++; CHECK-NEXT: vst $vr0, $a1, 0 ++; CHECK-NEXT: addi.d $sp, $sp, 16 ++; CHECK-NEXT: ret ++ %v = load volatile <16 x i8>, ptr %src ++ %v_new = insertelement <16 x i8> %v, i8 %ins, i32 %idx ++ store <16 x i8> %v_new, ptr %dst ++ ret void ++} ++ ++define void @insert_8xi16_idx(ptr %src, ptr %dst, i16 %ins, i32 %idx) nounwind { ++; CHECK-LABEL: insert_8xi16_idx: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -16 ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vst $vr0, $sp, 0 ++; CHECK-NEXT: addi.d $a0, $sp, 0 ++; CHECK-NEXT: bstrins.d $a0, $a3, 3, 1 ++; CHECK-NEXT: st.h $a2, $a0, 0 ++; CHECK-NEXT: vld $vr0, $sp, 0 ++; CHECK-NEXT: vst $vr0, $a1, 0 ++; CHECK-NEXT: addi.d $sp, $sp, 16 ++; CHECK-NEXT: ret ++ %v = load volatile <8 x i16>, ptr %src ++ %v_new = insertelement <8 x i16> %v, i16 %ins, i32 %idx ++ store <8 x i16> %v_new, ptr %dst ++ ret void ++} ++ ++define void @insert_4xi32_idx(ptr %src, ptr %dst, i32 %ins, i32 %idx) nounwind { ++; CHECK-LABEL: insert_4xi32_idx: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -16 ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vst $vr0, $sp, 0 ++; CHECK-NEXT: addi.d $a0, $sp, 0 ++; CHECK-NEXT: bstrins.d $a0, $a3, 3, 2 ++; CHECK-NEXT: st.w $a2, $a0, 0 ++; CHECK-NEXT: vld $vr0, $sp, 0 ++; CHECK-NEXT: vst $vr0, $a1, 0 ++; CHECK-NEXT: addi.d $sp, $sp, 16 ++; CHECK-NEXT: ret ++ %v = load volatile <4 x i32>, ptr %src ++ %v_new = insertelement <4 x i32> %v, i32 %ins, i32 %idx ++ store <4 x i32> %v_new, ptr %dst ++ ret void ++} ++ ++define void @insert_2xi64_idx(ptr %src, ptr %dst, i64 %ins, i32 %idx) nounwind { ++; CHECK-LABEL: insert_2xi64_idx: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -16 ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vst $vr0, $sp, 0 ++; CHECK-NEXT: addi.d $a0, $sp, 0 ++; CHECK-NEXT: bstrins.d $a0, $a3, 3, 3 ++; CHECK-NEXT: st.d $a2, $a0, 0 ++; CHECK-NEXT: vld $vr0, $sp, 0 ++; CHECK-NEXT: vst $vr0, $a1, 0 ++; CHECK-NEXT: addi.d $sp, $sp, 16 ++; CHECK-NEXT: ret ++ %v = load volatile <2 x i64>, ptr %src ++ %v_new = insertelement <2 x i64> %v, i64 %ins, i32 %idx ++ store <2 x i64> %v_new, ptr %dst ++ ret void ++} ++ ++define void @insert_4xfloat_idx(ptr %src, ptr %dst, float %ins, i32 %idx) nounwind { ++; CHECK-LABEL: insert_4xfloat_idx: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -16 ++; CHECK-NEXT: vld $vr1, $a0, 0 ++; CHECK-NEXT: vst $vr1, $sp, 0 ++; CHECK-NEXT: addi.d $a0, $sp, 0 ++; CHECK-NEXT: bstrins.d $a0, $a2, 3, 2 ++; CHECK-NEXT: fst.s $fa0, $a0, 0 ++; CHECK-NEXT: vld $vr0, $sp, 0 ++; CHECK-NEXT: vst $vr0, $a1, 0 ++; CHECK-NEXT: addi.d $sp, $sp, 16 ++; CHECK-NEXT: ret ++ %v = load volatile <4 x float>, ptr %src ++ %v_new = insertelement <4 x float> %v, float %ins, i32 %idx ++ store <4 x float> %v_new, ptr %dst ++ ret void ++} ++ ++define void @insert_2xdouble_idx(ptr %src, ptr %dst, double %ins, i32 %idx) nounwind { ++; CHECK-LABEL: insert_2xdouble_idx: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -16 ++; CHECK-NEXT: vld $vr1, $a0, 0 ++; CHECK-NEXT: vst $vr1, $sp, 0 ++; CHECK-NEXT: addi.d $a0, $sp, 0 ++; CHECK-NEXT: bstrins.d $a0, $a2, 3, 3 ++; CHECK-NEXT: fst.d $fa0, $a0, 0 ++; CHECK-NEXT: vld $vr0, $sp, 0 ++; CHECK-NEXT: vst $vr0, $a1, 0 ++; CHECK-NEXT: addi.d $sp, $sp, 16 ++; CHECK-NEXT: ret ++ %v = load volatile <2 x double>, ptr %src ++ %v_new = insertelement <2 x double> %v, double %ins, i32 %idx ++ store <2 x double> %v_new, ptr %dst ++ ret void 
++}
+--
+2.20.1
+
diff --git a/0026-LoongArch-Custom-lowering-ISD-BUILD_VECTOR.patch b/0026-LoongArch-Custom-lowering-ISD-BUILD_VECTOR.patch
new file mode 100644
index 0000000000000000000000000000000000000000..8e35b135d03c32a595ef0d164ff01037f9c0532e
--- /dev/null
+++ b/0026-LoongArch-Custom-lowering-ISD-BUILD_VECTOR.patch
@@ -0,0 +1,1302 @@
+From ee5ceea855a05e3aceda6fa092e94741ddc4bb53 Mon Sep 17 00:00:00 2001
+From: wanglei
+Date: Wed, 25 Oct 2023 17:00:32 +0800
+Subject: [PATCH 26/66] [LoongArch] Custom lowering `ISD::BUILD_VECTOR`
+
+(cherry picked from commit add224c0a094d20389d3659f7b6e496df461a976)
+
+Change-Id: Ib397b02dde748fbb678a8bf5e9b0208f21245fd2
+---
+ .../LoongArch/LoongArchISelDAGToDAG.cpp       |  52 +-
+ .../LoongArch/LoongArchISelLowering.cpp       | 102 +++-
+ .../Target/LoongArch/LoongArchISelLowering.h  |   1 +
+ .../LoongArch/LoongArchLASXInstrInfo.td       |  13 +
+ .../Target/LoongArch/LoongArchLSXInstrInfo.td |  12 +-
+ .../CodeGen/LoongArch/lasx/build-vector.ll    | 551 ++++++++++++++++++
+ .../CodeGen/LoongArch/lsx/build-vector.ll     | 376 ++++++++++++
+ .../LoongArch/lsx/ir-instruction/mul.ll       |  28 +-
+ 8 files changed, 1112 insertions(+), 23 deletions(-)
+ create mode 100644 llvm/test/CodeGen/LoongArch/lasx/build-vector.ll
+ create mode 100644 llvm/test/CodeGen/LoongArch/lsx/build-vector.ll
+
+diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
+index f55184019988..01b2f720f902 100644
+--- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
++++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
+@@ -77,13 +77,63 @@ void LoongArchDAGToDAGISel::Select(SDNode *Node) {
+     return;
+   }
+   case ISD::BITCAST: {
+-    if (VT.is128BitVector() || VT.is512BitVector()) {
++    if (VT.is128BitVector() || VT.is256BitVector()) {
+       ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
+       CurDAG->RemoveDeadNode(Node);
+       return;
+     }
+     break;
+   }
++  case ISD::BUILD_VECTOR: {
++    // Select appropriate [x]vrepli.[bhwd] instructions for constant splats of
++    // 128/256-bit when LSX/LASX is enabled.
++    BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Node);
++    APInt SplatValue, SplatUndef;
++    unsigned SplatBitSize;
++    bool HasAnyUndefs;
++    unsigned Op;
++    EVT ViaVecTy;
++    bool Is128Vec = BVN->getValueType(0).is128BitVector();
++    bool Is256Vec = BVN->getValueType(0).is256BitVector();
++
++    if (!Subtarget->hasExtLSX() || (!Is128Vec && !Is256Vec))
++      break;
++    if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
++                              HasAnyUndefs, 8))
++      break;
++
++    switch (SplatBitSize) {
++    default:
++      break;
++    case 8:
++      Op = Is256Vec ? LoongArch::PseudoXVREPLI_B : LoongArch::PseudoVREPLI_B;
++      ViaVecTy = Is256Vec ? MVT::v32i8 : MVT::v16i8;
++      break;
++    case 16:
++      Op = Is256Vec ? LoongArch::PseudoXVREPLI_H : LoongArch::PseudoVREPLI_H;
++      ViaVecTy = Is256Vec ? MVT::v16i16 : MVT::v8i16;
++      break;
++    case 32:
++      Op = Is256Vec ? LoongArch::PseudoXVREPLI_W : LoongArch::PseudoVREPLI_W;
++      ViaVecTy = Is256Vec ? MVT::v8i32 : MVT::v4i32;
++      break;
++    case 64:
++      Op = Is256Vec ? LoongArch::PseudoXVREPLI_D : LoongArch::PseudoVREPLI_D;
++      ViaVecTy = Is256Vec ? MVT::v4i64 : MVT::v2i64;
++      break;
++    }
++
++    SDNode *Res;
++    // If we have a signed 10 bit integer, we can splat it directly.
++    if (SplatValue.isSignedIntN(10)) {
++      SDValue Imm = CurDAG->getTargetConstant(SplatValue, DL,
++                                              ViaVecTy.getVectorElementType());
++      Res = CurDAG->getMachineNode(Op, DL, ViaVecTy, Imm);
++      ReplaceNode(Node, Res);
++      return;
++    }
++    break;
++  }
+   }
+
+   // Select the default instruction.
+diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+index 492339ce2151..1b60bfc3bddb 100644
+--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+@@ -234,11 +234,9 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
+       setOperationAction(ISD::BITCAST, VT, Legal);
+       setOperationAction(ISD::UNDEF, VT, Legal);
+
+-      // FIXME: For BUILD_VECTOR, it is temporarily set to `Legal` here, and it
+-      // will be `Custom` handled in the future.
+-      setOperationAction(ISD::BUILD_VECTOR, VT, Legal);
+       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
+       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
++      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+     }
+     for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
+       setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);
+@@ -265,10 +263,9 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
+       setOperationAction(ISD::BITCAST, VT, Legal);
+       setOperationAction(ISD::UNDEF, VT, Legal);
+
+-      // FIXME: Same as above.
+-      setOperationAction(ISD::BUILD_VECTOR, VT, Legal);
+       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
+       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
++      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+     }
+     for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
+       setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);
+@@ -371,10 +368,105 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
+     return lowerWRITE_REGISTER(Op, DAG);
+   case ISD::INSERT_VECTOR_ELT:
+     return lowerINSERT_VECTOR_ELT(Op, DAG);
++  case ISD::BUILD_VECTOR:
++    return lowerBUILD_VECTOR(Op, DAG);
+   }
+   return SDValue();
+ }
+
++static bool isConstantOrUndef(const SDValue Op) {
++  if (Op->isUndef())
++    return true;
++  if (isa<ConstantSDNode>(Op))
++    return true;
++  if (isa<ConstantFPSDNode>(Op))
++    return true;
++  return false;
++}
++
++static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) {
++  for (unsigned i = 0; i < Op->getNumOperands(); ++i)
++    if (isConstantOrUndef(Op->getOperand(i)))
++      return true;
++  return false;
++}
++
++SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
++                                                    SelectionDAG &DAG) const {
++  BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
++  EVT ResTy = Op->getValueType(0);
++  SDLoc DL(Op);
++  APInt SplatValue, SplatUndef;
++  unsigned SplatBitSize;
++  bool HasAnyUndefs;
++  bool Is128Vec = ResTy.is128BitVector();
++  bool Is256Vec = ResTy.is256BitVector();
++
++  if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
++      (!Subtarget.hasExtLASX() || !Is256Vec))
++    return SDValue();
++
++  if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
++                            /*MinSplatBits=*/8) &&
++      SplatBitSize <= 64) {
++    // We can only cope with 8, 16, 32, or 64-bit elements.
++    if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
++        SplatBitSize != 64)
++      return SDValue();
++
++    EVT ViaVecTy;
++
++    switch (SplatBitSize) {
++    default:
++      return SDValue();
++    case 8:
++      ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
++      break;
++    case 16:
++      ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
++      break;
++    case 32:
++      ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
++      break;
++    case 64:
++      ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
++      break;
++    }
++
++    // SelectionDAG::getConstant will promote SplatValue appropriately.
++    SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
++
++    // Bitcast to the type we originally wanted.
++ if (ViaVecTy != ResTy) ++ Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result); ++ ++ return Result; ++ } ++ ++ if (DAG.isSplatValue(Op, /*AllowUndefs=*/false)) ++ return Op; ++ ++ if (!isConstantOrUndefBUILD_VECTOR(Node)) { ++ // Use INSERT_VECTOR_ELT operations rather than expand to stores. ++ // The resulting code is the same length as the expansion, but it doesn't ++ // use memory operations. ++ EVT ResTy = Node->getValueType(0); ++ ++ assert(ResTy.isVector()); ++ ++ unsigned NumElts = ResTy.getVectorNumElements(); ++ SDValue Vector = DAG.getUNDEF(ResTy); ++ for (unsigned i = 0; i < NumElts; ++i) { ++ Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, ++ Node->getOperand(i), ++ DAG.getConstant(i, DL, Subtarget.getGRLenVT())); ++ } ++ return Vector; ++ } ++ ++ return SDValue(); ++} ++ + SDValue + LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, + SelectionDAG &DAG) const { +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +index 29028ff963d0..111376306374 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +@@ -276,6 +276,7 @@ private: + SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; + + bool isFPImmLegal(const APFloat &Imm, EVT VT, + bool ForCodeSize) const override; +diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +index 475565db15c9..4487152fb42b 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +@@ -33,6 +33,13 @@ def lasxsplati32 + def lasxsplati64 + : PatFrag<(ops node:$e0), + (v4i64 (build_vector node:$e0, node:$e0, node:$e0, node:$e0))>; ++def lasxsplatf32 ++ : PatFrag<(ops node:$e0), ++ (v8f32 (build_vector node:$e0, node:$e0, node:$e0, node:$e0, ++ node:$e0, node:$e0, node:$e0, node:$e0))>; ++def lasxsplatf64 ++ : PatFrag<(ops node:$e0), ++ (v4f64 (build_vector node:$e0, node:$e0, node:$e0, node:$e0))>; + + //===----------------------------------------------------------------------===// + // Instruction class templates +@@ -1411,6 +1418,12 @@ def : Pat<(loongarch_vreplve v8i32:$xj, GRLenVT:$rk), + def : Pat<(loongarch_vreplve v4i64:$xj, GRLenVT:$rk), + (XVREPLVE_D v4i64:$xj, GRLenVT:$rk)>; + ++// XVREPL128VEI_{W/D} ++def : Pat<(lasxsplatf32 FPR32:$fj), ++ (XVREPL128VEI_W (SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32), 0)>; ++def : Pat<(lasxsplatf64 FPR64:$fj), ++ (XVREPL128VEI_D (SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64), 0)>; ++ + // Loads/Stores + foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in { + defm : LdPat; +diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +index d8fd132a1c59..deac5015882d 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +@@ -141,9 +141,13 @@ def lsxsplati16 : PatFrag<(ops node:$e0), + def lsxsplati32 : PatFrag<(ops node:$e0), + (v4i32 (build_vector node:$e0, node:$e0, + node:$e0, node:$e0))>; +- + def lsxsplati64 : PatFrag<(ops node:$e0), + (v2i64 (build_vector node:$e0, node:$e0))>; ++def lsxsplatf32 : PatFrag<(ops node:$e0), ++ (v4f32 (build_vector node:$e0, node:$e0, ++ 
node:$e0, node:$e0))>; ++def lsxsplatf64 : PatFrag<(ops node:$e0), ++ (v2f64 (build_vector node:$e0, node:$e0))>; + + def to_valid_timm : SDNodeXForm(N); +@@ -1498,6 +1502,12 @@ def : Pat<(loongarch_vreplve v4i32:$vj, GRLenVT:$rk), + def : Pat<(loongarch_vreplve v2i64:$vj, GRLenVT:$rk), + (VREPLVE_D v2i64:$vj, GRLenVT:$rk)>; + ++// VREPLVEI_{W/D} ++def : Pat<(lsxsplatf32 FPR32:$fj), ++ (VREPLVEI_W (SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32), 0)>; ++def : Pat<(lsxsplatf64 FPR64:$fj), ++ (VREPLVEI_D (SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64), 0)>; ++ + // Loads/Stores + foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in { + defm : LdPat; +diff --git a/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll +new file mode 100644 +index 000000000000..6824ab5cda8d +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll +@@ -0,0 +1,551 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++define void @buildvector_v32i8_splat(ptr %dst, i8 %a0) nounwind { ++; CHECK-LABEL: buildvector_v32i8_splat: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvreplgr2vr.b $xr0, $a1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %insert = insertelement <32 x i8> undef, i8 %a0, i8 0 ++ %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer ++ store <32 x i8> %splat, ptr %dst ++ ret void ++} ++ ++define void @buildvector_v16i16_splat(ptr %dst, i16 %a0) nounwind { ++; CHECK-LABEL: buildvector_v16i16_splat: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvreplgr2vr.h $xr0, $a1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %insert = insertelement <16 x i16> undef, i16 %a0, i8 0 ++ %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer ++ store <16 x i16> %splat, ptr %dst ++ ret void ++} ++ ++define void @buildvector_v8i32_splat(ptr %dst, i32 %a0) nounwind { ++; CHECK-LABEL: buildvector_v8i32_splat: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvreplgr2vr.w $xr0, $a1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %insert = insertelement <8 x i32> undef, i32 %a0, i8 0 ++ %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer ++ store <8 x i32> %splat, ptr %dst ++ ret void ++} ++ ++define void @buildvector_v4i64_splat(ptr %dst, i64 %a0) nounwind { ++; CHECK-LABEL: buildvector_v4i64_splat: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvreplgr2vr.d $xr0, $a1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %insert = insertelement <4 x i64> undef, i64 %a0, i8 0 ++ %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer ++ store <4 x i64> %splat, ptr %dst ++ ret void ++} ++ ++define void @buildvector_v8f32_splat(ptr %dst, float %a0) nounwind { ++; CHECK-LABEL: buildvector_v8f32_splat: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: # kill: def $f0 killed $f0 def $xr0 ++; CHECK-NEXT: xvrepl128vei.w $xr0, $xr0, 0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %insert = insertelement <8 x float> undef, float %a0, i8 0 ++ %splat = shufflevector <8 x float> %insert, <8 x float> undef, <8 x i32> zeroinitializer ++ store <8 x float> %splat, ptr %dst ++ ret void ++} ++ ++define void @buildvector_v4f64_splat(ptr %dst, double %a0) nounwind { ++; CHECK-LABEL: buildvector_v4f64_splat: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: # kill: def 
$f0_64 killed $f0_64 def $xr0 ++; CHECK-NEXT: xvrepl128vei.d $xr0, $xr0, 0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %insert = insertelement <4 x double> undef, double %a0, i8 0 ++ %splat = shufflevector <4 x double> %insert, <4 x double> undef, <4 x i32> zeroinitializer ++ store <4 x double> %splat, ptr %dst ++ ret void ++} ++ ++define void @buildvector_v32i8_const_splat(ptr %dst) nounwind { ++; CHECK-LABEL: buildvector_v32i8_const_splat: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvrepli.b $xr0, 1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ store <32 x i8> , ptr %dst ++ ret void ++} ++ ++define void @buildvector_v16i16_const_splat(ptr %dst) nounwind { ++; CHECK-LABEL: buildvector_v16i16_const_splat: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvrepli.h $xr0, 1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ store <16 x i16> , ptr %dst ++ ret void ++} ++ ++define void @buildvector_v8i32_const_splat(ptr %dst) nounwind { ++; CHECK-LABEL: buildvector_v8i32_const_splat: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvrepli.w $xr0, 1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ store <8 x i32> , ptr %dst ++ ret void ++} ++ ++define void @buildvector_v4i64_const_splat(ptr %dst) nounwind { ++; CHECK-LABEL: buildvector_v4i64_const_splat: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvrepli.d $xr0, 1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ store <4 x i64> , ptr %dst ++ ret void ++} ++ ++define void @buildvector_v2f32_const_splat(ptr %dst) nounwind { ++; CHECK-LABEL: buildvector_v2f32_const_splat: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: lu12i.w $a1, 260096 ++; CHECK-NEXT: xvreplgr2vr.w $xr0, $a1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ store <8 x float> , ptr %dst ++ ret void ++} ++ ++define void @buildvector_v4f64_const_splat(ptr %dst) nounwind { ++; CHECK-LABEL: buildvector_v4f64_const_splat: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: lu52i.d $a1, $zero, 1023 ++; CHECK-NEXT: xvreplgr2vr.d $xr0, $a1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ store <4 x double> , ptr %dst ++ ret void ++} ++ ++define void @buildvector_v32i8_const(ptr %dst) nounwind { ++; CHECK-LABEL: buildvector_v32i8_const: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI12_0) ++; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI12_0) ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ store <32 x i8> , ptr %dst ++ ret void ++} ++ ++define void @buildvector_v16i16_const(ptr %dst) nounwind { ++; CHECK-LABEL: buildvector_v16i16_const: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI13_0) ++; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI13_0) ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ store <16 x i16> , ptr %dst ++ ret void ++} ++ ++define void @buildvector_v8i32_const(ptr %dst) nounwind { ++; CHECK-LABEL: buildvector_v8i32_const: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI14_0) ++; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI14_0) ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ store <8 x i32> , ptr %dst ++ ret void ++} ++ ++define void @buildvector_v4i64_const(ptr %dst) nounwind { ++; CHECK-LABEL: buildvector_v4i64_const: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI15_0) ++; CHECK-NEXT: addi.d $a1, $a1, 
%pc_lo12(.LCPI15_0) ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ store <4 x i64> , ptr %dst ++ ret void ++} ++ ++define void @buildvector_v2f32_const(ptr %dst) nounwind { ++; CHECK-LABEL: buildvector_v2f32_const: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI16_0) ++; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI16_0) ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ store <8 x float> , ptr %dst ++ ret void ++} ++ ++define void @buildvector_v4f64_const(ptr %dst) nounwind { ++; CHECK-LABEL: buildvector_v4f64_const: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI17_0) ++; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI17_0) ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ store <4 x double> , ptr %dst ++ ret void ++} ++ ++define void @buildvector_v32i8(ptr %dst, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15, i8 %a16, i8 %a17, i8 %a18, i8 %a19, i8 %a20, i8 %a21, i8 %a22, i8 %a23, i8 %a24, i8 %a25, i8 %a26, i8 %a27, i8 %a28, i8 %a29, i8 %a30, i8 %a31) nounwind { ++; CHECK-LABEL: buildvector_v32i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 0 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a2, 1 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a3, 2 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a4, 3 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a5, 4 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a6, 5 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a7, 6 ++; CHECK-NEXT: ld.b $a1, $sp, 0 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 7 ++; CHECK-NEXT: ld.b $a1, $sp, 8 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 8 ++; CHECK-NEXT: ld.b $a1, $sp, 16 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 9 ++; CHECK-NEXT: ld.b $a1, $sp, 24 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 10 ++; CHECK-NEXT: ld.b $a1, $sp, 32 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 11 ++; CHECK-NEXT: ld.b $a1, $sp, 40 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 12 ++; CHECK-NEXT: ld.b $a1, $sp, 48 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 13 ++; CHECK-NEXT: ld.b $a1, $sp, 56 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 14 ++; CHECK-NEXT: ld.b $a1, $sp, 64 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 15 ++; CHECK-NEXT: ld.b $a1, $sp, 72 ++; CHECK-NEXT: xvori.b $xr1, $xr0, 0 ++; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 ++; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 0 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 ++; CHECK-NEXT: ld.b $a1, $sp, 80 ++; CHECK-NEXT: xvori.b $xr1, $xr0, 0 ++; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 ++; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 1 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 ++; CHECK-NEXT: ld.b $a1, $sp, 88 ++; CHECK-NEXT: xvori.b $xr1, $xr0, 0 ++; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 ++; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 2 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 ++; CHECK-NEXT: ld.b $a1, $sp, 96 ++; CHECK-NEXT: xvori.b $xr1, $xr0, 0 ++; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 ++; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 3 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 ++; CHECK-NEXT: ld.b $a1, $sp, 104 ++; CHECK-NEXT: xvori.b $xr1, $xr0, 0 ++; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 ++; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 4 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 ++; CHECK-NEXT: ld.b $a1, $sp, 112 ++; CHECK-NEXT: xvori.b $xr1, $xr0, 0 ++; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 ++; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 5 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 ++; CHECK-NEXT: ld.b $a1, $sp, 120 ++; CHECK-NEXT: xvori.b $xr1, $xr0, 0 ++; CHECK-NEXT: xvpermi.q 
$xr1, $xr0, 1 ++; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 6 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 ++; CHECK-NEXT: ld.b $a1, $sp, 128 ++; CHECK-NEXT: xvori.b $xr1, $xr0, 0 ++; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 ++; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 7 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 ++; CHECK-NEXT: ld.b $a1, $sp, 136 ++; CHECK-NEXT: xvori.b $xr1, $xr0, 0 ++; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 ++; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 8 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 ++; CHECK-NEXT: ld.b $a1, $sp, 144 ++; CHECK-NEXT: xvori.b $xr1, $xr0, 0 ++; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 ++; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 9 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 ++; CHECK-NEXT: ld.b $a1, $sp, 152 ++; CHECK-NEXT: xvori.b $xr1, $xr0, 0 ++; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 ++; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 10 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 ++; CHECK-NEXT: ld.b $a1, $sp, 160 ++; CHECK-NEXT: xvori.b $xr1, $xr0, 0 ++; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 ++; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 11 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 ++; CHECK-NEXT: ld.b $a1, $sp, 168 ++; CHECK-NEXT: xvori.b $xr1, $xr0, 0 ++; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 ++; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 12 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 ++; CHECK-NEXT: ld.b $a1, $sp, 176 ++; CHECK-NEXT: xvori.b $xr1, $xr0, 0 ++; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 ++; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 13 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 ++; CHECK-NEXT: ld.b $a1, $sp, 184 ++; CHECK-NEXT: xvori.b $xr1, $xr0, 0 ++; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 ++; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 14 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 ++; CHECK-NEXT: ld.b $a1, $sp, 192 ++; CHECK-NEXT: xvori.b $xr1, $xr0, 0 ++; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 ++; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 15 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %ins0 = insertelement <32 x i8> undef, i8 %a0, i32 0 ++ %ins1 = insertelement <32 x i8> %ins0, i8 %a1, i32 1 ++ %ins2 = insertelement <32 x i8> %ins1, i8 %a2, i32 2 ++ %ins3 = insertelement <32 x i8> %ins2, i8 %a3, i32 3 ++ %ins4 = insertelement <32 x i8> %ins3, i8 %a4, i32 4 ++ %ins5 = insertelement <32 x i8> %ins4, i8 %a5, i32 5 ++ %ins6 = insertelement <32 x i8> %ins5, i8 %a6, i32 6 ++ %ins7 = insertelement <32 x i8> %ins6, i8 %a7, i32 7 ++ %ins8 = insertelement <32 x i8> %ins7, i8 %a8, i32 8 ++ %ins9 = insertelement <32 x i8> %ins8, i8 %a9, i32 9 ++ %ins10 = insertelement <32 x i8> %ins9, i8 %a10, i32 10 ++ %ins11 = insertelement <32 x i8> %ins10, i8 %a11, i32 11 ++ %ins12 = insertelement <32 x i8> %ins11, i8 %a12, i32 12 ++ %ins13 = insertelement <32 x i8> %ins12, i8 %a13, i32 13 ++ %ins14 = insertelement <32 x i8> %ins13, i8 %a14, i32 14 ++ %ins15 = insertelement <32 x i8> %ins14, i8 %a15, i32 15 ++ %ins16 = insertelement <32 x i8> %ins15, i8 %a16, i32 16 ++ %ins17 = insertelement <32 x i8> %ins16, i8 %a17, i32 17 ++ %ins18 = insertelement <32 x i8> %ins17, i8 %a18, i32 18 ++ %ins19 = insertelement <32 x i8> %ins18, i8 %a19, i32 19 ++ %ins20 = insertelement <32 x i8> %ins19, i8 %a20, i32 20 ++ %ins21 = insertelement <32 x i8> %ins20, i8 %a21, i32 21 ++ %ins22 = insertelement <32 x i8> %ins21, i8 %a22, i32 22 ++ %ins23 = insertelement <32 x i8> %ins22, i8 %a23, i32 23 ++ %ins24 = insertelement <32 x i8> %ins23, i8 %a24, i32 24 ++ %ins25 = insertelement <32 x i8> %ins24, i8 %a25, i32 25 ++ %ins26 = insertelement <32 x i8> %ins25, i8 %a26, i32 26 ++ %ins27 = insertelement <32 x i8> %ins26, i8 %a27, i32 27 ++ 
%ins28 = insertelement <32 x i8> %ins27, i8 %a28, i32 28 ++ %ins29 = insertelement <32 x i8> %ins28, i8 %a29, i32 29 ++ %ins30 = insertelement <32 x i8> %ins29, i8 %a30, i32 30 ++ %ins31 = insertelement <32 x i8> %ins30, i8 %a31, i32 31 ++ store <32 x i8> %ins31, ptr %dst ++ ret void ++} ++ ++define void @buildvector_v16i16(ptr %dst, i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7, i16 %a8, i16 %a9, i16 %a10, i16 %a11, i16 %a12, i16 %a13, i16 %a14, i16 %a15) nounwind { ++; CHECK-LABEL: buildvector_v16i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 0 ++; CHECK-NEXT: vinsgr2vr.h $vr0, $a2, 1 ++; CHECK-NEXT: vinsgr2vr.h $vr0, $a3, 2 ++; CHECK-NEXT: vinsgr2vr.h $vr0, $a4, 3 ++; CHECK-NEXT: vinsgr2vr.h $vr0, $a5, 4 ++; CHECK-NEXT: vinsgr2vr.h $vr0, $a6, 5 ++; CHECK-NEXT: vinsgr2vr.h $vr0, $a7, 6 ++; CHECK-NEXT: ld.h $a1, $sp, 0 ++; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 7 ++; CHECK-NEXT: ld.h $a1, $sp, 8 ++; CHECK-NEXT: xvori.b $xr1, $xr0, 0 ++; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 ++; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 0 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 ++; CHECK-NEXT: ld.h $a1, $sp, 16 ++; CHECK-NEXT: xvori.b $xr1, $xr0, 0 ++; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 ++; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 1 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 ++; CHECK-NEXT: ld.h $a1, $sp, 24 ++; CHECK-NEXT: xvori.b $xr1, $xr0, 0 ++; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 ++; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 2 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 ++; CHECK-NEXT: ld.h $a1, $sp, 32 ++; CHECK-NEXT: xvori.b $xr1, $xr0, 0 ++; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 ++; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 3 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 ++; CHECK-NEXT: ld.h $a1, $sp, 40 ++; CHECK-NEXT: xvori.b $xr1, $xr0, 0 ++; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 ++; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 4 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 ++; CHECK-NEXT: ld.h $a1, $sp, 48 ++; CHECK-NEXT: xvori.b $xr1, $xr0, 0 ++; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 ++; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 5 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 ++; CHECK-NEXT: ld.h $a1, $sp, 56 ++; CHECK-NEXT: xvori.b $xr1, $xr0, 0 ++; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 ++; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 6 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 ++; CHECK-NEXT: ld.h $a1, $sp, 64 ++; CHECK-NEXT: xvori.b $xr1, $xr0, 0 ++; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 ++; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 7 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %ins0 = insertelement <16 x i16> undef, i16 %a0, i32 0 ++ %ins1 = insertelement <16 x i16> %ins0, i16 %a1, i32 1 ++ %ins2 = insertelement <16 x i16> %ins1, i16 %a2, i32 2 ++ %ins3 = insertelement <16 x i16> %ins2, i16 %a3, i32 3 ++ %ins4 = insertelement <16 x i16> %ins3, i16 %a4, i32 4 ++ %ins5 = insertelement <16 x i16> %ins4, i16 %a5, i32 5 ++ %ins6 = insertelement <16 x i16> %ins5, i16 %a6, i32 6 ++ %ins7 = insertelement <16 x i16> %ins6, i16 %a7, i32 7 ++ %ins8 = insertelement <16 x i16> %ins7, i16 %a8, i32 8 ++ %ins9 = insertelement <16 x i16> %ins8, i16 %a9, i32 9 ++ %ins10 = insertelement <16 x i16> %ins9, i16 %a10, i32 10 ++ %ins11 = insertelement <16 x i16> %ins10, i16 %a11, i32 11 ++ %ins12 = insertelement <16 x i16> %ins11, i16 %a12, i32 12 ++ %ins13 = insertelement <16 x i16> %ins12, i16 %a13, i32 13 ++ %ins14 = insertelement <16 x i16> %ins13, i16 %a14, i32 14 ++ %ins15 = insertelement <16 x i16> %ins14, i16 %a15, i32 15 ++ store <16 x i16> %ins15, ptr %dst ++ ret void ++} ++ ++define void 
@buildvector_v8i32(ptr %dst, i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7) nounwind { ++; CHECK-LABEL: buildvector_v8i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 0 ++; CHECK-NEXT: xvinsgr2vr.w $xr0, $a2, 1 ++; CHECK-NEXT: xvinsgr2vr.w $xr0, $a3, 2 ++; CHECK-NEXT: xvinsgr2vr.w $xr0, $a4, 3 ++; CHECK-NEXT: xvinsgr2vr.w $xr0, $a5, 4 ++; CHECK-NEXT: xvinsgr2vr.w $xr0, $a6, 5 ++; CHECK-NEXT: xvinsgr2vr.w $xr0, $a7, 6 ++; CHECK-NEXT: ld.w $a1, $sp, 0 ++; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 7 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %ins0 = insertelement <8 x i32> undef, i32 %a0, i32 0 ++ %ins1 = insertelement <8 x i32> %ins0, i32 %a1, i32 1 ++ %ins2 = insertelement <8 x i32> %ins1, i32 %a2, i32 2 ++ %ins3 = insertelement <8 x i32> %ins2, i32 %a3, i32 3 ++ %ins4 = insertelement <8 x i32> %ins3, i32 %a4, i32 4 ++ %ins5 = insertelement <8 x i32> %ins4, i32 %a5, i32 5 ++ %ins6 = insertelement <8 x i32> %ins5, i32 %a6, i32 6 ++ %ins7 = insertelement <8 x i32> %ins6, i32 %a7, i32 7 ++ store <8 x i32> %ins7, ptr %dst ++ ret void ++} ++ ++define void @buildvector_v4i64(ptr %dst, i64 %a0, i64 %a1, i64 %a2, i64 %a3) nounwind { ++; CHECK-LABEL: buildvector_v4i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvinsgr2vr.d $xr0, $a1, 0 ++; CHECK-NEXT: xvinsgr2vr.d $xr0, $a2, 1 ++; CHECK-NEXT: xvinsgr2vr.d $xr0, $a3, 2 ++; CHECK-NEXT: xvinsgr2vr.d $xr0, $a4, 3 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %ins0 = insertelement <4 x i64> undef, i64 %a0, i32 0 ++ %ins1 = insertelement <4 x i64> %ins0, i64 %a1, i32 1 ++ %ins2 = insertelement <4 x i64> %ins1, i64 %a2, i32 2 ++ %ins3 = insertelement <4 x i64> %ins2, i64 %a3, i32 3 ++ store <4 x i64> %ins3, ptr %dst ++ ret void ++} ++ ++define void @buildvector_v8f32(ptr %dst, float %a0, float %a1, float %a2, float %a3, float %a4, float %a5, float %a6, float %a7) nounwind { ++; CHECK-LABEL: buildvector_v8f32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: movfr2gr.s $a1, $fa0 ++; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 0 ++; CHECK-NEXT: movfr2gr.s $a1, $fa1 ++; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 1 ++; CHECK-NEXT: movfr2gr.s $a1, $fa2 ++; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 2 ++; CHECK-NEXT: movfr2gr.s $a1, $fa3 ++; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 3 ++; CHECK-NEXT: movfr2gr.s $a1, $fa4 ++; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 4 ++; CHECK-NEXT: movfr2gr.s $a1, $fa5 ++; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 5 ++; CHECK-NEXT: movfr2gr.s $a1, $fa6 ++; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 6 ++; CHECK-NEXT: movfr2gr.s $a1, $fa7 ++; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 7 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %ins0 = insertelement <8 x float> undef, float %a0, i32 0 ++ %ins1 = insertelement <8 x float> %ins0, float %a1, i32 1 ++ %ins2 = insertelement <8 x float> %ins1, float %a2, i32 2 ++ %ins3 = insertelement <8 x float> %ins2, float %a3, i32 3 ++ %ins4 = insertelement <8 x float> %ins3, float %a4, i32 4 ++ %ins5 = insertelement <8 x float> %ins4, float %a5, i32 5 ++ %ins6 = insertelement <8 x float> %ins5, float %a6, i32 6 ++ %ins7 = insertelement <8 x float> %ins6, float %a7, i32 7 ++ store <8 x float> %ins7, ptr %dst ++ ret void ++} ++ ++define void @buildvector_v4f64(ptr %dst, double %a0, double %a1, double %a2, double %a3) nounwind { ++; CHECK-LABEL: buildvector_v4f64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: movfr2gr.d $a1, $fa0 ++; CHECK-NEXT: xvinsgr2vr.d $xr0, $a1, 0 ++; CHECK-NEXT: movfr2gr.d $a1, $fa1 ++; CHECK-NEXT: xvinsgr2vr.d $xr0, 
$a1, 1 ++; CHECK-NEXT: movfr2gr.d $a1, $fa2 ++; CHECK-NEXT: xvinsgr2vr.d $xr0, $a1, 2 ++; CHECK-NEXT: movfr2gr.d $a1, $fa3 ++; CHECK-NEXT: xvinsgr2vr.d $xr0, $a1, 3 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %ins0 = insertelement <4 x double> undef, double %a0, i32 0 ++ %ins1 = insertelement <4 x double> %ins0, double %a1, i32 1 ++ %ins2 = insertelement <4 x double> %ins1, double %a2, i32 2 ++ %ins3 = insertelement <4 x double> %ins2, double %a3, i32 3 ++ store <4 x double> %ins3, ptr %dst ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll +new file mode 100644 +index 000000000000..3a74db5e1acb +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll +@@ -0,0 +1,376 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++define void @buildvector_v16i8_splat(ptr %dst, i8 %a0) nounwind { ++; CHECK-LABEL: buildvector_v16i8_splat: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vreplgr2vr.b $vr0, $a1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %insert = insertelement <16 x i8> undef, i8 %a0, i8 0 ++ %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer ++ store <16 x i8> %splat, ptr %dst ++ ret void ++} ++ ++define void @buildvector_v8i16_splat(ptr %dst, i16 %a0) nounwind { ++; CHECK-LABEL: buildvector_v8i16_splat: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vreplgr2vr.h $vr0, $a1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %insert = insertelement <8 x i16> undef, i16 %a0, i8 0 ++ %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer ++ store <8 x i16> %splat, ptr %dst ++ ret void ++} ++ ++define void @buildvector_v4i32_splat(ptr %dst, i32 %a0) nounwind { ++; CHECK-LABEL: buildvector_v4i32_splat: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vreplgr2vr.w $vr0, $a1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %insert = insertelement <4 x i32> undef, i32 %a0, i8 0 ++ %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer ++ store <4 x i32> %splat, ptr %dst ++ ret void ++} ++ ++define void @buildvector_v2i64_splat(ptr %dst, i64 %a0) nounwind { ++; CHECK-LABEL: buildvector_v2i64_splat: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vreplgr2vr.d $vr0, $a1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %insert = insertelement <2 x i64> undef, i64 %a0, i8 0 ++ %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer ++ store <2 x i64> %splat, ptr %dst ++ ret void ++} ++ ++define void @buildvector_v4f32_splat(ptr %dst, float %a0) nounwind { ++; CHECK-LABEL: buildvector_v4f32_splat: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: # kill: def $f0 killed $f0 def $vr0 ++; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %insert = insertelement <4 x float> undef, float %a0, i8 0 ++ %splat = shufflevector <4 x float> %insert, <4 x float> undef, <4 x i32> zeroinitializer ++ store <4 x float> %splat, ptr %dst ++ ret void ++} ++ ++define void @buildvector_v2f64_splat(ptr %dst, double %a0) nounwind { ++; CHECK-LABEL: buildvector_v2f64_splat: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 ++; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %insert 
= insertelement <2 x double> undef, double %a0, i8 0 ++ %splat = shufflevector <2 x double> %insert, <2 x double> undef, <2 x i32> zeroinitializer ++ store <2 x double> %splat, ptr %dst ++ ret void ++} ++ ++define void @buildvector_v16i8_const_splat(ptr %dst) nounwind { ++; CHECK-LABEL: buildvector_v16i8_const_splat: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vrepli.b $vr0, 1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ store <16 x i8> , ptr %dst ++ ret void ++} ++ ++define void @buildvector_v8i16_const_splat(ptr %dst) nounwind { ++; CHECK-LABEL: buildvector_v8i16_const_splat: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vrepli.h $vr0, 1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ store <8 x i16> , ptr %dst ++ ret void ++} ++ ++define void @buildvector_v4i32_const_splat(ptr %dst) nounwind { ++; CHECK-LABEL: buildvector_v4i32_const_splat: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vrepli.w $vr0, 1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ store <4 x i32> , ptr %dst ++ ret void ++} ++ ++define void @buildvector_v2i64_const_splat(ptr %dst) nounwind { ++; CHECK-LABEL: buildvector_v2i64_const_splat: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vrepli.d $vr0, 1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ store <2 x i64> , ptr %dst ++ ret void ++} ++ ++define void @buildvector_v2f32_const_splat(ptr %dst) nounwind { ++; CHECK-LABEL: buildvector_v2f32_const_splat: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: lu12i.w $a1, 260096 ++; CHECK-NEXT: vreplgr2vr.w $vr0, $a1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ store <4 x float> , ptr %dst ++ ret void ++} ++ ++define void @buildvector_v2f64_const_splat(ptr %dst) nounwind { ++; CHECK-LABEL: buildvector_v2f64_const_splat: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: lu52i.d $a1, $zero, 1023 ++; CHECK-NEXT: vreplgr2vr.d $vr0, $a1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ store <2 x double> , ptr %dst ++ ret void ++} ++ ++define void @buildvector_v16i8_const(ptr %dst) nounwind { ++; CHECK-LABEL: buildvector_v16i8_const: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI12_0) ++; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI12_0) ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ store <16 x i8> , ptr %dst ++ ret void ++} ++ ++define void @buildvector_v8i16_const(ptr %dst) nounwind { ++; CHECK-LABEL: buildvector_v8i16_const: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI13_0) ++; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI13_0) ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ store <8 x i16> , ptr %dst ++ ret void ++} ++ ++define void @buildvector_v4i32_const(ptr %dst) nounwind { ++; CHECK-LABEL: buildvector_v4i32_const: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI14_0) ++; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI14_0) ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ store <4 x i32> , ptr %dst ++ ret void ++} ++ ++define void @buildvector_v2i64_const(ptr %dst) nounwind { ++; CHECK-LABEL: buildvector_v2i64_const: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI15_0) ++; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI15_0) ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ store <2 x i64> , ptr %dst ++ ret void ++} ++ ++define void 
@buildvector_v2f32_const(ptr %dst) nounwind { ++; CHECK-LABEL: buildvector_v2f32_const: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI16_0) ++; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI16_0) ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ store <4 x float> , ptr %dst ++ ret void ++} ++ ++define void @buildvector_v2f64_const(ptr %dst) nounwind { ++; CHECK-LABEL: buildvector_v2f64_const: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI17_0) ++; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI17_0) ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ store <2 x double> , ptr %dst ++ ret void ++} ++ ++define void @buildvector_v16i8(ptr %dst, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15) nounwind { ++; CHECK-LABEL: buildvector_v16i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 0 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a2, 1 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a3, 2 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a4, 3 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a5, 4 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a6, 5 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a7, 6 ++; CHECK-NEXT: ld.b $a1, $sp, 0 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 7 ++; CHECK-NEXT: ld.b $a1, $sp, 8 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 8 ++; CHECK-NEXT: ld.b $a1, $sp, 16 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 9 ++; CHECK-NEXT: ld.b $a1, $sp, 24 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 10 ++; CHECK-NEXT: ld.b $a1, $sp, 32 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 11 ++; CHECK-NEXT: ld.b $a1, $sp, 40 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 12 ++; CHECK-NEXT: ld.b $a1, $sp, 48 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 13 ++; CHECK-NEXT: ld.b $a1, $sp, 56 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 14 ++; CHECK-NEXT: ld.b $a1, $sp, 64 ++; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 15 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %ins0 = insertelement <16 x i8> undef, i8 %a0, i32 0 ++ %ins1 = insertelement <16 x i8> %ins0, i8 %a1, i32 1 ++ %ins2 = insertelement <16 x i8> %ins1, i8 %a2, i32 2 ++ %ins3 = insertelement <16 x i8> %ins2, i8 %a3, i32 3 ++ %ins4 = insertelement <16 x i8> %ins3, i8 %a4, i32 4 ++ %ins5 = insertelement <16 x i8> %ins4, i8 %a5, i32 5 ++ %ins6 = insertelement <16 x i8> %ins5, i8 %a6, i32 6 ++ %ins7 = insertelement <16 x i8> %ins6, i8 %a7, i32 7 ++ %ins8 = insertelement <16 x i8> %ins7, i8 %a8, i32 8 ++ %ins9 = insertelement <16 x i8> %ins8, i8 %a9, i32 9 ++ %ins10 = insertelement <16 x i8> %ins9, i8 %a10, i32 10 ++ %ins11 = insertelement <16 x i8> %ins10, i8 %a11, i32 11 ++ %ins12 = insertelement <16 x i8> %ins11, i8 %a12, i32 12 ++ %ins13 = insertelement <16 x i8> %ins12, i8 %a13, i32 13 ++ %ins14 = insertelement <16 x i8> %ins13, i8 %a14, i32 14 ++ %ins15 = insertelement <16 x i8> %ins14, i8 %a15, i32 15 ++ store <16 x i8> %ins15, ptr %dst ++ ret void ++} ++ ++define void @buildvector_v8i16(ptr %dst, i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind { ++; CHECK-LABEL: buildvector_v8i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 0 ++; CHECK-NEXT: vinsgr2vr.h $vr0, $a2, 1 ++; CHECK-NEXT: vinsgr2vr.h $vr0, $a3, 2 ++; CHECK-NEXT: vinsgr2vr.h $vr0, $a4, 3 ++; CHECK-NEXT: vinsgr2vr.h $vr0, $a5, 4 ++; CHECK-NEXT: vinsgr2vr.h $vr0, $a6, 5 ++; CHECK-NEXT: vinsgr2vr.h $vr0, $a7, 6 ++; CHECK-NEXT: ld.h $a1, $sp, 0 ++; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 
7 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %ins0 = insertelement <8 x i16> undef, i16 %a0, i32 0 ++ %ins1 = insertelement <8 x i16> %ins0, i16 %a1, i32 1 ++ %ins2 = insertelement <8 x i16> %ins1, i16 %a2, i32 2 ++ %ins3 = insertelement <8 x i16> %ins2, i16 %a3, i32 3 ++ %ins4 = insertelement <8 x i16> %ins3, i16 %a4, i32 4 ++ %ins5 = insertelement <8 x i16> %ins4, i16 %a5, i32 5 ++ %ins6 = insertelement <8 x i16> %ins5, i16 %a6, i32 6 ++ %ins7 = insertelement <8 x i16> %ins6, i16 %a7, i32 7 ++ store <8 x i16> %ins7, ptr %dst ++ ret void ++} ++ ++define void @buildvector_v4i32(ptr %dst, i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind { ++; CHECK-LABEL: buildvector_v4i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vinsgr2vr.w $vr0, $a1, 0 ++; CHECK-NEXT: vinsgr2vr.w $vr0, $a2, 1 ++; CHECK-NEXT: vinsgr2vr.w $vr0, $a3, 2 ++; CHECK-NEXT: vinsgr2vr.w $vr0, $a4, 3 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %ins0 = insertelement <4 x i32> undef, i32 %a0, i32 0 ++ %ins1 = insertelement <4 x i32> %ins0, i32 %a1, i32 1 ++ %ins2 = insertelement <4 x i32> %ins1, i32 %a2, i32 2 ++ %ins3 = insertelement <4 x i32> %ins2, i32 %a3, i32 3 ++ store <4 x i32> %ins3, ptr %dst ++ ret void ++} ++ ++define void @buildvector_v2i64(ptr %dst, i64 %a0, i64 %a1) nounwind { ++; CHECK-LABEL: buildvector_v2i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vinsgr2vr.d $vr0, $a1, 0 ++; CHECK-NEXT: vinsgr2vr.d $vr0, $a2, 1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %ins0 = insertelement <2 x i64> undef, i64 %a0, i32 0 ++ %ins1 = insertelement <2 x i64> %ins0, i64 %a1, i32 1 ++ store <2 x i64> %ins1, ptr %dst ++ ret void ++} ++ ++define void @buildvector_v4f32(ptr %dst, float %a0, float %a1, float %a2, float %a3) nounwind { ++; CHECK-LABEL: buildvector_v4f32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: movfr2gr.s $a1, $fa0 ++; CHECK-NEXT: vinsgr2vr.w $vr0, $a1, 0 ++; CHECK-NEXT: movfr2gr.s $a1, $fa1 ++; CHECK-NEXT: vinsgr2vr.w $vr0, $a1, 1 ++; CHECK-NEXT: movfr2gr.s $a1, $fa2 ++; CHECK-NEXT: vinsgr2vr.w $vr0, $a1, 2 ++; CHECK-NEXT: movfr2gr.s $a1, $fa3 ++; CHECK-NEXT: vinsgr2vr.w $vr0, $a1, 3 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %ins0 = insertelement <4 x float> undef, float %a0, i32 0 ++ %ins1 = insertelement <4 x float> %ins0, float %a1, i32 1 ++ %ins2 = insertelement <4 x float> %ins1, float %a2, i32 2 ++ %ins3 = insertelement <4 x float> %ins2, float %a3, i32 3 ++ store <4 x float> %ins3, ptr %dst ++ ret void ++} ++ ++define void @buildvector_v2f64(ptr %dst, double %a0, double %a1) nounwind { ++; CHECK-LABEL: buildvector_v2f64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: movfr2gr.d $a1, $fa0 ++; CHECK-NEXT: vinsgr2vr.d $vr0, $a1, 0 ++; CHECK-NEXT: movfr2gr.d $a1, $fa1 ++; CHECK-NEXT: vinsgr2vr.d $vr0, $a1, 1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %ins0 = insertelement <2 x double> undef, double %a0, i32 0 ++ %ins1 = insertelement <2 x double> %ins0, double %a1, i32 1 ++ store <2 x double> %ins1, ptr %dst ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mul.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mul.ll +index 5060240cd8b1..d0be9cb7e3c8 100644 +--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mul.ll ++++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mul.ll +@@ -180,10 +180,9 @@ entry: + define void @mul_v16i8_17(ptr %res, ptr %a0) nounwind { + ; CHECK-LABEL: mul_v16i8_17: + ; CHECK: # %bb.0: # %entry +-; CHECK-NEXT: ori $a2, $zero, 17 +-; CHECK-NEXT: 
vreplgr2vr.b $vr0, $a2 +-; CHECK-NEXT: vld $vr1, $a1, 0 +-; CHECK-NEXT: vmul.b $vr0, $vr1, $vr0 ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vrepli.b $vr1, 17 ++; CHECK-NEXT: vmul.b $vr0, $vr0, $vr1 + ; CHECK-NEXT: vst $vr0, $a0, 0 + ; CHECK-NEXT: ret + entry: +@@ -196,10 +195,9 @@ entry: + define void @mul_v8i16_17(ptr %res, ptr %a0) nounwind { + ; CHECK-LABEL: mul_v8i16_17: + ; CHECK: # %bb.0: # %entry +-; CHECK-NEXT: ori $a2, $zero, 17 +-; CHECK-NEXT: vreplgr2vr.h $vr0, $a2 +-; CHECK-NEXT: vld $vr1, $a1, 0 +-; CHECK-NEXT: vmul.h $vr0, $vr1, $vr0 ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vrepli.h $vr1, 17 ++; CHECK-NEXT: vmul.h $vr0, $vr0, $vr1 + ; CHECK-NEXT: vst $vr0, $a0, 0 + ; CHECK-NEXT: ret + entry: +@@ -212,10 +210,9 @@ entry: + define void @mul_v4i32_17(ptr %res, ptr %a0) nounwind { + ; CHECK-LABEL: mul_v4i32_17: + ; CHECK: # %bb.0: # %entry +-; CHECK-NEXT: ori $a2, $zero, 17 +-; CHECK-NEXT: vreplgr2vr.w $vr0, $a2 +-; CHECK-NEXT: vld $vr1, $a1, 0 +-; CHECK-NEXT: vmul.w $vr0, $vr1, $vr0 ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vrepli.w $vr1, 17 ++; CHECK-NEXT: vmul.w $vr0, $vr0, $vr1 + ; CHECK-NEXT: vst $vr0, $a0, 0 + ; CHECK-NEXT: ret + entry: +@@ -228,10 +225,9 @@ entry: + define void @mul_v2i64_17(ptr %res, ptr %a0) nounwind { + ; CHECK-LABEL: mul_v2i64_17: + ; CHECK: # %bb.0: # %entry +-; CHECK-NEXT: ori $a2, $zero, 17 +-; CHECK-NEXT: vreplgr2vr.d $vr0, $a2 +-; CHECK-NEXT: vld $vr1, $a1, 0 +-; CHECK-NEXT: vmul.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vrepli.d $vr1, 17 ++; CHECK-NEXT: vmul.d $vr0, $vr0, $vr1 + ; CHECK-NEXT: vst $vr0, $a0, 0 + ; CHECK-NEXT: ret + entry: +-- +2.20.1 + diff --git a/0027-LoongArch-Add-more-and-or-xor-patterns-for-vector-ty.patch b/0027-LoongArch-Add-more-and-or-xor-patterns-for-vector-ty.patch new file mode 100644 index 0000000000000000000000000000000000000000..20dcadb636ad637217d569da9ee08df1bec5d18b --- /dev/null +++ b/0027-LoongArch-Add-more-and-or-xor-patterns-for-vector-ty.patch @@ -0,0 +1,897 @@ +From cfcec815edf0c38df1e25b28de76c1b1f7e35238 Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Tue, 14 Nov 2023 17:58:52 +0800 +Subject: [PATCH 27/66] [LoongArch] Add more and/or/xor patterns for vector + types + +(cherry picked from commit ca66df3b021017fedf08f0779f5bfc7898dbdd29) + +Change-Id: Ide9ec8c4c158e60d66c273decf60f2038b21d5ca +--- + .../LoongArch/LoongArchLASXInstrInfo.td | 21 +-- + .../Target/LoongArch/LoongArchLSXInstrInfo.td | 21 +-- + .../LoongArch/lasx/ir-instruction/and.ll | 125 ++++++++++++++++++ + .../LoongArch/lasx/ir-instruction/or.ll | 125 ++++++++++++++++++ + .../LoongArch/lasx/ir-instruction/xor.ll | 125 ++++++++++++++++++ + .../LoongArch/lsx/ir-instruction/and.ll | 125 ++++++++++++++++++ + .../LoongArch/lsx/ir-instruction/or.ll | 125 ++++++++++++++++++ + .../LoongArch/lsx/ir-instruction/xor.ll | 125 ++++++++++++++++++ + 8 files changed, 774 insertions(+), 18 deletions(-) + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/and.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/or.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/xor.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/and.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/or.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/xor.ll + +diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +index 4487152fb42b..a5652472481a 100644 +--- 
a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +@@ -1184,10 +1184,6 @@ multiclass PatShiftXrUimm { + (!cast(Inst#"_D") LASX256:$xj, uimm6:$imm)>; + } + +-class PatXrXrB +- : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)), +- (Inst LASX256:$xj, LASX256:$xk)>; +- + let Predicates = [HasExtLASX] in { + + // XVADD_{B/H/W/D} +@@ -1235,13 +1231,20 @@ defm : PatXrXr; + defm : PatXrXrU; + + // XVAND_V +-def : PatXrXrB; +-// XVNOR_V +-def : PatXrXrB; ++foreach vt = [v32i8, v16i16, v8i32, v4i64] in ++def : Pat<(and (vt LASX256:$xj), (vt LASX256:$xk)), ++ (XVAND_V LASX256:$xj, LASX256:$xk)>; ++// XVOR_V ++foreach vt = [v32i8, v16i16, v8i32, v4i64] in ++def : Pat<(or (vt LASX256:$xj), (vt LASX256:$xk)), ++ (XVOR_V LASX256:$xj, LASX256:$xk)>; + // XVXOR_V +-def : PatXrXrB; ++foreach vt = [v32i8, v16i16, v8i32, v4i64] in ++def : Pat<(xor (vt LASX256:$xj), (vt LASX256:$xk)), ++ (XVXOR_V LASX256:$xj, LASX256:$xk)>; + // XVNOR_V +-def : Pat<(vnot (or (v32i8 LASX256:$xj), (v32i8 LASX256:$xk))), ++foreach vt = [v32i8, v16i16, v8i32, v4i64] in ++def : Pat<(vnot (or (vt LASX256:$xj), (vt LASX256:$xk))), + (XVNOR_V LASX256:$xj, LASX256:$xk)>; + + // XVANDI_B +diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +index deac5015882d..5645ce51194a 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +@@ -1261,10 +1261,6 @@ multiclass PatShiftVrUimm { + (!cast(Inst#"_D") LSX128:$vj, uimm6:$imm)>; + } + +-class PatVrVrB +- : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)), +- (Inst LSX128:$vj, LSX128:$vk)>; +- + let Predicates = [HasExtLSX] in { + + // VADD_{B/H/W/D} +@@ -1312,13 +1308,20 @@ defm : PatVrVr; + defm : PatVrVrU; + + // VAND_V +-def : PatVrVrB; +-// VNOR_V +-def : PatVrVrB; ++foreach vt = [v16i8, v8i16, v4i32, v2i64] in ++def : Pat<(and (vt LSX128:$vj), (vt LSX128:$vk)), ++ (VAND_V LSX128:$vj, LSX128:$vk)>; ++// VOR_V ++foreach vt = [v16i8, v8i16, v4i32, v2i64] in ++def : Pat<(or (vt LSX128:$vj), (vt LSX128:$vk)), ++ (VOR_V LSX128:$vj, LSX128:$vk)>; + // VXOR_V +-def : PatVrVrB; ++foreach vt = [v16i8, v8i16, v4i32, v2i64] in ++def : Pat<(xor (vt LSX128:$vj), (vt LSX128:$vk)), ++ (VXOR_V LSX128:$vj, LSX128:$vk)>; + // VNOR_V +-def : Pat<(vnot (or (v16i8 LSX128:$vj), (v16i8 LSX128:$vk))), ++foreach vt = [v16i8, v8i16, v4i32, v2i64] in ++def : Pat<(vnot (or (vt LSX128:$vj), (vt LSX128:$vk))), + (VNOR_V LSX128:$vj, LSX128:$vk)>; + + // VANDI_B +diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/and.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/and.ll +new file mode 100644 +index 000000000000..98c87cadeeb5 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/and.ll +@@ -0,0 +1,125 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++define void @and_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: and_v32i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvand.v $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = load <32 x i8>, ptr %a1 ++ %v2 = and <32 x i8> %v0, %v1 ++ store <32 x i8> %v2, ptr %res ++ ret void ++} ++ ++define void @and_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { ++; 
CHECK-LABEL: and_v16i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvand.v $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = load <16 x i16>, ptr %a1 ++ %v2 = and <16 x i16> %v0, %v1 ++ store <16 x i16> %v2, ptr %res ++ ret void ++} ++ ++define void @and_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: and_v8i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvand.v $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = load <8 x i32>, ptr %a1 ++ %v2 = and <8 x i32> %v0, %v1 ++ store <8 x i32> %v2, ptr %res ++ ret void ++} ++ ++define void @and_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: and_v4i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvand.v $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = load <4 x i64>, ptr %a1 ++ %v2 = and <4 x i64> %v0, %v1 ++ store <4 x i64> %v2, ptr %res ++ ret void ++} ++ ++define void @and_u_v32i8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: and_u_v32i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvandi.b $xr0, $xr0, 31 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = and <32 x i8> %v0, ++ store <32 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @and_u_v16i16(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: and_u_v16i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvrepli.h $xr1, 31 ++; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = and <16 x i16> %v0, ++ store <16 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @and_u_v8i32(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: and_u_v8i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvrepli.w $xr1, 31 ++; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = and <8 x i32> %v0, ++ store <8 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @and_u_v4i64(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: and_u_v4i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvrepli.d $xr1, 31 ++; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = and <4 x i64> %v0, ++ store <4 x i64> %v1, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/or.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/or.ll +new file mode 100644 +index 000000000000..f37cbf1cefed +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/or.ll +@@ -0,0 +1,125 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++define void @or_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: or_v32i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; 
CHECK-NEXT: ret ++entry: ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = load <32 x i8>, ptr %a1 ++ %v2 = or <32 x i8> %v0, %v1 ++ store <32 x i8> %v2, ptr %res ++ ret void ++} ++ ++define void @or_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: or_v16i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = load <16 x i16>, ptr %a1 ++ %v2 = or <16 x i16> %v0, %v1 ++ store <16 x i16> %v2, ptr %res ++ ret void ++} ++ ++define void @or_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: or_v8i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = load <8 x i32>, ptr %a1 ++ %v2 = or <8 x i32> %v0, %v1 ++ store <8 x i32> %v2, ptr %res ++ ret void ++} ++ ++define void @or_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: or_v4i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = load <4 x i64>, ptr %a1 ++ %v2 = or <4 x i64> %v0, %v1 ++ store <4 x i64> %v2, ptr %res ++ ret void ++} ++ ++define void @or_u_v32i8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: or_u_v32i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvori.b $xr0, $xr0, 31 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = or <32 x i8> %v0, ++ store <32 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @or_u_v16i16(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: or_u_v16i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvrepli.h $xr1, 31 ++; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = or <16 x i16> %v0, ++ store <16 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @or_u_v8i32(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: or_u_v8i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvrepli.w $xr1, 31 ++; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = or <8 x i32> %v0, ++ store <8 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @or_u_v4i64(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: or_u_v4i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvrepli.d $xr1, 31 ++; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = or <4 x i64> %v0, ++ store <4 x i64> %v1, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/xor.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/xor.ll +new file mode 100644 +index 000000000000..c2fb1462b7a2 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/xor.ll +@@ -0,0 +1,125 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++define void @xor_v32i8(ptr %res, ptr %a0, ptr 
%a1) nounwind { ++; CHECK-LABEL: xor_v32i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvxor.v $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = load <32 x i8>, ptr %a1 ++ %v2 = xor <32 x i8> %v0, %v1 ++ store <32 x i8> %v2, ptr %res ++ ret void ++} ++ ++define void @xor_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: xor_v16i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvxor.v $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = load <16 x i16>, ptr %a1 ++ %v2 = xor <16 x i16> %v0, %v1 ++ store <16 x i16> %v2, ptr %res ++ ret void ++} ++ ++define void @xor_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: xor_v8i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvxor.v $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = load <8 x i32>, ptr %a1 ++ %v2 = xor <8 x i32> %v0, %v1 ++ store <8 x i32> %v2, ptr %res ++ ret void ++} ++ ++define void @xor_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: xor_v4i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvxor.v $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = load <4 x i64>, ptr %a1 ++ %v2 = xor <4 x i64> %v0, %v1 ++ store <4 x i64> %v2, ptr %res ++ ret void ++} ++ ++define void @xor_u_v32i8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: xor_u_v32i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvxori.b $xr0, $xr0, 31 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = xor <32 x i8> %v0, ++ store <32 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @xor_u_v16i16(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: xor_u_v16i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvrepli.h $xr1, 31 ++; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = xor <16 x i16> %v0, ++ store <16 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @xor_u_v8i32(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: xor_u_v8i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvrepli.w $xr1, 31 ++; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = xor <8 x i32> %v0, ++ store <8 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @xor_u_v4i64(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: xor_u_v4i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvrepli.d $xr1, 31 ++; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = xor <4 x i64> %v0, ++ store <4 x i64> %v1, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/and.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/and.ll +new file mode 100644 +index 000000000000..523255159a81 +--- /dev/null ++++ 
b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/and.ll +@@ -0,0 +1,125 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++define void @and_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: and_v16i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vand.v $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = load <16 x i8>, ptr %a1 ++ %v2 = and <16 x i8> %v0, %v1 ++ store <16 x i8> %v2, ptr %res ++ ret void ++} ++ ++define void @and_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: and_v8i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vand.v $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = load <8 x i16>, ptr %a1 ++ %v2 = and <8 x i16> %v0, %v1 ++ store <8 x i16> %v2, ptr %res ++ ret void ++} ++ ++define void @and_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: and_v4i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vand.v $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = load <4 x i32>, ptr %a1 ++ %v2 = and <4 x i32> %v0, %v1 ++ store <4 x i32> %v2, ptr %res ++ ret void ++} ++ ++define void @and_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: and_v2i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vand.v $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = load <2 x i64>, ptr %a1 ++ %v2 = and <2 x i64> %v0, %v1 ++ store <2 x i64> %v2, ptr %res ++ ret void ++} ++ ++define void @and_u_v16i8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: and_u_v16i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vandi.b $vr0, $vr0, 31 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = and <16 x i8> %v0, ++ store <16 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @and_u_v8i16(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: and_u_v8i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vrepli.h $vr1, 31 ++; CHECK-NEXT: vand.v $vr0, $vr0, $vr1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = and <8 x i16> %v0, ++ store <8 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @and_u_v4i32(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: and_u_v4i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vrepli.w $vr1, 31 ++; CHECK-NEXT: vand.v $vr0, $vr0, $vr1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = and <4 x i32> %v0, ++ store <4 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @and_u_v2i64(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: and_u_v2i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vrepli.d $vr1, 31 ++; CHECK-NEXT: vand.v $vr0, $vr0, $vr1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = and <2 x i64> %v0, ++ store <2 x i64> %v1, ptr %res ++ ret void 
++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/or.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/or.ll +new file mode 100644 +index 000000000000..f124512acce7 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/or.ll +@@ -0,0 +1,125 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++define void @or_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: or_v16i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = load <16 x i8>, ptr %a1 ++ %v2 = or <16 x i8> %v0, %v1 ++ store <16 x i8> %v2, ptr %res ++ ret void ++} ++ ++define void @or_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: or_v8i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = load <8 x i16>, ptr %a1 ++ %v2 = or <8 x i16> %v0, %v1 ++ store <8 x i16> %v2, ptr %res ++ ret void ++} ++ ++define void @or_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: or_v4i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = load <4 x i32>, ptr %a1 ++ %v2 = or <4 x i32> %v0, %v1 ++ store <4 x i32> %v2, ptr %res ++ ret void ++} ++ ++define void @or_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: or_v2i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = load <2 x i64>, ptr %a1 ++ %v2 = or <2 x i64> %v0, %v1 ++ store <2 x i64> %v2, ptr %res ++ ret void ++} ++ ++define void @or_u_v16i8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: or_u_v16i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vori.b $vr0, $vr0, 31 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = or <16 x i8> %v0, ++ store <16 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @or_u_v8i16(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: or_u_v8i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vrepli.h $vr1, 31 ++; CHECK-NEXT: vor.v $vr0, $vr0, $vr1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = or <8 x i16> %v0, ++ store <8 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @or_u_v4i32(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: or_u_v4i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vrepli.w $vr1, 31 ++; CHECK-NEXT: vor.v $vr0, $vr0, $vr1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = or <4 x i32> %v0, ++ store <4 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @or_u_v2i64(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: or_u_v2i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vrepli.d $vr1, 31 ++; CHECK-NEXT: vor.v $vr0, $vr0, 
$vr1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = or <2 x i64> %v0, ++ store <2 x i64> %v1, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/xor.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/xor.ll +new file mode 100644 +index 000000000000..ce3e49c990ff +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/xor.ll +@@ -0,0 +1,125 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++define void @xor_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: xor_v16i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = load <16 x i8>, ptr %a1 ++ %v2 = xor <16 x i8> %v0, %v1 ++ store <16 x i8> %v2, ptr %res ++ ret void ++} ++ ++define void @xor_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: xor_v8i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = load <8 x i16>, ptr %a1 ++ %v2 = xor <8 x i16> %v0, %v1 ++ store <8 x i16> %v2, ptr %res ++ ret void ++} ++ ++define void @xor_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: xor_v4i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = load <4 x i32>, ptr %a1 ++ %v2 = xor <4 x i32> %v0, %v1 ++ store <4 x i32> %v2, ptr %res ++ ret void ++} ++ ++define void @xor_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: xor_v2i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = load <2 x i64>, ptr %a1 ++ %v2 = xor <2 x i64> %v0, %v1 ++ store <2 x i64> %v2, ptr %res ++ ret void ++} ++ ++define void @xor_u_v16i8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: xor_u_v16i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vxori.b $vr0, $vr0, 31 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = xor <16 x i8> %v0, ++ store <16 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @xor_u_v8i16(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: xor_u_v8i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vrepli.h $vr1, 31 ++; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = xor <8 x i16> %v0, ++ store <8 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @xor_u_v4i32(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: xor_u_v4i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vrepli.w $vr1, 31 ++; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = xor <4 x i32> %v0, ++ store <4 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void 
@xor_u_v2i64(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: xor_u_v2i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vrepli.d $vr1, 31 ++; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = xor <2 x i64> %v0, ++ store <2 x i64> %v1, ptr %res ++ ret void ++} +-- +2.20.1 + diff --git a/0028-LoongArch-Add-some-binary-IR-instructions-testcases-.patch b/0028-LoongArch-Add-some-binary-IR-instructions-testcases-.patch new file mode 100644 index 0000000000000000000000000000000000000000..9d374205f0f4922f3ab5429294c8b02be7ddc682 --- /dev/null +++ b/0028-LoongArch-Add-some-binary-IR-instructions-testcases-.patch @@ -0,0 +1,1523 @@ +From 14502c64687d2a8524db46d0c952a54ccea17682 Mon Sep 17 00:00:00 2001 +From: leecheechen +Date: Fri, 1 Dec 2023 13:14:11 +0800 +Subject: [PATCH 28/66] [LoongArch] Add some binary IR instructions testcases + for LASX (#74031) + +The IR instructions include: +- Binary Operations: add fadd sub fsub mul fmul udiv sdiv fdiv +- Bitwise Binary Operations: shl lshr ashr + +(cherry picked from commit dbbc7c31c8e55d72dc243b244e386a25132e7215) + +Change-Id: I0f815a26c3e78f6c5cab0a8568e7cec129c7bb42 +--- + .../LoongArch/lasx/ir-instruction/add.ll | 122 +++++++++ + .../LoongArch/lasx/ir-instruction/ashr.ll | 178 +++++++++++++ + .../LoongArch/lasx/ir-instruction/fadd.ll | 34 +++ + .../LoongArch/lasx/ir-instruction/fdiv.ll | 34 +++ + .../LoongArch/lasx/ir-instruction/fmul.ll | 34 +++ + .../LoongArch/lasx/ir-instruction/fsub.ll | 34 +++ + .../LoongArch/lasx/ir-instruction/lshr.ll | 178 +++++++++++++ + .../LoongArch/lasx/ir-instruction/mul.ll | 238 ++++++++++++++++++ + .../LoongArch/lasx/ir-instruction/sdiv.ll | 134 ++++++++++ + .../LoongArch/lasx/ir-instruction/shl.ll | 178 +++++++++++++ + .../LoongArch/lasx/ir-instruction/sub.ll | 122 +++++++++ + .../LoongArch/lasx/ir-instruction/udiv.ll | 122 +++++++++ + 12 files changed, 1408 insertions(+) + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/add.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/ashr.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fadd.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fmul.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fsub.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/lshr.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mul.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sdiv.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shl.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sub.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/udiv.ll + +diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/add.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/add.ll +new file mode 100644 +index 000000000000..8e4d0dc6f1c3 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/add.ll +@@ -0,0 +1,122 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++define void @add_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: add_v32i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld 
$xr1, $a1, 0 ++; CHECK-NEXT: xvadd.b $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = load <32 x i8>, ptr %a1 ++ %v2 = add <32 x i8> %v0, %v1 ++ store <32 x i8> %v2, ptr %res ++ ret void ++} ++ ++define void @add_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: add_v16i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvadd.h $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = load <16 x i16>, ptr %a1 ++ %v2 = add <16 x i16> %v0, %v1 ++ store <16 x i16> %v2, ptr %res ++ ret void ++} ++ ++define void @add_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: add_v8i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvadd.w $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = load <8 x i32>, ptr %a1 ++ %v2 = add <8 x i32> %v0, %v1 ++ store <8 x i32> %v2, ptr %res ++ ret void ++} ++ ++define void @add_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: add_v4i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvadd.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = load <4 x i64>, ptr %a1 ++ %v2 = add <4 x i64> %v0, %v1 ++ store <4 x i64> %v2, ptr %res ++ ret void ++} ++ ++define void @add_v32i8_31(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: add_v32i8_31: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvaddi.bu $xr0, $xr0, 31 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = add <32 x i8> %v0, ++ store <32 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @add_v16i16_31(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: add_v16i16_31: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvaddi.hu $xr0, $xr0, 31 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = add <16 x i16> %v0, ++ store <16 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @add_v8i32_31(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: add_v8i32_31: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvaddi.wu $xr0, $xr0, 31 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = add <8 x i32> %v0, ++ store <8 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @add_v4i64_31(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: add_v4i64_31: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvaddi.du $xr0, $xr0, 31 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = add <4 x i64> %v0, ++ store <4 x i64> %v1, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/ashr.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/ashr.ll +new file mode 100644 +index 000000000000..fcbf0f1400fe +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/ashr.ll +@@ -0,0 +1,178 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++define void 
@ashr_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: ashr_v32i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvsra.b $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = load <32 x i8>, ptr %a1 ++ %v2 = ashr <32 x i8> %v0, %v1 ++ store <32 x i8> %v2, ptr %res ++ ret void ++} ++ ++define void @ashr_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: ashr_v16i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvsra.h $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = load <16 x i16>, ptr %a1 ++ %v2 = ashr <16 x i16> %v0, %v1 ++ store <16 x i16> %v2, ptr %res ++ ret void ++} ++ ++define void @ashr_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: ashr_v8i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvsra.w $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = load <8 x i32>, ptr %a1 ++ %v2 = ashr <8 x i32> %v0, %v1 ++ store <8 x i32> %v2, ptr %res ++ ret void ++} ++ ++define void @ashr_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: ashr_v4i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvsra.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = load <4 x i64>, ptr %a1 ++ %v2 = ashr <4 x i64> %v0, %v1 ++ store <4 x i64> %v2, ptr %res ++ ret void ++} ++ ++define void @ashr_v32i8_1(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: ashr_v32i8_1: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvsrai.b $xr0, $xr0, 1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = ashr <32 x i8> %v0, ++ store <32 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @ashr_v32i8_7(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: ashr_v32i8_7: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvsrai.b $xr0, $xr0, 7 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = ashr <32 x i8> %v0, ++ store <32 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @ashr_v16i16_1(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: ashr_v16i16_1: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvsrai.h $xr0, $xr0, 1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = ashr <16 x i16> %v0, ++ store <16 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @ashr_v16i16_15(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: ashr_v16i16_15: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvsrai.h $xr0, $xr0, 15 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = ashr <16 x i16> %v0, ++ store <16 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @ashr_v8i32_1(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: ashr_v8i32_1: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvsrai.w $xr0, $xr0, 1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 
= ashr <8 x i32> %v0, ++ store <8 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @ashr_v8i32_31(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: ashr_v8i32_31: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvsrai.w $xr0, $xr0, 31 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = ashr <8 x i32> %v0, ++ store <8 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @ashr_v4i64_1(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: ashr_v4i64_1: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvsrai.d $xr0, $xr0, 1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = ashr <4 x i64> %v0, ++ store <4 x i64> %v1, ptr %res ++ ret void ++} ++ ++define void @ashr_v4i64_63(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: ashr_v4i64_63: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvsrai.d $xr0, $xr0, 63 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = ashr <4 x i64> %v0, ++ store <4 x i64> %v1, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fadd.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fadd.ll +new file mode 100644 +index 000000000000..365bb305fc5a +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fadd.ll +@@ -0,0 +1,34 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++define void @fadd_v8f32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: fadd_v8f32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfadd.s $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %v2 = fadd <8 x float> %v0, %v1 ++ store <8 x float> %v2, ptr %res ++ ret void ++} ++ ++define void @fadd_v4f64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: fadd_v4f64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfadd.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %v2 = fadd <4 x double> %v0, %v1 ++ store <4 x double> %v2, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll +new file mode 100644 +index 000000000000..284121a79a49 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll +@@ -0,0 +1,34 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++define void @fdiv_v8f32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: fdiv_v8f32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfdiv.s $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %v2 = fdiv <8 x float> %v0, %v1 ++ store <8 x float> %v2, ptr %res ++ ret void ++} ++ ++define void @fdiv_v4f64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: fdiv_v4f64: ++; 
CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfdiv.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %v2 = fdiv <4 x double> %v0, %v1 ++ store <4 x double> %v2, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fmul.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fmul.ll +new file mode 100644 +index 000000000000..a48dca8d2847 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fmul.ll +@@ -0,0 +1,34 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++define void @fmul_v8f32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: fmul_v8f32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfmul.s $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %v2 = fmul <8 x float> %v0, %v1 ++ store <8 x float> %v2, ptr %res ++ ret void ++} ++ ++define void @fmul_v4f64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: fmul_v4f64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfmul.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %v2 = fmul <4 x double> %v0, %v1 ++ store <4 x double> %v2, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fsub.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fsub.ll +new file mode 100644 +index 000000000000..6164aa5a55c7 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fsub.ll +@@ -0,0 +1,34 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++define void @fsub_v8f32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: fsub_v8f32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfsub.s $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %v2 = fsub <8 x float> %v0, %v1 ++ store <8 x float> %v2, ptr %res ++ ret void ++} ++ ++define void @fsub_v4f64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: fsub_v4f64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfsub.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %v2 = fsub <4 x double> %v0, %v1 ++ store <4 x double> %v2, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/lshr.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/lshr.ll +new file mode 100644 +index 000000000000..24be69d8032a +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/lshr.ll +@@ -0,0 +1,178 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++define 
void @lshr_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: lshr_v32i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvsrl.b $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = load <32 x i8>, ptr %a1 ++ %v2 = lshr <32 x i8> %v0, %v1 ++ store <32 x i8> %v2, ptr %res ++ ret void ++} ++ ++define void @lshr_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: lshr_v16i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvsrl.h $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = load <16 x i16>, ptr %a1 ++ %v2 = lshr <16 x i16> %v0, %v1 ++ store <16 x i16> %v2, ptr %res ++ ret void ++} ++ ++define void @lshr_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: lshr_v8i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvsrl.w $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = load <8 x i32>, ptr %a1 ++ %v2 = lshr <8 x i32> %v0, %v1 ++ store <8 x i32> %v2, ptr %res ++ ret void ++} ++ ++define void @lshr_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: lshr_v4i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvsrl.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = load <4 x i64>, ptr %a1 ++ %v2 = lshr <4 x i64> %v0, %v1 ++ store <4 x i64> %v2, ptr %res ++ ret void ++} ++ ++define void @lshr_v32i8_1(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: lshr_v32i8_1: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvsrli.b $xr0, $xr0, 1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = lshr <32 x i8> %v0, ++ store <32 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @lshr_v32i8_7(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: lshr_v32i8_7: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvsrli.b $xr0, $xr0, 7 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = lshr <32 x i8> %v0, ++ store <32 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @lshr_v16i16_1(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: lshr_v16i16_1: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvsrli.h $xr0, $xr0, 1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = lshr <16 x i16> %v0, ++ store <16 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @lshr_v16i16_15(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: lshr_v16i16_15: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvsrli.h $xr0, $xr0, 15 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = lshr <16 x i16> %v0, ++ store <16 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @lshr_v8i32_1(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: lshr_v8i32_1: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvsrli.w $xr0, $xr0, 1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i32>, ptr %a0 
++ %v1 = lshr <8 x i32> %v0, ++ store <8 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @lshr_v8i32_31(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: lshr_v8i32_31: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvsrli.w $xr0, $xr0, 31 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = lshr <8 x i32> %v0, ++ store <8 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @lshr_v4i64_1(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: lshr_v4i64_1: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvsrli.d $xr0, $xr0, 1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = lshr <4 x i64> %v0, ++ store <4 x i64> %v1, ptr %res ++ ret void ++} ++ ++define void @lshr_v4i64_63(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: lshr_v4i64_63: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvsrli.d $xr0, $xr0, 63 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = lshr <4 x i64> %v0, ++ store <4 x i64> %v1, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mul.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mul.ll +new file mode 100644 +index 000000000000..dcb893caa255 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mul.ll +@@ -0,0 +1,238 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++define void @mul_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: mul_v32i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvmul.b $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = load <32 x i8>, ptr %a1 ++ %v2 = mul <32 x i8> %v0, %v1 ++ store <32 x i8> %v2, ptr %res ++ ret void ++} ++ ++define void @mul_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: mul_v16i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvmul.h $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = load <16 x i16>, ptr %a1 ++ %v2 = mul <16 x i16> %v0, %v1 ++ store <16 x i16> %v2, ptr %res ++ ret void ++} ++ ++define void @mul_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: mul_v8i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvmul.w $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = load <8 x i32>, ptr %a1 ++ %v2 = mul <8 x i32> %v0, %v1 ++ store <8 x i32> %v2, ptr %res ++ ret void ++} ++ ++define void @mul_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: mul_v4i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvmul.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = load <4 x i64>, ptr %a1 ++ %v2 = mul <4 x i64> %v0, %v1 ++ store <4 x i64> %v2, ptr %res ++ ret void ++} ++ ++define void @mul_square_v32i8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: mul_square_v32i8: ++; CHECK: # %bb.0: # %entry ++; 
CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvmul.b $xr0, $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = mul <32 x i8> %v0, %v0 ++ store <32 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @mul_square_v16i16(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: mul_square_v16i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvmul.h $xr0, $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = mul <16 x i16> %v0, %v0 ++ store <16 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @mul_square_v8i32(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: mul_square_v8i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvmul.w $xr0, $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = mul <8 x i32> %v0, %v0 ++ store <8 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @mul_square_v4i64(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: mul_square_v4i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvmul.d $xr0, $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = mul <4 x i64> %v0, %v0 ++ store <4 x i64> %v1, ptr %res ++ ret void ++} ++ ++define void @mul_v32i8_8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: mul_v32i8_8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvslli.b $xr0, $xr0, 3 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = mul <32 x i8> %v0, ++ store <32 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @mul_v16i16_8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: mul_v16i16_8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvslli.h $xr0, $xr0, 3 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = mul <16 x i16> %v0, ++ store <16 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @mul_v8i32_8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: mul_v8i32_8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvslli.w $xr0, $xr0, 3 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = mul <8 x i32> %v0, ++ store <8 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @mul_v4i64_8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: mul_v4i64_8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvslli.d $xr0, $xr0, 3 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = mul <4 x i64> %v0, ++ store <4 x i64> %v1, ptr %res ++ ret void ++} ++ ++define void @mul_v32i8_17(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: mul_v32i8_17: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvrepli.b $xr1, 17 ++; CHECK-NEXT: xvmul.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = mul <32 x i8> %v0, ++ store <32 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @mul_v16i16_17(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: mul_v16i16_17: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvrepli.h $xr1, 17 ++; CHECK-NEXT: xvmul.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret 
++entry: ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = mul <16 x i16> %v0, ++ store <16 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @mul_v8i32_17(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: mul_v8i32_17: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvrepli.w $xr1, 17 ++; CHECK-NEXT: xvmul.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = mul <8 x i32> %v0, ++ store <8 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @mul_v4i64_17(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: mul_v4i64_17: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvrepli.d $xr1, 17 ++; CHECK-NEXT: xvmul.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = mul <4 x i64> %v0, ++ store <4 x i64> %v1, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sdiv.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sdiv.ll +new file mode 100644 +index 000000000000..e3635a5f14a2 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sdiv.ll +@@ -0,0 +1,134 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++define void @sdiv_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: sdiv_v32i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvdiv.b $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = load <32 x i8>, ptr %a1 ++ %v2 = sdiv <32 x i8> %v0, %v1 ++ store <32 x i8> %v2, ptr %res ++ ret void ++} ++ ++define void @sdiv_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: sdiv_v16i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvdiv.h $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = load <16 x i16>, ptr %a1 ++ %v2 = sdiv <16 x i16> %v0, %v1 ++ store <16 x i16> %v2, ptr %res ++ ret void ++} ++ ++define void @sdiv_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: sdiv_v8i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvdiv.w $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = load <8 x i32>, ptr %a1 ++ %v2 = sdiv <8 x i32> %v0, %v1 ++ store <8 x i32> %v2, ptr %res ++ ret void ++} ++ ++define void @sdiv_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: sdiv_v4i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvdiv.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = load <4 x i64>, ptr %a1 ++ %v2 = sdiv <4 x i64> %v0, %v1 ++ store <4 x i64> %v2, ptr %res ++ ret void ++} ++ ++define void @sdiv_v32i8_8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: sdiv_v32i8_8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvsrai.b $xr1, $xr0, 7 ++; CHECK-NEXT: xvsrli.b $xr1, $xr1, 5 ++; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: xvsrai.b $xr0, $xr0, 3 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 
= load <32 x i8>, ptr %a0 ++ %v1 = sdiv <32 x i8> %v0, ++ store <32 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @sdiv_v16i16_8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: sdiv_v16i16_8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvsrai.h $xr1, $xr0, 15 ++; CHECK-NEXT: xvsrli.h $xr1, $xr1, 13 ++; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: xvsrai.h $xr0, $xr0, 3 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = sdiv <16 x i16> %v0, ++ store <16 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @sdiv_v8i32_8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: sdiv_v8i32_8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvsrai.w $xr1, $xr0, 31 ++; CHECK-NEXT: xvsrli.w $xr1, $xr1, 29 ++; CHECK-NEXT: xvadd.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: xvsrai.w $xr0, $xr0, 3 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = sdiv <8 x i32> %v0, ++ store <8 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @sdiv_v4i64_8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: sdiv_v4i64_8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvsrai.d $xr1, $xr0, 63 ++; CHECK-NEXT: xvsrli.d $xr1, $xr1, 61 ++; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: xvsrai.d $xr0, $xr0, 3 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = sdiv <4 x i64> %v0, ++ store <4 x i64> %v1, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shl.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shl.ll +new file mode 100644 +index 000000000000..8a02c7e3ac97 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shl.ll +@@ -0,0 +1,178 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++define void @shl_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: shl_v32i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvsll.b $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = load <32 x i8>, ptr %a1 ++ %v2 = shl <32 x i8> %v0, %v1 ++ store <32 x i8> %v2, ptr %res ++ ret void ++} ++ ++define void @shl_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: shl_v16i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvsll.h $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = load <16 x i16>, ptr %a1 ++ %v2 = shl <16 x i16> %v0, %v1 ++ store <16 x i16> %v2, ptr %res ++ ret void ++} ++ ++define void @shl_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: shl_v8i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvsll.w $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = load <8 x i32>, ptr %a1 ++ %v2 = shl <8 x i32> %v0, %v1 ++ store <8 x i32> %v2, ptr %res ++ ret void ++} ++ ++define void @shl_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: shl_v4i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 
++; CHECK-NEXT: xvsll.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = load <4 x i64>, ptr %a1 ++ %v2 = shl <4 x i64> %v0, %v1 ++ store <4 x i64> %v2, ptr %res ++ ret void ++} ++ ++define void @shl_v32i8_1(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: shl_v32i8_1: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvslli.b $xr0, $xr0, 1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = shl <32 x i8> %v0, ++ store <32 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @shl_v32i8_7(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: shl_v32i8_7: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvslli.b $xr0, $xr0, 7 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = shl <32 x i8> %v0, ++ store <32 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @shl_v16i16_1(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: shl_v16i16_1: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvslli.h $xr0, $xr0, 1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = shl <16 x i16> %v0, ++ store <16 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @shl_v16i16_15(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: shl_v16i16_15: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvslli.h $xr0, $xr0, 15 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = shl <16 x i16> %v0, ++ store <16 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @shl_v8i32_1(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: shl_v8i32_1: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvslli.w $xr0, $xr0, 1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = shl <8 x i32> %v0, ++ store <8 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @shl_v8i32_31(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: shl_v8i32_31: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvslli.w $xr0, $xr0, 31 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = shl <8 x i32> %v0, ++ store <8 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @shl_v4i64_1(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: shl_v4i64_1: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvslli.d $xr0, $xr0, 1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = shl <4 x i64> %v0, ++ store <4 x i64> %v1, ptr %res ++ ret void ++} ++ ++define void @shl_v4i64_63(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: shl_v4i64_63: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvslli.d $xr0, $xr0, 63 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = shl <4 x i64> %v0, ++ store <4 x i64> %v1, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sub.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sub.ll +new file mode 100644 +index 000000000000..bcfff1651477 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sub.ll +@@ -0,0 +1,122 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
UTC_ARGS: --version 4 ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++define void @sub_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: sub_v32i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvsub.b $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = load <32 x i8>, ptr %a1 ++ %v2 = sub <32 x i8> %v0, %v1 ++ store <32 x i8> %v2, ptr %res ++ ret void ++} ++ ++define void @sub_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: sub_v16i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvsub.h $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = load <16 x i16>, ptr %a1 ++ %v2 = sub <16 x i16> %v0, %v1 ++ store <16 x i16> %v2, ptr %res ++ ret void ++} ++ ++define void @sub_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: sub_v8i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvsub.w $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = load <8 x i32>, ptr %a1 ++ %v2 = sub <8 x i32> %v0, %v1 ++ store <8 x i32> %v2, ptr %res ++ ret void ++} ++ ++define void @sub_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: sub_v4i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvsub.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = load <4 x i64>, ptr %a1 ++ %v2 = sub <4 x i64> %v0, %v1 ++ store <4 x i64> %v2, ptr %res ++ ret void ++} ++ ++define void @sub_v32i8_31(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: sub_v32i8_31: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvsubi.bu $xr0, $xr0, 31 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = sub <32 x i8> %v0, ++ store <32 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @sub_v16i16_31(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: sub_v16i16_31: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvsubi.hu $xr0, $xr0, 31 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = sub <16 x i16> %v0, ++ store <16 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @sub_v8i32_31(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: sub_v8i32_31: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvsubi.wu $xr0, $xr0, 31 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = sub <8 x i32> %v0, ++ store <8 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @sub_v4i64_31(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: sub_v4i64_31: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvsubi.du $xr0, $xr0, 31 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = sub <4 x i64> %v0, ++ store <4 x i64> %v1, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/udiv.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/udiv.ll +new file mode 100644 +index 000000000000..e78084c7186d +--- /dev/null ++++ 
b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/udiv.ll +@@ -0,0 +1,122 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++define void @udiv_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: udiv_v32i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvdiv.bu $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = load <32 x i8>, ptr %a1 ++ %v2 = udiv <32 x i8> %v0, %v1 ++ store <32 x i8> %v2, ptr %res ++ ret void ++} ++ ++define void @udiv_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: udiv_v16i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvdiv.hu $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = load <16 x i16>, ptr %a1 ++ %v2 = udiv <16 x i16> %v0, %v1 ++ store <16 x i16> %v2, ptr %res ++ ret void ++} ++ ++define void @udiv_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: udiv_v8i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvdiv.wu $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = load <8 x i32>, ptr %a1 ++ %v2 = udiv <8 x i32> %v0, %v1 ++ store <8 x i32> %v2, ptr %res ++ ret void ++} ++ ++define void @udiv_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: udiv_v4i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvdiv.du $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = load <4 x i64>, ptr %a1 ++ %v2 = udiv <4 x i64> %v0, %v1 ++ store <4 x i64> %v2, ptr %res ++ ret void ++} ++ ++define void @udiv_v32i8_8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: udiv_v32i8_8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvsrli.b $xr0, $xr0, 3 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = udiv <32 x i8> %v0, ++ store <32 x i8> %v1, ptr %res ++ ret void ++} ++ ++define void @udiv_v16i16_8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: udiv_v16i16_8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvsrli.h $xr0, $xr0, 3 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = udiv <16 x i16> %v0, ++ store <16 x i16> %v1, ptr %res ++ ret void ++} ++ ++define void @udiv_v8i32_8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: udiv_v8i32_8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvsrli.w $xr0, $xr0, 3 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = udiv <8 x i32> %v0, ++ store <8 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @udiv_v4i64_8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: udiv_v4i64_8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvsrli.d $xr0, $xr0, 3 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = udiv <4 x i64> %v0, ++ store <4 x i64> %v1, ptr %res ++ ret void ++} +-- +2.20.1 + diff --git 
a/0029-LoongArch-Override-TargetLowering-isShuffleMaskLegal.patch b/0029-LoongArch-Override-TargetLowering-isShuffleMaskLegal.patch new file mode 100644 index 0000000000000000000000000000000000000000..05c02727ba90dd4166ea34a2ed22e14756866703 --- /dev/null +++ b/0029-LoongArch-Override-TargetLowering-isShuffleMaskLegal.patch @@ -0,0 +1,116 @@ +From b18f81359a267958fef5c9e6f203fca6e891cce6 Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Sat, 2 Dec 2023 14:25:17 +0800 +Subject: [PATCH 29/66] [LoongArch] Override TargetLowering::isShuffleMaskLegal + + By default, `isShuffleMaskLegal` always returns true, which can result + in the expansion of `BUILD_VECTOR` into a `VECTOR_SHUFFLE` node in + certain situations. Subsequently, the `VECTOR_SHUFFLE` node is expanded + again into a `BUILD_VECTOR`, leading to an infinite loop. + To address this, we always return false, allowing the expansion of + `BUILD_VECTOR` through the stack. + +(cherry picked from commit 66a3e4fafb6eae19764f8a192ca3a116c0554211) + +Change-Id: Idb8d366a812d3e2b258023a60a1625b9e7ce7735 +--- + .../LoongArch/LoongArchISelLowering.cpp | 10 +++++++++ + .../Target/LoongArch/LoongArchISelLowering.h | 5 +++++ + .../CodeGen/LoongArch/lsx/build-vector.ll | 22 +++++++++++++++++++ + 3 files changed, 37 insertions(+) + +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +index 1b60bfc3bddb..e45f21265d7b 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +@@ -239,6 +239,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + } + for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) { ++ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); + setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal); + setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT, + Legal); +@@ -268,6 +269,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + } + for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) { ++ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); + setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal); + setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT, + Legal); +@@ -370,10 +372,18 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op, + return lowerINSERT_VECTOR_ELT(Op, DAG); + case ISD::BUILD_VECTOR: + return lowerBUILD_VECTOR(Op, DAG); ++ case ISD::VECTOR_SHUFFLE: ++ return lowerVECTOR_SHUFFLE(Op, DAG); + } + return SDValue(); + } + ++SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, ++ SelectionDAG &DAG) const { ++ // TODO: custom shuffle. ++ return SDValue(); ++} ++ + static bool isConstantOrUndef(const SDValue Op) { + if (Op->isUndef()) + return true; +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +index 111376306374..2c35f9e5d378 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +@@ -230,6 +230,10 @@ public: + MachineMemOperand::Flags Flags = MachineMemOperand::MONone, + unsigned *Fast = nullptr) const override; + ++ bool isShuffleMaskLegal(ArrayRef Mask, EVT VT) const override { ++ return false; ++ } ++ + private: + /// Target-specific function used to lower LoongArch calling conventions. 
+ typedef bool LoongArchCCAssignFn(const DataLayout &DL, LoongArchABI::ABI ABI, +@@ -277,6 +281,7 @@ private: + SDValue lowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; + + bool isFPImmLegal(const APFloat &Imm, EVT VT, + bool ForCodeSize) const override; +diff --git a/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll +index 3a74db5e1acb..ed1f610a5fa6 100644 +--- a/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll ++++ b/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll +@@ -374,3 +374,25 @@ entry: + store <2 x double> %ins1, ptr %dst + ret void + } ++ ++;; BUILD_VECTOR through stack. ++;; If `isShuffleMaskLegal` returns true, it will lead to an infinite loop. ++define void @extract1_i32_zext_insert0_i64_undef(ptr %src, ptr %dst) nounwind { ++; CHECK-LABEL: extract1_i32_zext_insert0_i64_undef: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -16 ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 1 ++; CHECK-NEXT: bstrpick.d $a0, $a0, 31, 0 ++; CHECK-NEXT: st.d $a0, $sp, 0 ++; CHECK-NEXT: vld $vr0, $sp, 0 ++; CHECK-NEXT: vst $vr0, $a1, 0 ++; CHECK-NEXT: addi.d $sp, $sp, 16 ++; CHECK-NEXT: ret ++ %v = load volatile <4 x i32>, ptr %src ++ %e = extractelement <4 x i32> %v, i32 1 ++ %z = zext i32 %e to i64 ++ %r = insertelement <2 x i64> undef, i64 %z, i32 0 ++ store <2 x i64> %r, ptr %dst ++ ret void ++} +-- +2.20.1 + diff --git a/0030-Reland-LoongArch-Support-CTLZ-with-lsx-lasx.patch b/0030-Reland-LoongArch-Support-CTLZ-with-lsx-lasx.patch new file mode 100644 index 0000000000000000000000000000000000000000..30e09ed048b427bcbfcd96e1a6e1df8e9bb55f59 --- /dev/null +++ b/0030-Reland-LoongArch-Support-CTLZ-with-lsx-lasx.patch @@ -0,0 +1,390 @@ +From 4bbe405d5a8a789f23a4b430fe619c1cdfa3d631 Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Sat, 2 Dec 2023 16:24:33 +0800 +Subject: [PATCH 30/66] Reland "[LoongArch] Support CTLZ with lsx/lasx" + +This patch simultaneously adds tests for `CTPOP`. + +This relands 07cec73dcd095035257eec1f213d273b10988130 with fix tests. 
+ +(cherry picked from commit a60a5421b60be1bce0272385fa16846ada5eed5e) + +Change-Id: I577d0c54b78e4d90ab5fce1afc407d526c793429 +--- + .../LoongArch/LoongArchISelLowering.cpp | 13 +- + .../LoongArch/LoongArchLASXInstrInfo.td | 11 +- + .../Target/LoongArch/LoongArchLSXInstrInfo.td | 11 +- + .../test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll | 115 ++++++++++++++++++ + llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll | 115 ++++++++++++++++++ + 5 files changed, 255 insertions(+), 10 deletions(-) + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll + +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +index e45f21265d7b..358263b1a258 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +@@ -247,7 +247,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + VT, Legal); + setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal); + setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal); +- setOperationAction(ISD::CTPOP, VT, Legal); ++ setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal); + } + for (MVT VT : {MVT::v4f32, MVT::v2f64}) { + setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); +@@ -277,7 +277,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + VT, Legal); + setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal); + setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal); +- setOperationAction(ISD::CTPOP, VT, Legal); ++ setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal); + } + for (MVT VT : {MVT::v8f32, MVT::v4f64}) { + setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); +@@ -2800,6 +2800,15 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, + case Intrinsic::loongarch_lasx_xvsrai_d: + return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<6>(N, 2, DAG)); ++ case Intrinsic::loongarch_lsx_vclz_b: ++ case Intrinsic::loongarch_lsx_vclz_h: ++ case Intrinsic::loongarch_lsx_vclz_w: ++ case Intrinsic::loongarch_lsx_vclz_d: ++ case Intrinsic::loongarch_lasx_xvclz_b: ++ case Intrinsic::loongarch_lasx_xvclz_h: ++ case Intrinsic::loongarch_lasx_xvclz_w: ++ case Intrinsic::loongarch_lasx_xvclz_d: ++ return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1)); + case Intrinsic::loongarch_lsx_vpcnt_b: + case Intrinsic::loongarch_lsx_vpcnt_h: + case Intrinsic::loongarch_lsx_vpcnt_w: +diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +index a5652472481a..960ac627578c 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +@@ -1273,6 +1273,9 @@ defm : PatXrXr; + defm : PatShiftXrXr; + defm : PatShiftXrUimm; + ++// XVCLZ_{B/H/W/D} ++defm : PatXr; ++ + // XVPCNT_{B/H/W/D} + defm : PatXr; + +@@ -1590,26 +1593,26 @@ foreach Inst = ["XVMADDWEV_Q_D", "XVMADDWOD_Q_D", "XVMADDWEV_Q_DU", + // (LAInst vty:$xj)>; + foreach Inst = ["XVEXTH_H_B", "XVEXTH_HU_BU", + "XVMSKLTZ_B", "XVMSKGEZ_B", "XVMSKNZ_B", +- "XVCLO_B", "XVCLZ_B", "VEXT2XV_H_B", "VEXT2XV_HU_BU", ++ "XVCLO_B", "VEXT2XV_H_B", "VEXT2XV_HU_BU", + "VEXT2XV_W_B", "VEXT2XV_WU_BU", "VEXT2XV_D_B", + "VEXT2XV_DU_BU", "XVREPLVE0_B", "XVREPLVE0_Q"] in + def : Pat<(deriveLASXIntrinsic.ret (v32i8 LASX256:$xj)), + (!cast(Inst) LASX256:$xj)>; + foreach Inst = 
["XVEXTH_W_H", "XVEXTH_WU_HU", "XVMSKLTZ_H", +- "XVCLO_H", "XVCLZ_H", "XVFCVTL_S_H", "XVFCVTH_S_H", ++ "XVCLO_H", "XVFCVTL_S_H", "XVFCVTH_S_H", + "VEXT2XV_W_H", "VEXT2XV_WU_HU", "VEXT2XV_D_H", + "VEXT2XV_DU_HU", "XVREPLVE0_H"] in + def : Pat<(deriveLASXIntrinsic.ret (v16i16 LASX256:$xj)), + (!cast(Inst) LASX256:$xj)>; + foreach Inst = ["XVEXTH_D_W", "XVEXTH_DU_WU", "XVMSKLTZ_W", +- "XVCLO_W", "XVCLZ_W", "XVFFINT_S_W", "XVFFINT_S_WU", ++ "XVCLO_W", "XVFFINT_S_W", "XVFFINT_S_WU", + "XVFFINTL_D_W", "XVFFINTH_D_W", + "VEXT2XV_D_W", "VEXT2XV_DU_WU", "XVREPLVE0_W"] in + def : Pat<(deriveLASXIntrinsic.ret (v8i32 LASX256:$xj)), + (!cast(Inst) LASX256:$xj)>; + foreach Inst = ["XVEXTH_Q_D", "XVEXTH_QU_DU", "XVMSKLTZ_D", + "XVEXTL_Q_D", "XVEXTL_QU_DU", +- "XVCLO_D", "XVCLZ_D", "XVFFINT_D_L", "XVFFINT_D_LU", ++ "XVCLO_D", "XVFFINT_D_L", "XVFFINT_D_LU", + "XVREPLVE0_D"] in + def : Pat<(deriveLASXIntrinsic.ret (v4i64 LASX256:$xj)), + (!cast(Inst) LASX256:$xj)>; +diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +index 5645ce51194a..3480ade9eebf 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +@@ -1350,6 +1350,9 @@ defm : PatVrVr; + defm : PatShiftVrVr; + defm : PatShiftVrUimm; + ++// VCLZ_{B/H/W/D} ++defm : PatVr; ++ + // VPCNT_{B/H/W/D} + defm : PatVr; + +@@ -1674,21 +1677,21 @@ foreach Inst = ["VMADDWEV_Q_D", "VMADDWOD_Q_D", "VMADDWEV_Q_DU", + // (LAInst vty:$vj)>; + foreach Inst = ["VEXTH_H_B", "VEXTH_HU_BU", + "VMSKLTZ_B", "VMSKGEZ_B", "VMSKNZ_B", +- "VCLO_B", "VCLZ_B"] in ++ "VCLO_B"] in + def : Pat<(deriveLSXIntrinsic.ret (v16i8 LSX128:$vj)), + (!cast(Inst) LSX128:$vj)>; + foreach Inst = ["VEXTH_W_H", "VEXTH_WU_HU", "VMSKLTZ_H", +- "VCLO_H", "VCLZ_H", "VFCVTL_S_H", "VFCVTH_S_H"] in ++ "VCLO_H", "VFCVTL_S_H", "VFCVTH_S_H"] in + def : Pat<(deriveLSXIntrinsic.ret (v8i16 LSX128:$vj)), + (!cast(Inst) LSX128:$vj)>; + foreach Inst = ["VEXTH_D_W", "VEXTH_DU_WU", "VMSKLTZ_W", +- "VCLO_W", "VCLZ_W", "VFFINT_S_W", "VFFINT_S_WU", ++ "VCLO_W", "VFFINT_S_W", "VFFINT_S_WU", + "VFFINTL_D_W", "VFFINTH_D_W"] in + def : Pat<(deriveLSXIntrinsic.ret (v4i32 LSX128:$vj)), + (!cast(Inst) LSX128:$vj)>; + foreach Inst = ["VEXTH_Q_D", "VEXTH_QU_DU", "VMSKLTZ_D", + "VEXTL_Q_D", "VEXTL_QU_DU", +- "VCLO_D", "VCLZ_D", "VFFINT_D_L", "VFFINT_D_LU"] in ++ "VCLO_D", "VFFINT_D_L", "VFFINT_D_LU"] in + def : Pat<(deriveLSXIntrinsic.ret (v2i64 LSX128:$vj)), + (!cast(Inst) LSX128:$vj)>; + +diff --git a/llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll b/llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll +new file mode 100644 +index 000000000000..7786e399c95f +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll +@@ -0,0 +1,115 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++define void @ctpop_v32i8(ptr %src, ptr %dst) nounwind { ++; CHECK-LABEL: ctpop_v32i8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: xvpcnt.b $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load <32 x i8>, ptr %src ++ %res = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %v) ++ store <32 x i8> %res, ptr %dst ++ ret void ++} ++ ++define void @ctpop_v16i16(ptr %src, ptr %dst) nounwind { ++; CHECK-LABEL: ctpop_v16i16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: xvpcnt.h $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load 
<16 x i16>, ptr %src ++ %res = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %v) ++ store <16 x i16> %res, ptr %dst ++ ret void ++} ++ ++define void @ctpop_v8i32(ptr %src, ptr %dst) nounwind { ++; CHECK-LABEL: ctpop_v8i32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: xvpcnt.w $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load <8 x i32>, ptr %src ++ %res = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %v) ++ store <8 x i32> %res, ptr %dst ++ ret void ++} ++ ++define void @ctpop_v4i64(ptr %src, ptr %dst) nounwind { ++; CHECK-LABEL: ctpop_v4i64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: xvpcnt.d $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load <4 x i64>, ptr %src ++ %res = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %v) ++ store <4 x i64> %res, ptr %dst ++ ret void ++} ++ ++define void @ctlz_v32i8(ptr %src, ptr %dst) nounwind { ++; CHECK-LABEL: ctlz_v32i8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: xvclz.b $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load <32 x i8>, ptr %src ++ %res = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %v, i1 false) ++ store <32 x i8> %res, ptr %dst ++ ret void ++} ++ ++define void @ctlz_v16i16(ptr %src, ptr %dst) nounwind { ++; CHECK-LABEL: ctlz_v16i16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: xvclz.h $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load <16 x i16>, ptr %src ++ %res = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %v, i1 false) ++ store <16 x i16> %res, ptr %dst ++ ret void ++} ++ ++define void @ctlz_v8i32(ptr %src, ptr %dst) nounwind { ++; CHECK-LABEL: ctlz_v8i32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: xvclz.w $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load <8 x i32>, ptr %src ++ %res = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %v, i1 false) ++ store <8 x i32> %res, ptr %dst ++ ret void ++} ++ ++define void @ctlz_v4i64(ptr %src, ptr %dst) nounwind { ++; CHECK-LABEL: ctlz_v4i64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: xvclz.d $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load <4 x i64>, ptr %src ++ %res = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %v, i1 false) ++ store <4 x i64> %res, ptr %dst ++ ret void ++} ++ ++declare <32 x i8> @llvm.ctpop.v32i8(<32 x i8>) ++declare <16 x i16> @llvm.ctpop.v16i16(<16 x i16>) ++declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>) ++declare <4 x i64> @llvm.ctpop.v4i64(<4 x i64>) ++declare <32 x i8> @llvm.ctlz.v32i8(<32 x i8>, i1) ++declare <16 x i16> @llvm.ctlz.v16i16(<16 x i16>, i1) ++declare <8 x i32> @llvm.ctlz.v8i32(<8 x i32>, i1) ++declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1) +diff --git a/llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll b/llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll +new file mode 100644 +index 000000000000..5df553fba7ef +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll +@@ -0,0 +1,115 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++define void @ctpop_v16i8(ptr %src, ptr %dst) nounwind { ++; CHECK-LABEL: ctpop_v16i8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vpcnt.b $vr0, $vr0 ++; CHECK-NEXT: vst $vr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load <16 x i8>, ptr %src ++ %res = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %v) ++ 
store <16 x i8> %res, ptr %dst ++ ret void ++} ++ ++define void @ctpop_v8i16(ptr %src, ptr %dst) nounwind { ++; CHECK-LABEL: ctpop_v8i16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vpcnt.h $vr0, $vr0 ++; CHECK-NEXT: vst $vr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load <8 x i16>, ptr %src ++ %res = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %v) ++ store <8 x i16> %res, ptr %dst ++ ret void ++} ++ ++define void @ctpop_v4i32(ptr %src, ptr %dst) nounwind { ++; CHECK-LABEL: ctpop_v4i32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vpcnt.w $vr0, $vr0 ++; CHECK-NEXT: vst $vr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load <4 x i32>, ptr %src ++ %res = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %v) ++ store <4 x i32> %res, ptr %dst ++ ret void ++} ++ ++define void @ctpop_v2i64(ptr %src, ptr %dst) nounwind { ++; CHECK-LABEL: ctpop_v2i64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vpcnt.d $vr0, $vr0 ++; CHECK-NEXT: vst $vr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load <2 x i64>, ptr %src ++ %res = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %v) ++ store <2 x i64> %res, ptr %dst ++ ret void ++} ++ ++define void @ctlz_v16i8(ptr %src, ptr %dst) nounwind { ++; CHECK-LABEL: ctlz_v16i8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vclz.b $vr0, $vr0 ++; CHECK-NEXT: vst $vr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load <16 x i8>, ptr %src ++ %res = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %v, i1 false) ++ store <16 x i8> %res, ptr %dst ++ ret void ++} ++ ++define void @ctlz_v8i16(ptr %src, ptr %dst) nounwind { ++; CHECK-LABEL: ctlz_v8i16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vclz.h $vr0, $vr0 ++; CHECK-NEXT: vst $vr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load <8 x i16>, ptr %src ++ %res = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %v, i1 false) ++ store <8 x i16> %res, ptr %dst ++ ret void ++} ++ ++define void @ctlz_v4i32(ptr %src, ptr %dst) nounwind { ++; CHECK-LABEL: ctlz_v4i32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vclz.w $vr0, $vr0 ++; CHECK-NEXT: vst $vr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load <4 x i32>, ptr %src ++ %res = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %v, i1 false) ++ store <4 x i32> %res, ptr %dst ++ ret void ++} ++ ++define void @ctlz_v2i64(ptr %src, ptr %dst) nounwind { ++; CHECK-LABEL: ctlz_v2i64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a0, 0 ++; CHECK-NEXT: vclz.d $vr0, $vr0 ++; CHECK-NEXT: vst $vr0, $a1, 0 ++; CHECK-NEXT: ret ++ %v = load <2 x i64>, ptr %src ++ %res = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %v, i1 false) ++ store <2 x i64> %res, ptr %dst ++ ret void ++} ++ ++declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>) ++declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>) ++declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>) ++declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>) ++declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1) ++declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1) ++declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) ++declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) +-- +2.20.1 + diff --git a/0031-LoongArch-Support-MULHS-MULHU-with-lsx-lasx.patch b/0031-LoongArch-Support-MULHS-MULHU-with-lsx-lasx.patch new file mode 100644 index 0000000000000000000000000000000000000000..88308295b7e97ebda786a53878030a74b1b14811 --- /dev/null +++ b/0031-LoongArch-Support-MULHS-MULHU-with-lsx-lasx.patch @@ -0,0 +1,409 @@ +From 397fac3cd60fbbe17e8e99bfa79c6358b7d53df0 Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Mon, 4 Dec 2023 10:44:39 +0800 
+Subject: [PATCH 31/66] [LoongArch] Support MULHS/MULHU with lsx/lasx + +Mark MULHS/MULHU nodes as legal and adds the necessary patterns. + +(cherry picked from commit e9cd197d15300f186a5a32092103add65fbd3f50) + +Change-Id: I2b69fcb55e566f9285d27fa52be314c40e79c48f +--- + .../LoongArch/LoongArchISelLowering.cpp | 2 + + .../LoongArch/LoongArchLASXInstrInfo.td | 4 + + .../Target/LoongArch/LoongArchLSXInstrInfo.td | 4 + + llvm/test/CodeGen/LoongArch/lasx/mulh.ll | 162 ++++++++++++++++++ + llvm/test/CodeGen/LoongArch/lsx/mulh.ll | 162 ++++++++++++++++++ + 5 files changed, 334 insertions(+) + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/mulh.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/mulh.ll + +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +index 358263b1a258..3d8d6898a4d5 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +@@ -248,6 +248,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal); + setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal); + setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal); ++ setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal); + } + for (MVT VT : {MVT::v4f32, MVT::v2f64}) { + setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); +@@ -278,6 +279,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal); + setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal); + setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal); ++ setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal); + } + for (MVT VT : {MVT::v8f32, MVT::v4f64}) { + setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); +diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +index 960ac627578c..240f28b0dc5a 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +@@ -1217,6 +1217,10 @@ defm : PatXrUimm5; + // XVMUL_{B/H/W/D} + defm : PatXrXr; + ++// XVMUH_{B/H/W/D}[U] ++defm : PatXrXr; ++defm : PatXrXrU; ++ + // XVMADD_{B/H/W/D} + defm : PatXrXrXr; + // XVMSUB_{B/H/W/D} +diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +index 3480ade9eebf..fb4726c530b5 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +@@ -1294,6 +1294,10 @@ defm : PatVrUimm5; + // VMUL_{B/H/W/D} + defm : PatVrVr; + ++// VMUH_{B/H/W/D}[U] ++defm : PatVrVr; ++defm : PatVrVrU; ++ + // VMADD_{B/H/W/D} + defm : PatVrVrVr; + // VMSUB_{B/H/W/D} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/mulh.ll b/llvm/test/CodeGen/LoongArch/lasx/mulh.ll +new file mode 100644 +index 000000000000..aac711a4a371 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/mulh.ll +@@ -0,0 +1,162 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++define void @mulhs_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: mulhs_v32i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvmuh.b $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ 
%v0 = load <32 x i8>, ptr %a0 ++ %v1 = load <32 x i8>, ptr %a1 ++ %v0s = sext <32 x i8> %v0 to <32 x i16> ++ %v1s = sext <32 x i8> %v1 to <32 x i16> ++ %m = mul <32 x i16> %v0s, %v1s ++ %s = ashr <32 x i16> %m, ++ %v2 = trunc <32 x i16> %s to <32 x i8> ++ store <32 x i8> %v2, ptr %res ++ ret void ++} ++ ++define void @mulhu_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: mulhu_v32i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvmuh.bu $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = load <32 x i8>, ptr %a1 ++ %v0z = zext <32 x i8> %v0 to <32 x i16> ++ %v1z = zext <32 x i8> %v1 to <32 x i16> ++ %m = mul <32 x i16> %v0z, %v1z ++ %s = lshr <32 x i16> %m, ++ %v2 = trunc <32 x i16> %s to <32 x i8> ++ store <32 x i8> %v2, ptr %res ++ ret void ++} ++ ++define void @mulhs_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: mulhs_v16i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvmuh.h $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = load <16 x i16>, ptr %a1 ++ %v0s = sext <16 x i16> %v0 to <16 x i32> ++ %v1s = sext <16 x i16> %v1 to <16 x i32> ++ %m = mul <16 x i32> %v0s, %v1s ++ %s = ashr <16 x i32> %m, ++ %v2 = trunc <16 x i32> %s to <16 x i16> ++ store <16 x i16> %v2, ptr %res ++ ret void ++} ++ ++define void @mulhu_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: mulhu_v16i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvmuh.hu $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = load <16 x i16>, ptr %a1 ++ %v0z = zext <16 x i16> %v0 to <16 x i32> ++ %v1z = zext <16 x i16> %v1 to <16 x i32> ++ %m = mul <16 x i32> %v0z, %v1z ++ %s = lshr <16 x i32> %m, ++ %v2 = trunc <16 x i32> %s to <16 x i16> ++ store <16 x i16> %v2, ptr %res ++ ret void ++} ++ ++define void @mulhs_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: mulhs_v8i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvmuh.w $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = load <8 x i32>, ptr %a1 ++ %v0s = sext <8 x i32> %v0 to <8 x i64> ++ %v1s = sext <8 x i32> %v1 to <8 x i64> ++ %m = mul <8 x i64> %v0s, %v1s ++ %s = ashr <8 x i64> %m, ++ %v2 = trunc <8 x i64> %s to <8 x i32> ++ store <8 x i32> %v2, ptr %res ++ ret void ++} ++ ++define void @mulhu_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: mulhu_v8i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvmuh.wu $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = load <8 x i32>, ptr %a1 ++ %v0z = zext <8 x i32> %v0 to <8 x i64> ++ %v1z = zext <8 x i32> %v1 to <8 x i64> ++ %m = mul <8 x i64> %v0z, %v1z ++ %s = lshr <8 x i64> %m, ++ %v2 = trunc <8 x i64> %s to <8 x i32> ++ store <8 x i32> %v2, ptr %res ++ ret void ++} ++ ++define void @mulhs_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: mulhs_v4i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: 
xvmuh.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = load <4 x i64>, ptr %a1 ++ %v0s = sext <4 x i64> %v0 to <4 x i128> ++ %v1s = sext <4 x i64> %v1 to <4 x i128> ++ %m = mul <4 x i128> %v0s, %v1s ++ %s = ashr <4 x i128> %m, ++ %v2 = trunc <4 x i128> %s to <4 x i64> ++ store <4 x i64> %v2, ptr %res ++ ret void ++} ++ ++define void @mulhu_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: mulhu_v4i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvmuh.du $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = load <4 x i64>, ptr %a1 ++ %v0z = zext <4 x i64> %v0 to <4 x i128> ++ %v1z = zext <4 x i64> %v1 to <4 x i128> ++ %m = mul <4 x i128> %v0z, %v1z ++ %s = lshr <4 x i128> %m, ++ %v2 = trunc <4 x i128> %s to <4 x i64> ++ store <4 x i64> %v2, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/mulh.ll b/llvm/test/CodeGen/LoongArch/lsx/mulh.ll +new file mode 100644 +index 000000000000..e1388f00e355 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/mulh.ll +@@ -0,0 +1,162 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++define void @mulhs_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: mulhs_v16i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vmuh.b $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = load <16 x i8>, ptr %a1 ++ %v0s = sext <16 x i8> %v0 to <16 x i16> ++ %v1s = sext <16 x i8> %v1 to <16 x i16> ++ %m = mul <16 x i16> %v0s, %v1s ++ %s = ashr <16 x i16> %m, ++ %v2 = trunc <16 x i16> %s to <16 x i8> ++ store <16 x i8> %v2, ptr %res ++ ret void ++} ++ ++define void @mulhu_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: mulhu_v16i8: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vmuh.bu $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = load <16 x i8>, ptr %a1 ++ %v0z = zext <16 x i8> %v0 to <16 x i16> ++ %v1z = zext <16 x i8> %v1 to <16 x i16> ++ %m = mul <16 x i16> %v0z, %v1z ++ %s = lshr <16 x i16> %m, ++ %v2 = trunc <16 x i16> %s to <16 x i8> ++ store <16 x i8> %v2, ptr %res ++ ret void ++} ++ ++define void @mulhs_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: mulhs_v8i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vmuh.h $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = load <8 x i16>, ptr %a1 ++ %v0s = sext <8 x i16> %v0 to <8 x i32> ++ %v1s = sext <8 x i16> %v1 to <8 x i32> ++ %m = mul <8 x i32> %v0s, %v1s ++ %s = ashr <8 x i32> %m, ++ %v2 = trunc <8 x i32> %s to <8 x i16> ++ store <8 x i16> %v2, ptr %res ++ ret void ++} ++ ++define void @mulhu_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: mulhu_v8i16: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vmuh.hu $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = load <8 x i16>, ptr %a1 ++ %v0z = 
zext <8 x i16> %v0 to <8 x i32> ++ %v1z = zext <8 x i16> %v1 to <8 x i32> ++ %m = mul <8 x i32> %v0z, %v1z ++ %s = lshr <8 x i32> %m, ++ %v2 = trunc <8 x i32> %s to <8 x i16> ++ store <8 x i16> %v2, ptr %res ++ ret void ++} ++ ++define void @mulhs_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: mulhs_v4i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vmuh.w $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = load <4 x i32>, ptr %a1 ++ %v0s = sext <4 x i32> %v0 to <4 x i64> ++ %v1s = sext <4 x i32> %v1 to <4 x i64> ++ %m = mul <4 x i64> %v0s, %v1s ++ %s = ashr <4 x i64> %m, ++ %v2 = trunc <4 x i64> %s to <4 x i32> ++ store <4 x i32> %v2, ptr %res ++ ret void ++} ++ ++define void @mulhu_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: mulhu_v4i32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vmuh.wu $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = load <4 x i32>, ptr %a1 ++ %v0z = zext <4 x i32> %v0 to <4 x i64> ++ %v1z = zext <4 x i32> %v1 to <4 x i64> ++ %m = mul <4 x i64> %v0z, %v1z ++ %s = lshr <4 x i64> %m, ++ %v2 = trunc <4 x i64> %s to <4 x i32> ++ store <4 x i32> %v2, ptr %res ++ ret void ++} ++ ++define void @mulhs_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: mulhs_v2i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vmuh.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = load <2 x i64>, ptr %a1 ++ %v0s = sext <2 x i64> %v0 to <2 x i128> ++ %v1s = sext <2 x i64> %v1 to <2 x i128> ++ %m = mul <2 x i128> %v0s, %v1s ++ %s = ashr <2 x i128> %m, ++ %v2 = trunc <2 x i128> %s to <2 x i64> ++ store <2 x i64> %v2, ptr %res ++ ret void ++} ++ ++define void @mulhu_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: mulhu_v2i64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vmuh.du $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = load <2 x i64>, ptr %a1 ++ %v0z = zext <2 x i64> %v0 to <2 x i128> ++ %v1z = zext <2 x i64> %v1 to <2 x i128> ++ %m = mul <2 x i128> %v0z, %v1z ++ %s = lshr <2 x i128> %m, ++ %v2 = trunc <2 x i128> %s to <2 x i64> ++ store <2 x i64> %v2, ptr %res ++ ret void ++} +-- +2.20.1 + diff --git a/0032-LoongArch-Make-ISD-VSELECT-a-legal-operation-with-ls.patch b/0032-LoongArch-Make-ISD-VSELECT-a-legal-operation-with-ls.patch new file mode 100644 index 0000000000000000000000000000000000000000..fa9618529a8c6f40e99e912fe704bfe765a22533 --- /dev/null +++ b/0032-LoongArch-Make-ISD-VSELECT-a-legal-operation-with-ls.patch @@ -0,0 +1,274 @@ +From 363970a20c51d39e1406fa7f516d95142024779d Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Wed, 6 Dec 2023 16:43:38 +0800 +Subject: [PATCH 32/66] [LoongArch] Make ISD::VSELECT a legal operation with + lsx/lasx + +(cherry picked from commit de21308f78f3b0f0910638dbdac90967150d19f0) + +Change-Id: I4ae7aa858ca06ff863f3c1a84d459ffa3102cc22 +--- + .../LoongArch/LoongArchISelLowering.cpp | 5 ++ + .../LoongArch/LoongArchLASXInstrInfo.td | 8 ++ + .../Target/LoongArch/LoongArchLSXInstrInfo.td | 8 ++ + llvm/test/CodeGen/LoongArch/lasx/vselect.ll | 86 
+++++++++++++++++++ + llvm/test/CodeGen/LoongArch/lsx/vselect.ll | 86 +++++++++++++++++++ + 5 files changed, 193 insertions(+) + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/vselect.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/vselect.ll + +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +index 3d8d6898a4d5..229251987ae4 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +@@ -237,6 +237,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal); + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); ++ ++ setOperationAction(ISD::VSELECT, VT, Legal); + } + for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) { + setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); +@@ -268,6 +270,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal); + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); ++ ++ setOperationAction(ISD::VSELECT, VT, Legal); + } + for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) { + setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); +@@ -305,6 +309,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + setStackPointerRegisterToSaveRestore(LoongArch::R3); + + setBooleanContents(ZeroOrOneBooleanContent); ++ setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); + + setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen()); + +diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +index 240f28b0dc5a..0bd8db1bfdf0 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +@@ -1480,6 +1480,14 @@ def : Pat<(f32 (vector_extract v8f32:$xj, i64:$rk)), + def : Pat<(f64 (vector_extract v4f64:$xj, i64:$rk)), + (f64 (EXTRACT_SUBREG (XVREPLVE_D v4f64:$xj, i64:$rk), sub_64))>; + ++// vselect ++def : Pat<(v32i8 (vselect LASX256:$xj, LASX256:$xd, ++ (v32i8 (SplatPat_uimm8 uimm8:$imm)))), ++ (XVBITSELI_B LASX256:$xd, LASX256:$xj, uimm8:$imm)>; ++foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in ++ def : Pat<(vt (vselect LASX256:$xa, LASX256:$xk, LASX256:$xj)), ++ (XVBITSEL_V LASX256:$xj, LASX256:$xk, LASX256:$xa)>; ++ + } // Predicates = [HasExtLASX] + + /// Intrinsic pattern +diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +index fb4726c530b5..5800ff6f6266 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +@@ -1564,6 +1564,14 @@ def : Pat<(f32 (vector_extract v4f32:$vj, i64:$rk)), + def : Pat<(f64 (vector_extract v2f64:$vj, i64:$rk)), + (f64 (EXTRACT_SUBREG (VREPLVE_D v2f64:$vj, i64:$rk), sub_64))>; + ++// vselect ++def : Pat<(v16i8 (vselect LSX128:$vj, LSX128:$vd, ++ (v16i8 (SplatPat_uimm8 uimm8:$imm)))), ++ (VBITSELI_B LSX128:$vd, LSX128:$vj, uimm8:$imm)>; ++foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in ++ def : Pat<(vt (vselect LSX128:$va, LSX128:$vk, LSX128:$vj)), ++ (VBITSEL_V LSX128:$vj, LSX128:$vk, LSX128:$va)>; ++ + } // Predicates = [HasExtLSX] + + /// Intrinsic pattern +diff --git 
a/llvm/test/CodeGen/LoongArch/lasx/vselect.ll b/llvm/test/CodeGen/LoongArch/lasx/vselect.ll +new file mode 100644 +index 000000000000..24f4bcf752d3 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/vselect.ll +@@ -0,0 +1,86 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++define void @select_v32i8_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: select_v32i8_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvrepli.h $xr1, -256 ++; CHECK-NEXT: xvbitseli.b $xr0, $xr1, 1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <32 x i8>, ptr %a0 ++ %sel = select <32 x i1> , <32 x i8> %v0, <32 x i8> ++ store <32 x i8> %sel, ptr %res ++ ret void ++} ++ ++define void @select_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: select_v32i8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: xvrepli.h $xr2, -256 ++; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = load <32 x i8>, ptr %a1 ++ %sel = select <32 x i1> , <32 x i8> %v0, <32 x i8> %v1 ++ store <32 x i8> %sel, ptr %res ++ ret void ++} ++ ++define void @select_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: select_v16i16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: lu12i.w $a1, -16 ++; CHECK-NEXT: lu32i.d $a1, 0 ++; CHECK-NEXT: xvreplgr2vr.w $xr2, $a1 ++; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = load <16 x i16>, ptr %a1 ++ %sel = select <16 x i1> , <16 x i16> %v0, <16 x i16> %v1 ++ store <16 x i16> %sel, ptr %res ++ ret void ++} ++ ++define void @select_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: select_v8i32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: ori $a1, $zero, 0 ++; CHECK-NEXT: lu32i.d $a1, -1 ++; CHECK-NEXT: xvreplgr2vr.d $xr2, $a1 ++; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = load <8 x i32>, ptr %a1 ++ %sel = select <8 x i1> , <8 x i32> %v0, <8 x i32> %v1 ++ store <8 x i32> %sel, ptr %res ++ ret void ++} ++ ++define void @select_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: select_v4i64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: pcalau12i $a3, %pc_hi20(.LCPI4_0) ++; CHECK-NEXT: addi.d $a3, $a3, %pc_lo12(.LCPI4_0) ++; CHECK-NEXT: xvld $xr0, $a3, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvld $xr2, $a2, 0 ++; CHECK-NEXT: xvbitsel.v $xr0, $xr2, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = load <4 x i64>, ptr %a1 ++ %sel = select <4 x i1> , <4 x i64> %v0, <4 x i64> %v1 ++ store <4 x i64> %sel, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/vselect.ll b/llvm/test/CodeGen/LoongArch/lsx/vselect.ll +new file mode 100644 +index 000000000000..00e3d9313f13 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/vselect.ll +@@ -0,0 +1,86 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++define void @select_v16i8_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: select_v16i8_imm: ++; 
CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vrepli.h $vr1, -256 ++; CHECK-NEXT: vbitseli.b $vr0, $vr1, 255 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i8>, ptr %a0 ++ %sel = select <16 x i1> , <16 x i8> %v0, <16 x i8> ++ store <16 x i8> %sel, ptr %res ++ ret void ++} ++ ++define void @select_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: select_v16i8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: vrepli.h $vr2, -256 ++; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = load <16 x i8>, ptr %a1 ++ %sel = select <16 x i1> , <16 x i8> %v0, <16 x i8> %v1 ++ store <16 x i8> %sel, ptr %res ++ ret void ++} ++ ++define void @select_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: select_v8i16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: lu12i.w $a1, -16 ++; CHECK-NEXT: lu32i.d $a1, 0 ++; CHECK-NEXT: vreplgr2vr.w $vr2, $a1 ++; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = load <8 x i16>, ptr %a1 ++ %sel = select <8 x i1> , <8 x i16> %v0, <8 x i16> %v1 ++ store <8 x i16> %sel, ptr %res ++ ret void ++} ++ ++define void @select_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: select_v4i32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: ori $a1, $zero, 0 ++; CHECK-NEXT: lu32i.d $a1, -1 ++; CHECK-NEXT: vreplgr2vr.d $vr2, $a1 ++; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = load <4 x i32>, ptr %a1 ++ %sel = select <4 x i1> , <4 x i32> %v0, <4 x i32> %v1 ++ store <4 x i32> %sel, ptr %res ++ ret void ++} ++ ++define void @select_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: select_v2i64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: pcalau12i $a3, %pc_hi20(.LCPI4_0) ++; CHECK-NEXT: addi.d $a3, $a3, %pc_lo12(.LCPI4_0) ++; CHECK-NEXT: vld $vr0, $a3, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vld $vr2, $a2, 0 ++; CHECK-NEXT: vbitsel.v $vr0, $vr2, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = load <2 x i64>, ptr %a1 ++ %sel = select <2 x i1> , <2 x i64> %v0, <2 x i64> %v1 ++ store <2 x i64> %sel, ptr %res ++ ret void ++} +-- +2.20.1 + diff --git a/0033-LoongArch-Add-codegen-support-for-icmp-fcmp-with-lsx.patch b/0033-LoongArch-Add-codegen-support-for-icmp-fcmp-with-lsx.patch new file mode 100644 index 0000000000000000000000000000000000000000..72f6060207627a5a0c84f5df83e8edf229cad78c --- /dev/null +++ b/0033-LoongArch-Add-codegen-support-for-icmp-fcmp-with-lsx.patch @@ -0,0 +1,3597 @@ +From f82f71d8a0e678db830c4f00f4cb27e824d2a536 Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Thu, 7 Dec 2023 20:11:43 +0800 +Subject: [PATCH 33/66] [LoongArch] Add codegen support for icmp/fcmp with + lsx/lasx fetaures (#74700) + +Mark ISD::SETCC node as legal, and add handling for the vector types +condition codes. 
+ +(cherry picked from commit 9ff7d0ebeb54347f9006405a6d08ed2b713bc411) + +Change-Id: I5a6229f7c730c7ae4c3ba3a348e4a0f78e670f26 +--- + .../LoongArch/LoongArchISelLowering.cpp | 14 + + .../LoongArch/LoongArchLASXInstrInfo.td | 95 ++ + .../Target/LoongArch/LoongArchLSXInstrInfo.td | 95 ++ + .../LoongArch/lasx/ir-instruction/fcmp.ll | 692 +++++++++++++ + .../LoongArch/lasx/ir-instruction/icmp.ll | 939 ++++++++++++++++++ + .../LoongArch/lsx/ir-instruction/fcmp.ll | 692 +++++++++++++ + .../LoongArch/lsx/ir-instruction/icmp.ll | 939 ++++++++++++++++++ + 7 files changed, 3466 insertions(+) + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fcmp.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/icmp.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fcmp.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/icmp.ll + +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +index 229251987ae4..3d5ae6d3deda 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +@@ -238,6 +238,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal); + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + ++ setOperationAction(ISD::SETCC, VT, Legal); + setOperationAction(ISD::VSELECT, VT, Legal); + } + for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) { +@@ -251,11 +252,17 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal); + setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal); + setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal); ++ setCondCodeAction( ++ {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT, ++ Expand); + } + for (MVT VT : {MVT::v4f32, MVT::v2f64}) { + setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); + setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal); + setOperationAction(ISD::FMA, VT, Legal); ++ setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT, ++ ISD::SETUGE, ISD::SETUGT}, ++ VT, Expand); + } + } + +@@ -271,6 +278,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal); + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + ++ setOperationAction(ISD::SETCC, VT, Legal); + setOperationAction(ISD::VSELECT, VT, Legal); + } + for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) { +@@ -284,11 +292,17 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal); + setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal); + setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal); ++ setCondCodeAction( ++ {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT, ++ Expand); + } + for (MVT VT : {MVT::v8f32, MVT::v4f64}) { + setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); + setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal); + setOperationAction(ISD::FMA, VT, Legal); ++ setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT, ++ ISD::SETUGE, ISD::SETUGT}, ++ VT, Expand); + } + } + +diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +index 0bd8db1bfdf0..a9bf65c6840d 100644 +--- 
a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +@@ -1184,6 +1184,65 @@ multiclass PatShiftXrUimm { + (!cast(Inst#"_D") LASX256:$xj, uimm6:$imm)>; + } + ++multiclass PatCCXrSimm5 { ++ def : Pat<(v32i8 (setcc (v32i8 LASX256:$xj), ++ (v32i8 (SplatPat_simm5 simm5:$imm)), CC)), ++ (!cast(Inst#"_B") LASX256:$xj, simm5:$imm)>; ++ def : Pat<(v16i16 (setcc (v16i16 LASX256:$xj), ++ (v16i16 (SplatPat_simm5 simm5:$imm)), CC)), ++ (!cast(Inst#"_H") LASX256:$xj, simm5:$imm)>; ++ def : Pat<(v8i32 (setcc (v8i32 LASX256:$xj), ++ (v8i32 (SplatPat_simm5 simm5:$imm)), CC)), ++ (!cast(Inst#"_W") LASX256:$xj, simm5:$imm)>; ++ def : Pat<(v4i64 (setcc (v4i64 LASX256:$xj), ++ (v4i64 (SplatPat_simm5 simm5:$imm)), CC)), ++ (!cast(Inst#"_D") LASX256:$xj, simm5:$imm)>; ++} ++ ++multiclass PatCCXrUimm5 { ++ def : Pat<(v32i8 (setcc (v32i8 LASX256:$xj), ++ (v32i8 (SplatPat_uimm5 uimm5:$imm)), CC)), ++ (!cast(Inst#"_BU") LASX256:$xj, uimm5:$imm)>; ++ def : Pat<(v16i16 (setcc (v16i16 LASX256:$xj), ++ (v16i16 (SplatPat_uimm5 uimm5:$imm)), CC)), ++ (!cast(Inst#"_HU") LASX256:$xj, uimm5:$imm)>; ++ def : Pat<(v8i32 (setcc (v8i32 LASX256:$xj), ++ (v8i32 (SplatPat_uimm5 uimm5:$imm)), CC)), ++ (!cast(Inst#"_WU") LASX256:$xj, uimm5:$imm)>; ++ def : Pat<(v4i64 (setcc (v4i64 LASX256:$xj), ++ (v4i64 (SplatPat_uimm5 uimm5:$imm)), CC)), ++ (!cast(Inst#"_DU") LASX256:$xj, uimm5:$imm)>; ++} ++ ++multiclass PatCCXrXr { ++ def : Pat<(v32i8 (setcc (v32i8 LASX256:$xj), (v32i8 LASX256:$xk), CC)), ++ (!cast(Inst#"_B") LASX256:$xj, LASX256:$xk)>; ++ def : Pat<(v16i16 (setcc (v16i16 LASX256:$xj), (v16i16 LASX256:$xk), CC)), ++ (!cast(Inst#"_H") LASX256:$xj, LASX256:$xk)>; ++ def : Pat<(v8i32 (setcc (v8i32 LASX256:$xj), (v8i32 LASX256:$xk), CC)), ++ (!cast(Inst#"_W") LASX256:$xj, LASX256:$xk)>; ++ def : Pat<(v4i64 (setcc (v4i64 LASX256:$xj), (v4i64 LASX256:$xk), CC)), ++ (!cast(Inst#"_D") LASX256:$xj, LASX256:$xk)>; ++} ++ ++multiclass PatCCXrXrU { ++ def : Pat<(v32i8 (setcc (v32i8 LASX256:$xj), (v32i8 LASX256:$xk), CC)), ++ (!cast(Inst#"_BU") LASX256:$xj, LASX256:$xk)>; ++ def : Pat<(v16i16 (setcc (v16i16 LASX256:$xj), (v16i16 LASX256:$xk), CC)), ++ (!cast(Inst#"_HU") LASX256:$xj, LASX256:$xk)>; ++ def : Pat<(v8i32 (setcc (v8i32 LASX256:$xj), (v8i32 LASX256:$xk), CC)), ++ (!cast(Inst#"_WU") LASX256:$xj, LASX256:$xk)>; ++ def : Pat<(v4i64 (setcc (v4i64 LASX256:$xj), (v4i64 LASX256:$xk), CC)), ++ (!cast(Inst#"_DU") LASX256:$xj, LASX256:$xk)>; ++} ++ ++multiclass PatCCXrXrF { ++ def : Pat<(v8i32 (setcc (v8f32 LASX256:$xj), (v8f32 LASX256:$xk), CC)), ++ (!cast(Inst#"_S") LASX256:$xj, LASX256:$xk)>; ++ def : Pat<(v4i64 (setcc (v4f64 LASX256:$xj), (v4f64 LASX256:$xk), CC)), ++ (!cast(Inst#"_D") LASX256:$xj, LASX256:$xk)>; ++} ++ + let Predicates = [HasExtLASX] in { + + // XVADD_{B/H/W/D} +@@ -1389,6 +1448,42 @@ def : Pat<(fma v8f32:$xj, v8f32:$xk, v8f32:$xa), + def : Pat<(fma v4f64:$xj, v4f64:$xk, v4f64:$xa), + (XVFMADD_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>; + ++// XVSEQ[I]_{B/H/W/D} ++defm : PatCCXrSimm5; ++defm : PatCCXrXr; ++ ++// XVSLE[I]_{B/H/W/D}[U] ++defm : PatCCXrSimm5; ++defm : PatCCXrUimm5; ++defm : PatCCXrXr; ++defm : PatCCXrXrU; ++ ++// XVSLT[I]_{B/H/W/D}[U] ++defm : PatCCXrSimm5; ++defm : PatCCXrUimm5; ++defm : PatCCXrXr; ++defm : PatCCXrXrU; ++ ++// XVFCMP.cond.{S/D} ++defm : PatCCXrXrF; ++defm : PatCCXrXrF; ++defm : PatCCXrXrF; ++ ++defm : PatCCXrXrF; ++defm : PatCCXrXrF; ++defm : PatCCXrXrF; ++ ++defm : PatCCXrXrF; ++defm : PatCCXrXrF; ++defm : PatCCXrXrF; ++ ++defm : 
PatCCXrXrF; ++defm : PatCCXrXrF; ++defm : PatCCXrXrF; ++ ++defm : PatCCXrXrF; ++defm : PatCCXrXrF; ++ + // PseudoXVINSGR2VR_{B/H} + def : Pat<(vector_insert v32i8:$xd, GRLenVT:$rj, uimm5:$imm), + (PseudoXVINSGR2VR_B v32i8:$xd, GRLenVT:$rj, uimm5:$imm)>; +diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +index 5800ff6f6266..ff21c6681271 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +@@ -1261,6 +1261,65 @@ multiclass PatShiftVrUimm { + (!cast(Inst#"_D") LSX128:$vj, uimm6:$imm)>; + } + ++multiclass PatCCVrSimm5 { ++ def : Pat<(v16i8 (setcc (v16i8 LSX128:$vj), ++ (v16i8 (SplatPat_simm5 simm5:$imm)), CC)), ++ (!cast(Inst#"_B") LSX128:$vj, simm5:$imm)>; ++ def : Pat<(v8i16 (setcc (v8i16 LSX128:$vj), ++ (v8i16 (SplatPat_simm5 simm5:$imm)), CC)), ++ (!cast(Inst#"_H") LSX128:$vj, simm5:$imm)>; ++ def : Pat<(v4i32 (setcc (v4i32 LSX128:$vj), ++ (v4i32 (SplatPat_simm5 simm5:$imm)), CC)), ++ (!cast(Inst#"_W") LSX128:$vj, simm5:$imm)>; ++ def : Pat<(v2i64 (setcc (v2i64 LSX128:$vj), ++ (v2i64 (SplatPat_simm5 simm5:$imm)), CC)), ++ (!cast(Inst#"_D") LSX128:$vj, simm5:$imm)>; ++} ++ ++multiclass PatCCVrUimm5 { ++ def : Pat<(v16i8 (setcc (v16i8 LSX128:$vj), ++ (v16i8 (SplatPat_uimm5 uimm5:$imm)), CC)), ++ (!cast(Inst#"_BU") LSX128:$vj, uimm5:$imm)>; ++ def : Pat<(v8i16 (setcc (v8i16 LSX128:$vj), ++ (v8i16 (SplatPat_uimm5 uimm5:$imm)), CC)), ++ (!cast(Inst#"_HU") LSX128:$vj, uimm5:$imm)>; ++ def : Pat<(v4i32 (setcc (v4i32 LSX128:$vj), ++ (v4i32 (SplatPat_uimm5 uimm5:$imm)), CC)), ++ (!cast(Inst#"_WU") LSX128:$vj, uimm5:$imm)>; ++ def : Pat<(v2i64 (setcc (v2i64 LSX128:$vj), ++ (v2i64 (SplatPat_uimm5 uimm5:$imm)), CC)), ++ (!cast(Inst#"_DU") LSX128:$vj, uimm5:$imm)>; ++} ++ ++multiclass PatCCVrVr { ++ def : Pat<(v16i8 (setcc (v16i8 LSX128:$vj), (v16i8 LSX128:$vk), CC)), ++ (!cast(Inst#"_B") LSX128:$vj, LSX128:$vk)>; ++ def : Pat<(v8i16 (setcc (v8i16 LSX128:$vj), (v8i16 LSX128:$vk), CC)), ++ (!cast(Inst#"_H") LSX128:$vj, LSX128:$vk)>; ++ def : Pat<(v4i32 (setcc (v4i32 LSX128:$vj), (v4i32 LSX128:$vk), CC)), ++ (!cast(Inst#"_W") LSX128:$vj, LSX128:$vk)>; ++ def : Pat<(v2i64 (setcc (v2i64 LSX128:$vj), (v2i64 LSX128:$vk), CC)), ++ (!cast(Inst#"_D") LSX128:$vj, LSX128:$vk)>; ++} ++ ++multiclass PatCCVrVrU { ++ def : Pat<(v16i8 (setcc (v16i8 LSX128:$vj), (v16i8 LSX128:$vk), CC)), ++ (!cast(Inst#"_BU") LSX128:$vj, LSX128:$vk)>; ++ def : Pat<(v8i16 (setcc (v8i16 LSX128:$vj), (v8i16 LSX128:$vk), CC)), ++ (!cast(Inst#"_HU") LSX128:$vj, LSX128:$vk)>; ++ def : Pat<(v4i32 (setcc (v4i32 LSX128:$vj), (v4i32 LSX128:$vk), CC)), ++ (!cast(Inst#"_WU") LSX128:$vj, LSX128:$vk)>; ++ def : Pat<(v2i64 (setcc (v2i64 LSX128:$vj), (v2i64 LSX128:$vk), CC)), ++ (!cast(Inst#"_DU") LSX128:$vj, LSX128:$vk)>; ++} ++ ++multiclass PatCCVrVrF { ++ def : Pat<(v4i32 (setcc (v4f32 LSX128:$vj), (v4f32 LSX128:$vk), CC)), ++ (!cast(Inst#"_S") LSX128:$vj, LSX128:$vk)>; ++ def : Pat<(v2i64 (setcc (v2f64 LSX128:$vj), (v2f64 LSX128:$vk), CC)), ++ (!cast(Inst#"_D") LSX128:$vj, LSX128:$vk)>; ++} ++ + let Predicates = [HasExtLSX] in { + + // VADD_{B/H/W/D} +@@ -1466,6 +1525,42 @@ def : Pat<(fma v4f32:$vj, v4f32:$vk, v4f32:$va), + def : Pat<(fma v2f64:$vj, v2f64:$vk, v2f64:$va), + (VFMADD_D v2f64:$vj, v2f64:$vk, v2f64:$va)>; + ++// VSEQ[I]_{B/H/W/D} ++defm : PatCCVrSimm5; ++defm : PatCCVrVr; ++ ++// VSLE[I]_{B/H/W/D}[U] ++defm : PatCCVrSimm5; ++defm : PatCCVrUimm5; ++defm : PatCCVrVr; ++defm : PatCCVrVrU; ++ ++// 
VSLT[I]_{B/H/W/D}[U] ++defm : PatCCVrSimm5; ++defm : PatCCVrUimm5; ++defm : PatCCVrVr; ++defm : PatCCVrVrU; ++ ++// VFCMP.cond.{S/D} ++defm : PatCCVrVrF; ++defm : PatCCVrVrF; ++defm : PatCCVrVrF; ++ ++defm : PatCCVrVrF; ++defm : PatCCVrVrF; ++defm : PatCCVrVrF; ++ ++defm : PatCCVrVrF; ++defm : PatCCVrVrF; ++defm : PatCCVrVrF; ++ ++defm : PatCCVrVrF; ++defm : PatCCVrVrF; ++defm : PatCCVrVrF; ++ ++defm : PatCCVrVrF; ++defm : PatCCVrVrF; ++ + // VINSGR2VR_{B/H/W/D} + def : Pat<(vector_insert v16i8:$vd, GRLenVT:$rj, uimm4:$imm), + (VINSGR2VR_B v16i8:$vd, GRLenVT:$rj, uimm4:$imm)>; +diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fcmp.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fcmp.ll +new file mode 100644 +index 000000000000..ef67dbc100c0 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fcmp.ll +@@ -0,0 +1,692 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++;; TREU ++define void @v8f32_fcmp_true(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8f32_fcmp_true: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvrepli.b $xr0, -1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %cmp = fcmp true <8 x float> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++;; FALSE ++define void @v4f64_fcmp_false(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f64_fcmp_false: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvrepli.b $xr0, 0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %cmp = fcmp false <4 x double> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETOEQ ++define void @v8f32_fcmp_oeq(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8f32_fcmp_oeq: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfcmp.ceq.s $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %cmp = fcmp oeq <8 x float> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4f64_fcmp_oeq(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f64_fcmp_oeq: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfcmp.ceq.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %cmp = fcmp oeq <4 x double> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETUEQ ++define void @v8f32_fcmp_ueq(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8f32_fcmp_ueq: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfcmp.cueq.s $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %cmp = fcmp ueq <8 x float> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4f64_fcmp_ueq(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f64_fcmp_ueq: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; 
CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfcmp.cueq.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %cmp = fcmp ueq <4 x double> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETEQ ++define void @v8f32_fcmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8f32_fcmp_eq: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfcmp.ceq.s $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %cmp = fcmp fast oeq <8 x float> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4f64_fcmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f64_fcmp_eq: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfcmp.ceq.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %cmp = fcmp fast ueq <4 x double> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETOLE ++define void @v8f32_fcmp_ole(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8f32_fcmp_ole: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfcmp.cle.s $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %cmp = fcmp ole <8 x float> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4f64_fcmp_ole(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f64_fcmp_ole: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfcmp.cle.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %cmp = fcmp ole <4 x double> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETULE ++define void @v8f32_fcmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8f32_fcmp_ule: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfcmp.cule.s $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %cmp = fcmp ule <8 x float> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4f64_fcmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f64_fcmp_ule: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfcmp.cule.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %cmp = fcmp ule <4 x double> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETLE ++define void @v8f32_fcmp_le(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8f32_fcmp_le: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, 
$a1, 0 ++; CHECK-NEXT: xvfcmp.cle.s $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %cmp = fcmp fast ole <8 x float> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4f64_fcmp_le(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f64_fcmp_le: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfcmp.cle.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %cmp = fcmp fast ule <4 x double> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETOLT ++define void @v8f32_fcmp_olt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8f32_fcmp_olt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfcmp.clt.s $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %cmp = fcmp olt <8 x float> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4f64_fcmp_olt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f64_fcmp_olt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfcmp.clt.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %cmp = fcmp olt <4 x double> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETULT ++define void @v8f32_fcmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8f32_fcmp_ult: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfcmp.cult.s $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %cmp = fcmp ult <8 x float> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4f64_fcmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f64_fcmp_ult: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfcmp.cult.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %cmp = fcmp ult <4 x double> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETLT ++define void @v8f32_fcmp_lt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8f32_fcmp_lt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfcmp.clt.s $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %cmp = fcmp fast olt <8 x float> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4f64_fcmp_lt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f64_fcmp_lt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: 
xvfcmp.clt.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %cmp = fcmp fast ult <4 x double> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETONE ++define void @v8f32_fcmp_one(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8f32_fcmp_one: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfcmp.cne.s $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %cmp = fcmp one <8 x float> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4f64_fcmp_one(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f64_fcmp_one: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfcmp.cne.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %cmp = fcmp one <4 x double> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETUNE ++define void @v8f32_fcmp_une(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8f32_fcmp_une: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfcmp.cune.s $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %cmp = fcmp une <8 x float> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4f64_fcmp_une(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f64_fcmp_une: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfcmp.cune.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %cmp = fcmp une <4 x double> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETNE ++define void @v8f32_fcmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8f32_fcmp_ne: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfcmp.cne.s $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %cmp = fcmp fast one <8 x float> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4f64_fcmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f64_fcmp_ne: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfcmp.cne.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %cmp = fcmp fast une <4 x double> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETO ++define void @v8f32_fcmp_ord(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8f32_fcmp_ord: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfcmp.cor.s $xr0, 
$xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %cmp = fcmp ord <8 x float> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4f64_fcmp_ord(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f64_fcmp_ord: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfcmp.cor.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %cmp = fcmp ord <4 x double> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETUO ++define void @v8f32_fcmp_uno(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8f32_fcmp_uno: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfcmp.cun.s $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %cmp = fcmp uno <8 x float> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4f64_fcmp_uno(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f64_fcmp_uno: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvfcmp.cun.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %cmp = fcmp uno <4 x double> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; Expand SETOGT ++define void @v8f32_fcmp_ogt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8f32_fcmp_ogt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: xvfcmp.clt.s $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %cmp = fcmp ogt <8 x float> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4f64_fcmp_ogt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f64_fcmp_ogt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: xvfcmp.clt.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %cmp = fcmp ogt <4 x double> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; Expand SETUGT ++define void @v8f32_fcmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8f32_fcmp_ugt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: xvfcmp.cult.s $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %cmp = fcmp ugt <8 x float> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4f64_fcmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f64_fcmp_ugt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: xvfcmp.cult.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: 
xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %cmp = fcmp ugt <4 x double> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; Expand SETGT ++define void @v8f32_fcmp_gt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8f32_fcmp_gt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: xvfcmp.clt.s $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %cmp = fcmp fast ogt <8 x float> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4f64_fcmp_gt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f64_fcmp_gt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: xvfcmp.clt.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %cmp = fcmp fast ugt <4 x double> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; Expand SETOGE ++define void @v8f32_fcmp_oge(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8f32_fcmp_oge: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: xvfcmp.cle.s $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %cmp = fcmp oge <8 x float> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4f64_fcmp_oge(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f64_fcmp_oge: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: xvfcmp.cle.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %cmp = fcmp oge <4 x double> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; Expand SETUGE ++define void @v8f32_fcmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8f32_fcmp_uge: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: xvfcmp.cule.s $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %cmp = fcmp uge <8 x float> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4f64_fcmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f64_fcmp_uge: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: xvfcmp.cule.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %cmp = fcmp uge <4 x double> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; Expand SETGE ++define void @v8f32_fcmp_ge(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8f32_fcmp_ge: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: xvfcmp.cle.s $xr0, $xr1, $xr0 ++; 
CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %cmp = fcmp fast oge <8 x float> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4f64_fcmp_ge(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f64_fcmp_ge: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: xvfcmp.cle.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %cmp = fcmp fast uge <4 x double> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/icmp.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/icmp.ll +new file mode 100644 +index 000000000000..6693fe0f6ec7 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/icmp.ll +@@ -0,0 +1,939 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++;; SETEQ ++define void @v32i8_icmp_eq_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v32i8_icmp_eq_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvseqi.b $xr0, $xr0, 15 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <32 x i8>, ptr %a0 ++ %cmp = icmp eq <32 x i8> %v0, ++ %ext = sext <32 x i1> %cmp to <32 x i8> ++ store <32 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v32i8_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v32i8_icmp_eq: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvseq.b $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = load <32 x i8>, ptr %a1 ++ %cmp = icmp eq <32 x i8> %v0, %v1 ++ %ext = sext <32 x i1> %cmp to <32 x i8> ++ store <32 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v16i16_icmp_eq_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v16i16_icmp_eq_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvseqi.h $xr0, $xr0, 15 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i16>, ptr %a0 ++ %cmp = icmp eq <16 x i16> %v0, ++ %ext = sext <16 x i1> %cmp to <16 x i16> ++ store <16 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v16i16_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v16i16_icmp_eq: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvseq.h $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = load <16 x i16>, ptr %a1 ++ %cmp = icmp eq <16 x i16> %v0, %v1 ++ %ext = sext <16 x i1> %cmp to <16 x i16> ++ store <16 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v8i32_icmp_eq_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v8i32_icmp_eq_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvseqi.w $xr0, $xr0, 15 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i32>, ptr %a0 ++ %cmp = icmp eq <8 x i32> %v0, ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v8i32_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8i32_icmp_eq: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, 
$a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvseq.w $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = load <8 x i32>, ptr %a1 ++ %cmp = icmp eq <8 x i32> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4i64_icmp_eq_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v4i64_icmp_eq_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvseqi.d $xr0, $xr0, 15 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i64>, ptr %a0 ++ %cmp = icmp eq <4 x i64> %v0, ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++define void @v4i64_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4i64_icmp_eq: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvseq.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = load <4 x i64>, ptr %a1 ++ %cmp = icmp eq <4 x i64> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETLE ++define void @v32i8_icmp_sle_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v32i8_icmp_sle_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvslei.b $xr0, $xr0, 15 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <32 x i8>, ptr %a0 ++ %cmp = icmp sle <32 x i8> %v0, ++ %ext = sext <32 x i1> %cmp to <32 x i8> ++ store <32 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v32i8_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v32i8_icmp_sle: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvsle.b $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = load <32 x i8>, ptr %a1 ++ %cmp = icmp sle <32 x i8> %v0, %v1 ++ %ext = sext <32 x i1> %cmp to <32 x i8> ++ store <32 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v16i16_icmp_sle_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v16i16_icmp_sle_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvslei.h $xr0, $xr0, 15 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i16>, ptr %a0 ++ %cmp = icmp sle <16 x i16> %v0, ++ %ext = sext <16 x i1> %cmp to <16 x i16> ++ store <16 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v16i16_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v16i16_icmp_sle: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvsle.h $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = load <16 x i16>, ptr %a1 ++ %cmp = icmp sle <16 x i16> %v0, %v1 ++ %ext = sext <16 x i1> %cmp to <16 x i16> ++ store <16 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v8i32_icmp_sle_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v8i32_icmp_sle_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvslei.w $xr0, $xr0, 15 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i32>, ptr %a0 ++ %cmp = icmp sle <8 x i32> %v0, ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v8i32_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8i32_icmp_sle: 
++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvsle.w $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = load <8 x i32>, ptr %a1 ++ %cmp = icmp sle <8 x i32> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4i64_icmp_sle_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v4i64_icmp_sle_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvslei.d $xr0, $xr0, 15 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i64>, ptr %a0 ++ %cmp = icmp sle <4 x i64> %v0, ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++define void @v4i64_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4i64_icmp_sle: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvsle.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = load <4 x i64>, ptr %a1 ++ %cmp = icmp sle <4 x i64> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETULE ++define void @v32i8_icmp_ule_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v32i8_icmp_ule_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvslei.bu $xr0, $xr0, 31 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <32 x i8>, ptr %a0 ++ %cmp = icmp ule <32 x i8> %v0, ++ %ext = sext <32 x i1> %cmp to <32 x i8> ++ store <32 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v32i8_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v32i8_icmp_ule: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvsle.bu $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = load <32 x i8>, ptr %a1 ++ %cmp = icmp ule <32 x i8> %v0, %v1 ++ %ext = sext <32 x i1> %cmp to <32 x i8> ++ store <32 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v16i16_icmp_ule_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v16i16_icmp_ule_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvslei.hu $xr0, $xr0, 31 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i16>, ptr %a0 ++ %cmp = icmp ule <16 x i16> %v0, ++ %ext = sext <16 x i1> %cmp to <16 x i16> ++ store <16 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v16i16_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v16i16_icmp_ule: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvsle.hu $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = load <16 x i16>, ptr %a1 ++ %cmp = icmp ule <16 x i16> %v0, %v1 ++ %ext = sext <16 x i1> %cmp to <16 x i16> ++ store <16 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v8i32_icmp_ule_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v8i32_icmp_ule_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvslei.wu $xr0, $xr0, 31 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i32>, ptr %a0 ++ %cmp = icmp ule <8 x i32> %v0, ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v8i32_icmp_ule(ptr %res, 
ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8i32_icmp_ule: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvsle.wu $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = load <8 x i32>, ptr %a1 ++ %cmp = icmp ule <8 x i32> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4i64_icmp_ule_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v4i64_icmp_ule_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvslei.du $xr0, $xr0, 31 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i64>, ptr %a0 ++ %cmp = icmp ule <4 x i64> %v0, ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++define void @v4i64_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4i64_icmp_ule: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvsle.du $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = load <4 x i64>, ptr %a1 ++ %cmp = icmp ule <4 x i64> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETLT ++define void @v32i8_icmp_slt_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v32i8_icmp_slt_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvslti.b $xr0, $xr0, 15 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <32 x i8>, ptr %a0 ++ %cmp = icmp slt <32 x i8> %v0, ++ %ext = sext <32 x i1> %cmp to <32 x i8> ++ store <32 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v32i8_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v32i8_icmp_slt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvslt.b $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = load <32 x i8>, ptr %a1 ++ %cmp = icmp slt <32 x i8> %v0, %v1 ++ %ext = sext <32 x i1> %cmp to <32 x i8> ++ store <32 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v16i16_icmp_slt_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v16i16_icmp_slt_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvslti.h $xr0, $xr0, 15 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i16>, ptr %a0 ++ %cmp = icmp slt <16 x i16> %v0, ++ %ext = sext <16 x i1> %cmp to <16 x i16> ++ store <16 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v16i16_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v16i16_icmp_slt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvslt.h $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = load <16 x i16>, ptr %a1 ++ %cmp = icmp slt <16 x i16> %v0, %v1 ++ %ext = sext <16 x i1> %cmp to <16 x i16> ++ store <16 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v8i32_icmp_slt_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v8i32_icmp_slt_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvslti.w $xr0, $xr0, 15 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i32>, ptr %a0 ++ %cmp = icmp slt <8 x i32> %v0, ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ 
ret void ++} ++ ++define void @v8i32_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8i32_icmp_slt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvslt.w $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = load <8 x i32>, ptr %a1 ++ %cmp = icmp slt <8 x i32> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4i64_icmp_slt_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v4i64_icmp_slt_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvslti.d $xr0, $xr0, 15 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i64>, ptr %a0 ++ %cmp = icmp slt <4 x i64> %v0, ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++define void @v4i64_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4i64_icmp_slt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvslt.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = load <4 x i64>, ptr %a1 ++ %cmp = icmp slt <4 x i64> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETULT ++define void @v32i8_icmp_ult_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v32i8_icmp_ult_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvslti.bu $xr0, $xr0, 31 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <32 x i8>, ptr %a0 ++ %cmp = icmp ult <32 x i8> %v0, ++ %ext = sext <32 x i1> %cmp to <32 x i8> ++ store <32 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v32i8_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v32i8_icmp_ult: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvslt.bu $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = load <32 x i8>, ptr %a1 ++ %cmp = icmp ult <32 x i8> %v0, %v1 ++ %ext = sext <32 x i1> %cmp to <32 x i8> ++ store <32 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v16i16_icmp_ult_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v16i16_icmp_ult_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvslti.hu $xr0, $xr0, 31 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i16>, ptr %a0 ++ %cmp = icmp ult <16 x i16> %v0, ++ %ext = sext <16 x i1> %cmp to <16 x i16> ++ store <16 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v16i16_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v16i16_icmp_ult: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvslt.hu $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = load <16 x i16>, ptr %a1 ++ %cmp = icmp ult <16 x i16> %v0, %v1 ++ %ext = sext <16 x i1> %cmp to <16 x i16> ++ store <16 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v8i32_icmp_ult_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v8i32_icmp_ult_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvslti.wu $xr0, $xr0, 31 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i32>, ptr %a0 ++ %cmp = icmp ult <8 x i32> %v0, ++ %ext = sext <8 x 
i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v8i32_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8i32_icmp_ult: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvslt.wu $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = load <8 x i32>, ptr %a1 ++ %cmp = icmp ult <8 x i32> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4i64_icmp_ult_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v4i64_icmp_ult_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvslti.du $xr0, $xr0, 31 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i64>, ptr %a0 ++ %cmp = icmp ult <4 x i64> %v0, ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++define void @v4i64_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4i64_icmp_ult: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvslt.du $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = load <4 x i64>, ptr %a1 ++ %cmp = icmp ult <4 x i64> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; Expand SETNE ++define void @v32i8_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v32i8_icmp_ne: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvseq.b $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvxori.b $xr0, $xr0, 255 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = load <32 x i8>, ptr %a1 ++ %cmp = icmp ne <32 x i8> %v0, %v1 ++ %ext = sext <32 x i1> %cmp to <32 x i8> ++ store <32 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v16i16_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v16i16_icmp_ne: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvseq.h $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvrepli.b $xr1, -1 ++; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = load <16 x i16>, ptr %a1 ++ %cmp = icmp ne <16 x i16> %v0, %v1 ++ %ext = sext <16 x i1> %cmp to <16 x i16> ++ store <16 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v8i32_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8i32_icmp_ne: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvseq.w $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvrepli.b $xr1, -1 ++; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = load <8 x i32>, ptr %a1 ++ %cmp = icmp ne <8 x i32> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4i64_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4i64_icmp_ne: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a2, 0 ++; CHECK-NEXT: xvld $xr1, $a1, 0 ++; CHECK-NEXT: xvseq.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvrepli.b $xr1, -1 ++; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = load <4 x i64>, ptr %a1 ++ %cmp = 
icmp ne <4 x i64> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; Expand SETGE ++define void @v32i8_icmp_sge(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v32i8_icmp_sge: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: xvsle.b $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = load <32 x i8>, ptr %a1 ++ %cmp = icmp sge <32 x i8> %v0, %v1 ++ %ext = sext <32 x i1> %cmp to <32 x i8> ++ store <32 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v16i16_icmp_sge(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v16i16_icmp_sge: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: xvsle.h $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = load <16 x i16>, ptr %a1 ++ %cmp = icmp sge <16 x i16> %v0, %v1 ++ %ext = sext <16 x i1> %cmp to <16 x i16> ++ store <16 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v8i32_icmp_sge(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8i32_icmp_sge: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: xvsle.w $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = load <8 x i32>, ptr %a1 ++ %cmp = icmp sge <8 x i32> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4i64_icmp_sge(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4i64_icmp_sge: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: xvsle.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = load <4 x i64>, ptr %a1 ++ %cmp = icmp sge <4 x i64> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; Expand SETUGE ++define void @v32i8_icmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v32i8_icmp_uge: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: xvsle.bu $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = load <32 x i8>, ptr %a1 ++ %cmp = icmp uge <32 x i8> %v0, %v1 ++ %ext = sext <32 x i1> %cmp to <32 x i8> ++ store <32 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v16i16_icmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v16i16_icmp_uge: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: xvsle.hu $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = load <16 x i16>, ptr %a1 ++ %cmp = icmp uge <16 x i16> %v0, %v1 ++ %ext = sext <16 x i1> %cmp to <16 x i16> ++ store <16 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v8i32_icmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8i32_icmp_uge: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: xvsle.wu $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = load <8 x i32>, ptr %a1 ++ %cmp = icmp uge <8 x i32> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ 
ret void ++} ++ ++define void @v4i64_icmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4i64_icmp_uge: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: xvsle.du $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = load <4 x i64>, ptr %a1 ++ %cmp = icmp uge <4 x i64> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; Expand SETGT ++define void @v32i8_icmp_sgt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v32i8_icmp_sgt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: xvslt.b $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = load <32 x i8>, ptr %a1 ++ %cmp = icmp sgt <32 x i8> %v0, %v1 ++ %ext = sext <32 x i1> %cmp to <32 x i8> ++ store <32 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v16i16_icmp_sgt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v16i16_icmp_sgt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: xvslt.h $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = load <16 x i16>, ptr %a1 ++ %cmp = icmp sgt <16 x i16> %v0, %v1 ++ %ext = sext <16 x i1> %cmp to <16 x i16> ++ store <16 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v8i32_icmp_sgt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8i32_icmp_sgt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: xvslt.w $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = load <8 x i32>, ptr %a1 ++ %cmp = icmp sgt <8 x i32> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4i64_icmp_sgt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4i64_icmp_sgt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: xvslt.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = load <4 x i64>, ptr %a1 ++ %cmp = icmp sgt <4 x i64> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; Expand SETUGT ++define void @v32i8_icmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v32i8_icmp_ugt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: xvslt.bu $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <32 x i8>, ptr %a0 ++ %v1 = load <32 x i8>, ptr %a1 ++ %cmp = icmp ugt <32 x i8> %v0, %v1 ++ %ext = sext <32 x i1> %cmp to <32 x i8> ++ store <32 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v16i16_icmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v16i16_icmp_ugt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: xvslt.hu $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i16>, ptr %a0 ++ %v1 = load <16 x i16>, ptr %a1 ++ %cmp = icmp ugt <16 x i16> %v0, %v1 ++ %ext = sext <16 x i1> %cmp to <16 x i16> ++ store <16 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v8i32_icmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: 
v8i32_icmp_ugt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: xvslt.wu $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i32>, ptr %a0 ++ %v1 = load <8 x i32>, ptr %a1 ++ %cmp = icmp ugt <8 x i32> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i32> ++ store <8 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4i64_icmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4i64_icmp_ugt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvld $xr1, $a2, 0 ++; CHECK-NEXT: xvslt.du $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i64>, ptr %a0 ++ %v1 = load <4 x i64>, ptr %a1 ++ %cmp = icmp ugt <4 x i64> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i64> ++ store <4 x i64> %ext, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fcmp.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fcmp.ll +new file mode 100644 +index 000000000000..53fbf0b2f86f +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fcmp.ll +@@ -0,0 +1,692 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++;; TREU ++define void @v4f32_fcmp_true(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f32_fcmp_true: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vrepli.b $vr0, -1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %cmp = fcmp true <4 x float> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++;; FALSE ++define void @v2f64_fcmp_false(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2f64_fcmp_false: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vrepli.b $vr0, 0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %cmp = fcmp false <2 x double> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETOEQ ++define void @v4f32_fcmp_oeq(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f32_fcmp_oeq: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfcmp.ceq.s $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %cmp = fcmp oeq <4 x float> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v2f64_fcmp_oeq(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2f64_fcmp_oeq: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfcmp.ceq.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %cmp = fcmp oeq <2 x double> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETUEQ ++define void @v4f32_fcmp_ueq(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f32_fcmp_ueq: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfcmp.cueq.s $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %cmp = fcmp ueq 
<4 x float> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v2f64_fcmp_ueq(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2f64_fcmp_ueq: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfcmp.cueq.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %cmp = fcmp ueq <2 x double> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETEQ ++define void @v4f32_fcmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f32_fcmp_eq: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfcmp.ceq.s $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %cmp = fcmp fast oeq <4 x float> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v2f64_fcmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2f64_fcmp_eq: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfcmp.ceq.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %cmp = fcmp fast ueq <2 x double> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETOLE ++define void @v4f32_fcmp_ole(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f32_fcmp_ole: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfcmp.cle.s $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %cmp = fcmp ole <4 x float> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v2f64_fcmp_ole(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2f64_fcmp_ole: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfcmp.cle.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %cmp = fcmp ole <2 x double> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETULE ++define void @v4f32_fcmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f32_fcmp_ule: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfcmp.cule.s $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %cmp = fcmp ule <4 x float> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v2f64_fcmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2f64_fcmp_ule: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfcmp.cule.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %cmp = fcmp ule <2 x double> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x 
i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETLE ++define void @v4f32_fcmp_le(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f32_fcmp_le: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfcmp.cle.s $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %cmp = fcmp fast ole <4 x float> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v2f64_fcmp_le(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2f64_fcmp_le: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfcmp.cle.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %cmp = fcmp fast ule <2 x double> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETOLT ++define void @v4f32_fcmp_olt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f32_fcmp_olt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfcmp.clt.s $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %cmp = fcmp olt <4 x float> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v2f64_fcmp_olt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2f64_fcmp_olt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfcmp.clt.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %cmp = fcmp olt <2 x double> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETULT ++define void @v4f32_fcmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f32_fcmp_ult: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfcmp.cult.s $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %cmp = fcmp ult <4 x float> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v2f64_fcmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2f64_fcmp_ult: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfcmp.cult.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %cmp = fcmp ult <2 x double> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETLT ++define void @v4f32_fcmp_lt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f32_fcmp_lt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfcmp.clt.s $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %cmp = fcmp fast olt <4 x float> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void 
++} ++ ++define void @v2f64_fcmp_lt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2f64_fcmp_lt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfcmp.clt.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %cmp = fcmp fast ult <2 x double> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETONE ++define void @v4f32_fcmp_one(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f32_fcmp_one: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfcmp.cne.s $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %cmp = fcmp one <4 x float> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v2f64_fcmp_one(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2f64_fcmp_one: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfcmp.cne.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %cmp = fcmp one <2 x double> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETUNE ++define void @v4f32_fcmp_une(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f32_fcmp_une: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfcmp.cune.s $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %cmp = fcmp une <4 x float> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v2f64_fcmp_une(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2f64_fcmp_une: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfcmp.cune.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %cmp = fcmp une <2 x double> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETNE ++define void @v4f32_fcmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f32_fcmp_ne: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfcmp.cne.s $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %cmp = fcmp fast one <4 x float> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v2f64_fcmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2f64_fcmp_ne: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfcmp.cne.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %cmp = fcmp fast une <2 x double> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETO ++define void @v4f32_fcmp_ord(ptr %res, 
ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f32_fcmp_ord: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfcmp.cor.s $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %cmp = fcmp ord <4 x float> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v2f64_fcmp_ord(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2f64_fcmp_ord: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfcmp.cor.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %cmp = fcmp ord <2 x double> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETUO ++define void @v4f32_fcmp_uno(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f32_fcmp_uno: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfcmp.cun.s $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %cmp = fcmp uno <4 x float> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v2f64_fcmp_uno(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2f64_fcmp_uno: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vfcmp.cun.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %cmp = fcmp uno <2 x double> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; Expand SETOGT ++define void @v4f32_fcmp_ogt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f32_fcmp_ogt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: vfcmp.clt.s $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %cmp = fcmp ogt <4 x float> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v2f64_fcmp_ogt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2f64_fcmp_ogt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: vfcmp.clt.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %cmp = fcmp ogt <2 x double> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; Expand SETUGT ++define void @v4f32_fcmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f32_fcmp_ugt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: vfcmp.cult.s $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %cmp = fcmp ugt <4 x float> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v2f64_fcmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: 
v2f64_fcmp_ugt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: vfcmp.cult.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %cmp = fcmp ugt <2 x double> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; Expand SETGT ++define void @v4f32_fcmp_gt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f32_fcmp_gt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: vfcmp.clt.s $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %cmp = fcmp fast ogt <4 x float> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v2f64_fcmp_gt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2f64_fcmp_gt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: vfcmp.clt.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %cmp = fcmp fast ugt <2 x double> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; Expand SETOGE ++define void @v4f32_fcmp_oge(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f32_fcmp_oge: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: vfcmp.cle.s $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %cmp = fcmp oge <4 x float> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v2f64_fcmp_oge(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2f64_fcmp_oge: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: vfcmp.cle.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %cmp = fcmp oge <2 x double> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; Expand SETUGE ++define void @v4f32_fcmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f32_fcmp_uge: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: vfcmp.cule.s $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %cmp = fcmp uge <4 x float> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v2f64_fcmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2f64_fcmp_uge: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: vfcmp.cule.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %cmp = fcmp uge <2 x double> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; Expand SETGE ++define void @v4f32_fcmp_ge(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4f32_fcmp_ge: ++; 
CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: vfcmp.cle.s $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %cmp = fcmp fast oge <4 x float> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v2f64_fcmp_ge(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2f64_fcmp_ge: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: vfcmp.cle.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %cmp = fcmp fast uge <2 x double> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/icmp.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/icmp.ll +new file mode 100644 +index 000000000000..448f3fa6c6e0 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/icmp.ll +@@ -0,0 +1,939 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++;; SETEQ ++define void @v16i8_icmp_eq_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v16i8_icmp_eq_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vseqi.b $vr0, $vr0, 15 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i8>, ptr %a0 ++ %cmp = icmp eq <16 x i8> %v0, ++ %ext = sext <16 x i1> %cmp to <16 x i8> ++ store <16 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v16i8_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v16i8_icmp_eq: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vseq.b $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = load <16 x i8>, ptr %a1 ++ %cmp = icmp eq <16 x i8> %v0, %v1 ++ %ext = sext <16 x i1> %cmp to <16 x i8> ++ store <16 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v8i16_icmp_eq_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v8i16_icmp_eq_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vseqi.h $vr0, $vr0, 15 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i16>, ptr %a0 ++ %cmp = icmp eq <8 x i16> %v0, ++ %ext = sext <8 x i1> %cmp to <8 x i16> ++ store <8 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v8i16_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8i16_icmp_eq: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vseq.h $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = load <8 x i16>, ptr %a1 ++ %cmp = icmp eq <8 x i16> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i16> ++ store <8 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v4i32_icmp_eq_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v4i32_icmp_eq_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vseqi.w $vr0, $vr0, 15 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i32>, ptr %a0 ++ %cmp = icmp eq <4 x i32> %v0, ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4i32_icmp_eq(ptr %res, ptr %a0, ptr %a1) 
nounwind { ++; CHECK-LABEL: v4i32_icmp_eq: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vseq.w $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = load <4 x i32>, ptr %a1 ++ %cmp = icmp eq <4 x i32> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v2i64_icmp_eq_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v2i64_icmp_eq_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vseqi.d $vr0, $vr0, 15 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x i64>, ptr %a0 ++ %cmp = icmp eq <2 x i64> %v0, ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++define void @v2i64_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2i64_icmp_eq: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vseq.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = load <2 x i64>, ptr %a1 ++ %cmp = icmp eq <2 x i64> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETLE ++define void @v16i8_icmp_sle_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v16i8_icmp_sle_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vslei.b $vr0, $vr0, 15 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i8>, ptr %a0 ++ %cmp = icmp sle <16 x i8> %v0, ++ %ext = sext <16 x i1> %cmp to <16 x i8> ++ store <16 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v16i8_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v16i8_icmp_sle: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vsle.b $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = load <16 x i8>, ptr %a1 ++ %cmp = icmp sle <16 x i8> %v0, %v1 ++ %ext = sext <16 x i1> %cmp to <16 x i8> ++ store <16 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v8i16_icmp_sle_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v8i16_icmp_sle_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vslei.h $vr0, $vr0, 15 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i16>, ptr %a0 ++ %cmp = icmp sle <8 x i16> %v0, ++ %ext = sext <8 x i1> %cmp to <8 x i16> ++ store <8 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v8i16_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8i16_icmp_sle: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vsle.h $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = load <8 x i16>, ptr %a1 ++ %cmp = icmp sle <8 x i16> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i16> ++ store <8 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v4i32_icmp_sle_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v4i32_icmp_sle_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vslei.w $vr0, $vr0, 15 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i32>, ptr %a0 ++ %cmp = icmp sle <4 x i32> %v0, ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4i32_icmp_sle(ptr %res, ptr %a0, ptr 
%a1) nounwind { ++; CHECK-LABEL: v4i32_icmp_sle: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vsle.w $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = load <4 x i32>, ptr %a1 ++ %cmp = icmp sle <4 x i32> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v2i64_icmp_sle_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v2i64_icmp_sle_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vslei.d $vr0, $vr0, 15 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x i64>, ptr %a0 ++ %cmp = icmp sle <2 x i64> %v0, ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++define void @v2i64_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2i64_icmp_sle: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vsle.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = load <2 x i64>, ptr %a1 ++ %cmp = icmp sle <2 x i64> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETULE ++define void @v16i8_icmp_ule_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v16i8_icmp_ule_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vslei.bu $vr0, $vr0, 31 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i8>, ptr %a0 ++ %cmp = icmp ule <16 x i8> %v0, ++ %ext = sext <16 x i1> %cmp to <16 x i8> ++ store <16 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v16i8_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v16i8_icmp_ule: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vsle.bu $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = load <16 x i8>, ptr %a1 ++ %cmp = icmp ule <16 x i8> %v0, %v1 ++ %ext = sext <16 x i1> %cmp to <16 x i8> ++ store <16 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v8i16_icmp_ule_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v8i16_icmp_ule_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vslei.hu $vr0, $vr0, 31 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i16>, ptr %a0 ++ %cmp = icmp ule <8 x i16> %v0, ++ %ext = sext <8 x i1> %cmp to <8 x i16> ++ store <8 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v8i16_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8i16_icmp_ule: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vsle.hu $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = load <8 x i16>, ptr %a1 ++ %cmp = icmp ule <8 x i16> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i16> ++ store <8 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v4i32_icmp_ule_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v4i32_icmp_ule_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vslei.wu $vr0, $vr0, 31 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i32>, ptr %a0 ++ %cmp = icmp ule <4 x i32> %v0, ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v4i32_icmp_ule(ptr 
%res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4i32_icmp_ule: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vsle.wu $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = load <4 x i32>, ptr %a1 ++ %cmp = icmp ule <4 x i32> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v2i64_icmp_ule_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v2i64_icmp_ule_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vslei.du $vr0, $vr0, 31 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x i64>, ptr %a0 ++ %cmp = icmp ule <2 x i64> %v0, ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++define void @v2i64_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2i64_icmp_ule: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vsle.du $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = load <2 x i64>, ptr %a1 ++ %cmp = icmp ule <2 x i64> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETLT ++define void @v16i8_icmp_slt_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v16i8_icmp_slt_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vslti.b $vr0, $vr0, 15 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i8>, ptr %a0 ++ %cmp = icmp slt <16 x i8> %v0, ++ %ext = sext <16 x i1> %cmp to <16 x i8> ++ store <16 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v16i8_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v16i8_icmp_slt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vslt.b $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = load <16 x i8>, ptr %a1 ++ %cmp = icmp slt <16 x i8> %v0, %v1 ++ %ext = sext <16 x i1> %cmp to <16 x i8> ++ store <16 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v8i16_icmp_slt_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v8i16_icmp_slt_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vslti.h $vr0, $vr0, 15 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i16>, ptr %a0 ++ %cmp = icmp slt <8 x i16> %v0, ++ %ext = sext <8 x i1> %cmp to <8 x i16> ++ store <8 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v8i16_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8i16_icmp_slt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vslt.h $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = load <8 x i16>, ptr %a1 ++ %cmp = icmp slt <8 x i16> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i16> ++ store <8 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v4i32_icmp_slt_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v4i32_icmp_slt_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vslti.w $vr0, $vr0, 15 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i32>, ptr %a0 ++ %cmp = icmp slt <4 x i32> %v0, ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void 
@v4i32_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4i32_icmp_slt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vslt.w $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = load <4 x i32>, ptr %a1 ++ %cmp = icmp slt <4 x i32> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v2i64_icmp_slt_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v2i64_icmp_slt_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vslti.d $vr0, $vr0, 15 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x i64>, ptr %a0 ++ %cmp = icmp slt <2 x i64> %v0, ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++define void @v2i64_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2i64_icmp_slt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vslt.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = load <2 x i64>, ptr %a1 ++ %cmp = icmp slt <2 x i64> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; SETULT ++define void @v16i8_icmp_ult_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v16i8_icmp_ult_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vslti.bu $vr0, $vr0, 31 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i8>, ptr %a0 ++ %cmp = icmp ult <16 x i8> %v0, ++ %ext = sext <16 x i1> %cmp to <16 x i8> ++ store <16 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v16i8_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v16i8_icmp_ult: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vslt.bu $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = load <16 x i8>, ptr %a1 ++ %cmp = icmp ult <16 x i8> %v0, %v1 ++ %ext = sext <16 x i1> %cmp to <16 x i8> ++ store <16 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v8i16_icmp_ult_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v8i16_icmp_ult_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vslti.hu $vr0, $vr0, 31 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i16>, ptr %a0 ++ %cmp = icmp ult <8 x i16> %v0, ++ %ext = sext <8 x i1> %cmp to <8 x i16> ++ store <8 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v8i16_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8i16_icmp_ult: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vslt.hu $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = load <8 x i16>, ptr %a1 ++ %cmp = icmp ult <8 x i16> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i16> ++ store <8 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v4i32_icmp_ult_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v4i32_icmp_ult_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vslti.wu $vr0, $vr0, 31 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i32>, ptr %a0 ++ %cmp = icmp ult <4 x i32> %v0, ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} 
++ ++define void @v4i32_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4i32_icmp_ult: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vslt.wu $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = load <4 x i32>, ptr %a1 ++ %cmp = icmp ult <4 x i32> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v2i64_icmp_ult_imm(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: v2i64_icmp_ult_imm: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vslti.du $vr0, $vr0, 31 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x i64>, ptr %a0 ++ %cmp = icmp ult <2 x i64> %v0, ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++define void @v2i64_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2i64_icmp_ult: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vslt.du $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = load <2 x i64>, ptr %a1 ++ %cmp = icmp ult <2 x i64> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; Expand SETNE ++define void @v16i8_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v16i8_icmp_ne: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vseq.b $vr0, $vr1, $vr0 ++; CHECK-NEXT: vxori.b $vr0, $vr0, 255 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = load <16 x i8>, ptr %a1 ++ %cmp = icmp ne <16 x i8> %v0, %v1 ++ %ext = sext <16 x i1> %cmp to <16 x i8> ++ store <16 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v8i16_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8i16_icmp_ne: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vseq.h $vr0, $vr1, $vr0 ++; CHECK-NEXT: vrepli.b $vr1, -1 ++; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = load <8 x i16>, ptr %a1 ++ %cmp = icmp ne <8 x i16> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i16> ++ store <8 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v4i32_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4i32_icmp_ne: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vseq.w $vr0, $vr1, $vr0 ++; CHECK-NEXT: vrepli.b $vr1, -1 ++; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = load <4 x i32>, ptr %a1 ++ %cmp = icmp ne <4 x i32> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v2i64_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2i64_icmp_ne: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a2, 0 ++; CHECK-NEXT: vld $vr1, $a1, 0 ++; CHECK-NEXT: vseq.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vrepli.b $vr1, -1 ++; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = load <2 x i64>, ptr %a1 ++ %cmp = icmp ne <2 x i64> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret 
void ++} ++ ++;; Expand SETGE ++define void @v16i8_icmp_sge(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v16i8_icmp_sge: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: vsle.b $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = load <16 x i8>, ptr %a1 ++ %cmp = icmp sge <16 x i8> %v0, %v1 ++ %ext = sext <16 x i1> %cmp to <16 x i8> ++ store <16 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v8i16_icmp_sge(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8i16_icmp_sge: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: vsle.h $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = load <8 x i16>, ptr %a1 ++ %cmp = icmp sge <8 x i16> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i16> ++ store <8 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v4i32_icmp_sge(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4i32_icmp_sge: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: vsle.w $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = load <4 x i32>, ptr %a1 ++ %cmp = icmp sge <4 x i32> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v2i64_icmp_sge(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2i64_icmp_sge: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: vsle.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = load <2 x i64>, ptr %a1 ++ %cmp = icmp sge <2 x i64> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; Expand SETUGE ++define void @v16i8_icmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v16i8_icmp_uge: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: vsle.bu $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = load <16 x i8>, ptr %a1 ++ %cmp = icmp uge <16 x i8> %v0, %v1 ++ %ext = sext <16 x i1> %cmp to <16 x i8> ++ store <16 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v8i16_icmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8i16_icmp_uge: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: vsle.hu $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = load <8 x i16>, ptr %a1 ++ %cmp = icmp uge <8 x i16> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i16> ++ store <8 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v4i32_icmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4i32_icmp_uge: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: vsle.wu $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = load <4 x i32>, ptr %a1 ++ %cmp = icmp uge <4 x i32> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v2i64_icmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2i64_icmp_uge: ++; CHECK: # %bb.0: ++; CHECK-NEXT: 
vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: vsle.du $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = load <2 x i64>, ptr %a1 ++ %cmp = icmp uge <2 x i64> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; Expand SETGT ++define void @v16i8_icmp_sgt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v16i8_icmp_sgt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: vslt.b $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = load <16 x i8>, ptr %a1 ++ %cmp = icmp sgt <16 x i8> %v0, %v1 ++ %ext = sext <16 x i1> %cmp to <16 x i8> ++ store <16 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v8i16_icmp_sgt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8i16_icmp_sgt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: vslt.h $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = load <8 x i16>, ptr %a1 ++ %cmp = icmp sgt <8 x i16> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i16> ++ store <8 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v4i32_icmp_sgt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4i32_icmp_sgt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: vslt.w $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i32>, ptr %a0 ++ %v1 = load <4 x i32>, ptr %a1 ++ %cmp = icmp sgt <4 x i32> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v2i64_icmp_sgt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2i64_icmp_sgt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: vslt.d $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = load <2 x i64>, ptr %a1 ++ %cmp = icmp sgt <2 x i64> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} ++ ++;; Expand SETUGT ++define void @v16i8_icmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v16i8_icmp_ugt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: vslt.bu $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <16 x i8>, ptr %a0 ++ %v1 = load <16 x i8>, ptr %a1 ++ %cmp = icmp ugt <16 x i8> %v0, %v1 ++ %ext = sext <16 x i1> %cmp to <16 x i8> ++ store <16 x i8> %ext, ptr %res ++ ret void ++} ++ ++define void @v8i16_icmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v8i16_icmp_ugt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: vslt.hu $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i16>, ptr %a0 ++ %v1 = load <8 x i16>, ptr %a1 ++ %cmp = icmp ugt <8 x i16> %v0, %v1 ++ %ext = sext <8 x i1> %cmp to <8 x i16> ++ store <8 x i16> %ext, ptr %res ++ ret void ++} ++ ++define void @v4i32_icmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v4i32_icmp_ugt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: vslt.wu $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = 
load <4 x i32>, ptr %a0 ++ %v1 = load <4 x i32>, ptr %a1 ++ %cmp = icmp ugt <4 x i32> %v0, %v1 ++ %ext = sext <4 x i1> %cmp to <4 x i32> ++ store <4 x i32> %ext, ptr %res ++ ret void ++} ++ ++define void @v2i64_icmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { ++; CHECK-LABEL: v2i64_icmp_ugt: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vld $vr1, $a2, 0 ++; CHECK-NEXT: vslt.du $vr0, $vr1, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x i64>, ptr %a0 ++ %v1 = load <2 x i64>, ptr %a1 ++ %cmp = icmp ugt <2 x i64> %v0, %v1 ++ %ext = sext <2 x i1> %cmp to <2 x i64> ++ store <2 x i64> %ext, ptr %res ++ ret void ++} +-- +2.20.1 + diff --git a/0034-LoongArch-Make-ISD-FSQRT-a-legal-operation-with-lsx-.patch b/0034-LoongArch-Make-ISD-FSQRT-a-legal-operation-with-lsx-.patch new file mode 100644 index 0000000000000000000000000000000000000000..f2fe04a2fe6279424c008b24f420855d5b63fe7b --- /dev/null +++ b/0034-LoongArch-Make-ISD-FSQRT-a-legal-operation-with-lsx-.patch @@ -0,0 +1,374 @@ +From 1cb397dd6c4dc53e44d3f61906c6464a52342c73 Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Fri, 8 Dec 2023 14:16:26 +0800 +Subject: [PATCH 34/66] [LoongArch] Make ISD::FSQRT a legal operation with + lsx/lasx feature (#74795) + +And add some patterns: +1. (fdiv 1.0, vector) +2. (fdiv 1.0, (fsqrt vector)) + +(cherry picked from commit 9f70e708a7d3fce97d63b626520351501455fca0) + +Change-Id: Ibda9c700e8cef28b7eea391c4e78de99b53f7634 +--- + .../LoongArch/LoongArchISelLowering.cpp | 2 + + .../LoongArch/LoongArchLASXInstrInfo.td | 22 +++++++ + .../Target/LoongArch/LoongArchLSXInstrInfo.td | 45 +++++++++++++ + llvm/test/CodeGen/LoongArch/lasx/fsqrt.ll | 65 +++++++++++++++++++ + .../LoongArch/lasx/ir-instruction/fdiv.ll | 29 +++++++++ + llvm/test/CodeGen/LoongArch/lsx/fsqrt.ll | 65 +++++++++++++++++++ + .../LoongArch/lsx/ir-instruction/fdiv.ll | 29 +++++++++ + 7 files changed, 257 insertions(+) + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/fsqrt.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/fsqrt.ll + +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +index 3d5ae6d3deda..8c54c7cf2cab 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +@@ -260,6 +260,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); + setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal); + setOperationAction(ISD::FMA, VT, Legal); ++ setOperationAction(ISD::FSQRT, VT, Legal); + setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT, + ISD::SETUGE, ISD::SETUGT}, + VT, Expand); +@@ -300,6 +301,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); + setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal); + setOperationAction(ISD::FMA, VT, Legal); ++ setOperationAction(ISD::FSQRT, VT, Legal); + setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT, + ISD::SETUGE, ISD::SETUGT}, + VT, Expand); +diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +index a9bf65c6840d..55b90f4450c0 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +@@ -1092,6 +1092,13 @@ multiclass PatXr { + (!cast(Inst#"_D") LASX256:$xj)>; + } + ++multiclass PatXrF 
{ ++ def : Pat<(v8f32 (OpNode (v8f32 LASX256:$xj))), ++ (!cast(Inst#"_S") LASX256:$xj)>; ++ def : Pat<(v4f64 (OpNode (v4f64 LASX256:$xj))), ++ (!cast(Inst#"_D") LASX256:$xj)>; ++} ++ + multiclass PatXrXr { + def : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)), + (!cast(Inst#"_B") LASX256:$xj, LASX256:$xk)>; +@@ -1448,6 +1455,21 @@ def : Pat<(fma v8f32:$xj, v8f32:$xk, v8f32:$xa), + def : Pat<(fma v4f64:$xj, v4f64:$xk, v4f64:$xa), + (XVFMADD_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>; + ++// XVFSQRT_{S/D} ++defm : PatXrF; ++ ++// XVRECIP_{S/D} ++def : Pat<(fdiv vsplatf32_fpimm_eq_1, v8f32:$xj), ++ (XVFRECIP_S v8f32:$xj)>; ++def : Pat<(fdiv vsplatf64_fpimm_eq_1, v4f64:$xj), ++ (XVFRECIP_D v4f64:$xj)>; ++ ++// XVFRSQRT_{S/D} ++def : Pat<(fdiv vsplatf32_fpimm_eq_1, (fsqrt v8f32:$xj)), ++ (XVFRSQRT_S v8f32:$xj)>; ++def : Pat<(fdiv vsplatf64_fpimm_eq_1, (fsqrt v4f64:$xj)), ++ (XVFRSQRT_D v4f64:$xj)>; ++ + // XVSEQ[I]_{B/H/W/D} + defm : PatCCXrSimm5; + defm : PatCCXrXr; +diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +index ff21c6681271..8ad0c5904f25 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +@@ -95,6 +95,29 @@ def vsplati64_imm_eq_63 : PatFrags<(ops), [(build_vector), + Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 63; + }]>; + ++def vsplatf32_fpimm_eq_1 ++ : PatFrags<(ops), [(bitconvert (v4i32 (build_vector))), ++ (bitconvert (v8i32 (build_vector)))], [{ ++ APInt Imm; ++ EVT EltTy = N->getValueType(0).getVectorElementType(); ++ N = N->getOperand(0).getNode(); ++ ++ return selectVSplat(N, Imm, EltTy.getSizeInBits()) && ++ Imm.getBitWidth() == EltTy.getSizeInBits() && ++ Imm == APFloat(+1.0f).bitcastToAPInt(); ++}]>; ++def vsplatf64_fpimm_eq_1 ++ : PatFrags<(ops), [(bitconvert (v2i64 (build_vector))), ++ (bitconvert (v4i64 (build_vector)))], [{ ++ APInt Imm; ++ EVT EltTy = N->getValueType(0).getVectorElementType(); ++ N = N->getOperand(0).getNode(); ++ ++ return selectVSplat(N, Imm, EltTy.getSizeInBits()) && ++ Imm.getBitWidth() == EltTy.getSizeInBits() && ++ Imm == APFloat(+1.0).bitcastToAPInt(); ++}]>; ++ + def vsplati8imm7 : PatFrag<(ops node:$reg), + (and node:$reg, vsplati8_imm_eq_7)>; + def vsplati16imm15 : PatFrag<(ops node:$reg), +@@ -1173,6 +1196,13 @@ multiclass PatVr { + (!cast(Inst#"_D") LSX128:$vj)>; + } + ++multiclass PatVrF { ++ def : Pat<(v4f32 (OpNode (v4f32 LSX128:$vj))), ++ (!cast(Inst#"_S") LSX128:$vj)>; ++ def : Pat<(v2f64 (OpNode (v2f64 LSX128:$vj))), ++ (!cast(Inst#"_D") LSX128:$vj)>; ++} ++ + multiclass PatVrVr { + def : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)), + (!cast(Inst#"_B") LSX128:$vj, LSX128:$vk)>; +@@ -1525,6 +1555,21 @@ def : Pat<(fma v4f32:$vj, v4f32:$vk, v4f32:$va), + def : Pat<(fma v2f64:$vj, v2f64:$vk, v2f64:$va), + (VFMADD_D v2f64:$vj, v2f64:$vk, v2f64:$va)>; + ++// VFSQRT_{S/D} ++defm : PatVrF; ++ ++// VFRECIP_{S/D} ++def : Pat<(fdiv vsplatf32_fpimm_eq_1, v4f32:$vj), ++ (VFRECIP_S v4f32:$vj)>; ++def : Pat<(fdiv vsplatf64_fpimm_eq_1, v2f64:$vj), ++ (VFRECIP_D v2f64:$vj)>; ++ ++// VFRSQRT_{S/D} ++def : Pat<(fdiv vsplatf32_fpimm_eq_1, (fsqrt v4f32:$vj)), ++ (VFRSQRT_S v4f32:$vj)>; ++def : Pat<(fdiv vsplatf64_fpimm_eq_1, (fsqrt v2f64:$vj)), ++ (VFRSQRT_D v2f64:$vj)>; ++ + // VSEQ[I]_{B/H/W/D} + defm : PatCCVrSimm5; + defm : PatCCVrVr; +diff --git a/llvm/test/CodeGen/LoongArch/lasx/fsqrt.ll b/llvm/test/CodeGen/LoongArch/lasx/fsqrt.ll +new file mode 100644 +index 000000000000..c4a881bdeae9 +--- 
/dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/fsqrt.ll +@@ -0,0 +1,65 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++;; fsqrt ++define void @sqrt_v8f32(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: sqrt_v8f32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvfsqrt.s $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x float>, ptr %a0, align 16 ++ %sqrt = call <8 x float> @llvm.sqrt.v8f32 (<8 x float> %v0) ++ store <8 x float> %sqrt, ptr %res, align 16 ++ ret void ++} ++ ++define void @sqrt_v4f64(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: sqrt_v4f64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvfsqrt.d $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x double>, ptr %a0, align 16 ++ %sqrt = call <4 x double> @llvm.sqrt.v4f64 (<4 x double> %v0) ++ store <4 x double> %sqrt, ptr %res, align 16 ++ ret void ++} ++ ++;; 1.0 / (fsqrt vec) ++define void @one_div_sqrt_v8f32(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: one_div_sqrt_v8f32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvfrsqrt.s $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x float>, ptr %a0, align 16 ++ %sqrt = call <8 x float> @llvm.sqrt.v8f32 (<8 x float> %v0) ++ %div = fdiv <8 x float> , %sqrt ++ store <8 x float> %div, ptr %res, align 16 ++ ret void ++} ++ ++define void @one_div_sqrt_v4f64(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: one_div_sqrt_v4f64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvfrsqrt.d $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x double>, ptr %a0, align 16 ++ %sqrt = call <4 x double> @llvm.sqrt.v4f64 (<4 x double> %v0) ++ %div = fdiv <4 x double> , %sqrt ++ store <4 x double> %div, ptr %res, align 16 ++ ret void ++} ++ ++declare <8 x float> @llvm.sqrt.v8f32(<8 x float>) ++declare <4 x double> @llvm.sqrt.v4f64(<4 x double>) +diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll +index 284121a79a49..6004565b0b78 100644 +--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll ++++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll +@@ -32,3 +32,32 @@ entry: + store <4 x double> %v2, ptr %res + ret void + } ++ ++;; 1.0 / vec ++define void @one_fdiv_v8f32(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: one_fdiv_v8f32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvfrecip.s $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x float>, ptr %a0 ++ %div = fdiv <8 x float> , %v0 ++ store <8 x float> %div, ptr %res ++ ret void ++} ++ ++define void @one_fdiv_v4f64(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: one_fdiv_v4f64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvfrecip.d $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x double>, ptr %a0 ++ %div = fdiv <4 x double> , %v0 ++ store <4 x double> %div, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/fsqrt.ll b/llvm/test/CodeGen/LoongArch/lsx/fsqrt.ll +new file mode 100644 +index 000000000000..a57bc1ca0e94 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/fsqrt.ll +@@ -0,0 +1,65 @@ 
++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++;; fsqrt ++define void @sqrt_v4f32(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: sqrt_v4f32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vfsqrt.s $vr0, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x float>, ptr %a0, align 16 ++ %sqrt = call <4 x float> @llvm.sqrt.v4f32 (<4 x float> %v0) ++ store <4 x float> %sqrt, ptr %res, align 16 ++ ret void ++} ++ ++define void @sqrt_v2f64(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: sqrt_v2f64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vfsqrt.d $vr0, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x double>, ptr %a0, align 16 ++ %sqrt = call <2 x double> @llvm.sqrt.v2f64 (<2 x double> %v0) ++ store <2 x double> %sqrt, ptr %res, align 16 ++ ret void ++} ++ ++;; 1.0 / (fsqrt vec) ++define void @one_div_sqrt_v4f32(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: one_div_sqrt_v4f32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vfrsqrt.s $vr0, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x float>, ptr %a0, align 16 ++ %sqrt = call <4 x float> @llvm.sqrt.v4f32 (<4 x float> %v0) ++ %div = fdiv <4 x float> , %sqrt ++ store <4 x float> %div, ptr %res, align 16 ++ ret void ++} ++ ++define void @one_div_sqrt_v2f64(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: one_div_sqrt_v2f64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vfrsqrt.d $vr0, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x double>, ptr %a0, align 16 ++ %sqrt = call <2 x double> @llvm.sqrt.v2f64 (<2 x double> %v0) ++ %div = fdiv <2 x double> , %sqrt ++ store <2 x double> %div, ptr %res, align 16 ++ ret void ++} ++ ++declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) ++declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) +diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll +index eb7c8bd9616e..5f1ee9e4d212 100644 +--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll ++++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll +@@ -32,3 +32,32 @@ entry: + store <2 x double> %v2, ptr %res + ret void + } ++ ++;; 1.0 / vec ++define void @one_fdiv_v4f32(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: one_fdiv_v4f32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vfrecip.s $vr0, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x float>, ptr %a0 ++ %div = fdiv <4 x float> , %v0 ++ store <4 x float> %div, ptr %res ++ ret void ++} ++ ++define void @one_fdiv_v2f64(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: one_fdiv_v2f64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vfrecip.d $vr0, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x double>, ptr %a0 ++ %div = fdiv <2 x double> , %v0 ++ store <2 x double> %div, ptr %res ++ ret void ++} +-- +2.20.1 + diff --git a/0035-LoongArch-Mark-ISD-FNEG-as-legal.patch b/0035-LoongArch-Mark-ISD-FNEG-as-legal.patch new file mode 100644 index 0000000000000000000000000000000000000000..9c1e845a0df5aff99c9c0573cd8fe5cd57de1fff --- /dev/null +++ b/0035-LoongArch-Mark-ISD-FNEG-as-legal.patch @@ -0,0 +1,141 @@ +From 
db8e4fd0222d4c3fb62d4ebf1ac19df1f18902df Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Fri, 8 Dec 2023 14:21:10 +0800 +Subject: [PATCH 35/66] [LoongArch] Mark ISD::FNEG as legal + +(cherry picked from commit cdc37325669c0321328a7245083c427b229e79e9) + +Change-Id: I8c0569c56f96eee9c066b892e62cb7b90b4aab0a +--- + .../LoongArch/LoongArchISelLowering.cpp | 2 ++ + .../LoongArch/LoongArchLASXInstrInfo.td | 4 +++ + .../Target/LoongArch/LoongArchLSXInstrInfo.td | 4 +++ + .../LoongArch/lasx/ir-instruction/fneg.ll | 29 +++++++++++++++++++ + .../LoongArch/lsx/ir-instruction/fneg.ll | 29 +++++++++++++++++++ + 5 files changed, 68 insertions(+) + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fneg.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fneg.ll + +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +index 8c54c7cf2cab..c7f4b1d24f07 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +@@ -261,6 +261,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal); + setOperationAction(ISD::FMA, VT, Legal); + setOperationAction(ISD::FSQRT, VT, Legal); ++ setOperationAction(ISD::FNEG, VT, Legal); + setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT, + ISD::SETUGE, ISD::SETUGT}, + VT, Expand); +@@ -302,6 +303,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal); + setOperationAction(ISD::FMA, VT, Legal); + setOperationAction(ISD::FSQRT, VT, Legal); ++ setOperationAction(ISD::FNEG, VT, Legal); + setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT, + ISD::SETUGE, ISD::SETUGT}, + VT, Expand); +diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +index 55b90f4450c0..8559baa0e525 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +@@ -1605,6 +1605,10 @@ foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in + def : Pat<(vt (vselect LASX256:$xa, LASX256:$xk, LASX256:$xj)), + (XVBITSEL_V LASX256:$xj, LASX256:$xk, LASX256:$xa)>; + ++// fneg ++def : Pat<(fneg (v8f32 LASX256:$xj)), (XVBITREVI_W LASX256:$xj, 31)>; ++def : Pat<(fneg (v4f64 LASX256:$xj)), (XVBITREVI_D LASX256:$xj, 63)>; ++ + } // Predicates = [HasExtLASX] + + /// Intrinsic pattern +diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +index 8ad0c5904f25..5947f241bb59 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +@@ -1712,6 +1712,10 @@ foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in + def : Pat<(vt (vselect LSX128:$va, LSX128:$vk, LSX128:$vj)), + (VBITSEL_V LSX128:$vj, LSX128:$vk, LSX128:$va)>; + ++// fneg ++def : Pat<(fneg (v4f32 LSX128:$vj)), (VBITREVI_W LSX128:$vj, 31)>; ++def : Pat<(fneg (v2f64 LSX128:$vj)), (VBITREVI_D LSX128:$vj, 63)>; ++ + } // Predicates = [HasExtLSX] + + /// Intrinsic pattern +diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fneg.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fneg.ll +new file mode 100644 +index 000000000000..5eb468fc55a0 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fneg.ll +@@ -0,0 +1,29 @@ ++; NOTE: 
Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++define void @fneg_v8f32(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: fneg_v8f32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvbitrevi.w $xr0, $xr0, 31 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = fneg <8 x float> %v0 ++ store <8 x float> %v1, ptr %res ++ ret void ++} ++define void @fneg_v4f64(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: fneg_v4f64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvbitrevi.d $xr0, $xr0, 63 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = fneg <4 x double> %v0 ++ store <4 x double> %v1, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fneg.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fneg.ll +new file mode 100644 +index 000000000000..795c1ac8b368 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fneg.ll +@@ -0,0 +1,29 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++define void @fneg_v4f32(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: fneg_v4f32: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vbitrevi.w $vr0, $vr0, 31 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = fneg <4 x float> %v0 ++ store <4 x float> %v1, ptr %res ++ ret void ++} ++define void @fneg_v2f64(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: fneg_v2f64: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vbitrevi.d $vr0, $vr0, 63 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++entry: ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = fneg <2 x double> %v0 ++ store <2 x double> %v1, ptr %res ++ ret void ++} +-- +2.20.1 + diff --git a/0036-LoongArch-Add-codegen-support-for-X-VF-MSUB-NMADD-NM.patch b/0036-LoongArch-Add-codegen-support-for-X-VF-MSUB-NMADD-NM.patch new file mode 100644 index 0000000000000000000000000000000000000000..51f51730169a8533e28e9d0d2f66e8b4eb515a6a --- /dev/null +++ b/0036-LoongArch-Add-codegen-support-for-X-VF-MSUB-NMADD-NM.patch @@ -0,0 +1,3342 @@ +From 3cfbf955ff5b4a50785d3e8e29abcb73fed7e747 Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Mon, 11 Dec 2023 10:37:22 +0800 +Subject: [PATCH 36/66] [LoongArch] Add codegen support for + [X]VF{MSUB/NMADD/NMSUB}.{S/D} instructions (#74819) + +This is similar to single and double-precision floating-point +instructions. 
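+
+A minimal IR sketch of the shape the new [X]VFMSUB/[X]VFNMADD/[X]VFNMSUB patterns
+are meant to match (the function and value names below are only illustrative):
+
+  define <4 x double> @fmsub_sketch(<4 x double> %j, <4 x double> %k, <4 x double> %a) {
+    %neg = fneg <4 x double> %a
+    ;; an fma whose addend is negated is expected to select to xvfmsub.d
+    %r = call <4 x double> @llvm.fma.v4f64(<4 x double> %j, <4 x double> %k, <4 x double> %neg)
+    ret <4 x double> %r
+  }
+  declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>)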
+ +(cherry picked from commit af999c4be9f5643724c6f379690ecee4346b2b48) + +Change-Id: I4b946a4a4e6e0869aec43e6b085025a06a11e012 +--- + .../LoongArch/LoongArchLASXInstrInfo.td | 26 + + .../Target/LoongArch/LoongArchLSXInstrInfo.td | 26 + + llvm/test/CodeGen/LoongArch/lasx/fma-v4f64.ll | 804 ++++++++++++++++++ + llvm/test/CodeGen/LoongArch/lasx/fma-v8f32.ll | 804 ++++++++++++++++++ + llvm/test/CodeGen/LoongArch/lsx/fma-v2f64.ll | 804 ++++++++++++++++++ + llvm/test/CodeGen/LoongArch/lsx/fma-v4f32.ll | 804 ++++++++++++++++++ + 6 files changed, 3268 insertions(+) + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/fma-v4f64.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/fma-v8f32.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/fma-v2f64.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/fma-v4f32.ll + +diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +index 8559baa0e525..ec6983d0f487 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +@@ -1455,6 +1455,32 @@ def : Pat<(fma v8f32:$xj, v8f32:$xk, v8f32:$xa), + def : Pat<(fma v4f64:$xj, v4f64:$xk, v4f64:$xa), + (XVFMADD_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>; + ++// XVFMSUB_{S/D} ++def : Pat<(fma v8f32:$xj, v8f32:$xk, (fneg v8f32:$xa)), ++ (XVFMSUB_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>; ++def : Pat<(fma v4f64:$xj, v4f64:$xk, (fneg v4f64:$xa)), ++ (XVFMSUB_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>; ++ ++// XVFNMADD_{S/D} ++def : Pat<(fneg (fma v8f32:$xj, v8f32:$xk, v8f32:$xa)), ++ (XVFNMADD_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>; ++def : Pat<(fneg (fma v4f64:$xj, v4f64:$xk, v4f64:$xa)), ++ (XVFNMADD_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>; ++def : Pat<(fma_nsz (fneg v8f32:$xj), v8f32:$xk, (fneg v8f32:$xa)), ++ (XVFNMADD_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>; ++def : Pat<(fma_nsz (fneg v4f64:$xj), v4f64:$xk, (fneg v4f64:$xa)), ++ (XVFNMADD_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>; ++ ++// XVFNMSUB_{S/D} ++def : Pat<(fneg (fma v8f32:$xj, v8f32:$xk, (fneg v8f32:$xa))), ++ (XVFNMSUB_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>; ++def : Pat<(fneg (fma v4f64:$xj, v4f64:$xk, (fneg v4f64:$xa))), ++ (XVFNMSUB_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>; ++def : Pat<(fma_nsz (fneg v8f32:$xj), v8f32:$xk, v8f32:$xa), ++ (XVFNMSUB_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>; ++def : Pat<(fma_nsz (fneg v4f64:$xj), v4f64:$xk, v4f64:$xa), ++ (XVFNMSUB_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>; ++ + // XVFSQRT_{S/D} + defm : PatXrF; + +diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +index 5947f241bb59..e468176885d7 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +@@ -1555,6 +1555,32 @@ def : Pat<(fma v4f32:$vj, v4f32:$vk, v4f32:$va), + def : Pat<(fma v2f64:$vj, v2f64:$vk, v2f64:$va), + (VFMADD_D v2f64:$vj, v2f64:$vk, v2f64:$va)>; + ++// VFMSUB_{S/D} ++def : Pat<(fma v4f32:$vj, v4f32:$vk, (fneg v4f32:$va)), ++ (VFMSUB_S v4f32:$vj, v4f32:$vk, v4f32:$va)>; ++def : Pat<(fma v2f64:$vj, v2f64:$vk, (fneg v2f64:$va)), ++ (VFMSUB_D v2f64:$vj, v2f64:$vk, v2f64:$va)>; ++ ++// VFNMADD_{S/D} ++def : Pat<(fneg (fma v4f32:$vj, v4f32:$vk, v4f32:$va)), ++ (VFNMADD_S v4f32:$vj, v4f32:$vk, v4f32:$va)>; ++def : Pat<(fneg (fma v2f64:$vj, v2f64:$vk, v2f64:$va)), ++ (VFNMADD_D v2f64:$vj, v2f64:$vk, v2f64:$va)>; ++def : Pat<(fma_nsz (fneg v4f32:$vj), v4f32:$vk, (fneg v4f32:$va)), ++ (VFNMADD_S v4f32:$vj, v4f32:$vk, v4f32:$va)>; ++def : 
Pat<(fma_nsz (fneg v2f64:$vj), v2f64:$vk, (fneg v2f64:$va)), ++ (VFNMADD_D v2f64:$vj, v2f64:$vk, v2f64:$va)>; ++ ++// VFNMSUB_{S/D} ++def : Pat<(fneg (fma v4f32:$vj, v4f32:$vk, (fneg v4f32:$va))), ++ (VFNMSUB_S v4f32:$vj, v4f32:$vk, v4f32:$va)>; ++def : Pat<(fneg (fma v2f64:$vj, v2f64:$vk, (fneg v2f64:$va))), ++ (VFNMSUB_D v2f64:$vj, v2f64:$vk, v2f64:$va)>; ++def : Pat<(fma_nsz (fneg v4f32:$vj), v4f32:$vk, v4f32:$va), ++ (VFNMSUB_S v4f32:$vj, v4f32:$vk, v4f32:$va)>; ++def : Pat<(fma_nsz (fneg v2f64:$vj), v2f64:$vk, v2f64:$va), ++ (VFNMSUB_D v2f64:$vj, v2f64:$vk, v2f64:$va)>; ++ + // VFSQRT_{S/D} + defm : PatVrF; + +diff --git a/llvm/test/CodeGen/LoongArch/lasx/fma-v4f64.ll b/llvm/test/CodeGen/LoongArch/lasx/fma-v4f64.ll +new file mode 100644 +index 000000000000..af18c52b096c +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/fma-v4f64.ll +@@ -0,0 +1,804 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx --fp-contract=fast < %s \ ++; RUN: | FileCheck %s --check-prefix=CONTRACT-FAST ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx --fp-contract=on < %s \ ++; RUN: | FileCheck %s --check-prefix=CONTRACT-ON ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx --fp-contract=off < %s \ ++; RUN: | FileCheck %s --check-prefix=CONTRACT-OFF ++ ++define void @xvfmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: xvfmadd_d: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: xvfmadd_d: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-ON-NEXT: xvfadd.d $xr0, $xr0, $xr1 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: xvfmadd_d: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-OFF-NEXT: xvfadd.d $xr0, $xr0, $xr1 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %v2 = load <4 x double>, ptr %a2 ++ %mul = fmul<4 x double> %v0, %v1 ++ %add = fadd<4 x double> %mul, %v2 ++ store <4 x double> %add, ptr %res ++ ret void ++} ++ ++define void @xvfmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: xvfmsub_d: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: xvfmsub_d: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-ON-NEXT: xvfsub.d $xr0, $xr0, $xr1 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: xvfmsub_d: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld 
$xr0, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-OFF-NEXT: xvfsub.d $xr0, $xr0, $xr1 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %v2 = load <4 x double>, ptr %a2 ++ %mul = fmul<4 x double> %v0, %v1 ++ %sub = fsub<4 x double> %mul, %v2 ++ store <4 x double> %sub, ptr %res ++ ret void ++} ++ ++define void @xvfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: xvfnmadd_d: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: xvfnmadd_d: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-ON-NEXT: xvfadd.d $xr0, $xr0, $xr1 ++; CONTRACT-ON-NEXT: xvbitrevi.d $xr0, $xr0, 63 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: xvfnmadd_d: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-OFF-NEXT: xvfadd.d $xr0, $xr0, $xr1 ++; CONTRACT-OFF-NEXT: xvbitrevi.d $xr0, $xr0, 63 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %v2 = load <4 x double>, ptr %a2 ++ %mul = fmul<4 x double> %v0, %v1 ++ %add = fadd<4 x double> %mul, %v2 ++ %negadd = fneg<4 x double> %add ++ store <4 x double> %negadd, ptr %res ++ ret void ++} ++ ++define void @xvfnmadd_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: xvfnmadd_d_nsz: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: xvfnmadd_d_nsz: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-ON-NEXT: xvbitrevi.d $xr1, $xr1, 63 ++; CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-ON-NEXT: xvfsub.d $xr0, $xr0, $xr1 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: xvfnmadd_d_nsz: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-OFF-NEXT: xvbitrevi.d $xr1, $xr1, 63 ++; CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-OFF-NEXT: xvfsub.d $xr0, $xr0, $xr1 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %v2 = load <4 x double>, ptr %a2 ++ %negv0 = fneg nsz<4 x double> %v0 ++ %negv2 = fneg nsz<4 x double> %v2 ++ %mul = fmul nsz<4 x double> %negv0, %v1 ++ %add = fadd nsz<4 x double> %mul, %negv2 ++ store <4 x double> 
%add, ptr %res ++ ret void ++} ++ ++;; Check that xvfnmadd.d is not emitted. ++define void @not_xvfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: not_xvfnmadd_d: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvbitrevi.d $xr2, $xr2, 63 ++; CONTRACT-FAST-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: not_xvfnmadd_d: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-ON-NEXT: xvbitrevi.d $xr1, $xr1, 63 ++; CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-ON-NEXT: xvfsub.d $xr0, $xr0, $xr1 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: not_xvfnmadd_d: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-OFF-NEXT: xvbitrevi.d $xr1, $xr1, 63 ++; CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-OFF-NEXT: xvfsub.d $xr0, $xr0, $xr1 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %v2 = load <4 x double>, ptr %a2 ++ %negv0 = fneg<4 x double> %v0 ++ %negv2 = fneg<4 x double> %v2 ++ %mul = fmul<4 x double> %negv0, %v1 ++ %add = fadd<4 x double> %mul, %negv2 ++ store <4 x double> %add, ptr %res ++ ret void ++} ++ ++define void @xvfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: xvfnmsub_d: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: xvfnmsub_d: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-ON-NEXT: xvfsub.d $xr0, $xr0, $xr1 ++; CONTRACT-ON-NEXT: xvbitrevi.d $xr0, $xr0, 63 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: xvfnmsub_d: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-OFF-NEXT: xvfsub.d $xr0, $xr0, $xr1 ++; CONTRACT-OFF-NEXT: xvbitrevi.d $xr0, $xr0, 63 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %v2 = load <4 x double>, ptr %a2 ++ %negv2 = fneg<4 x double> %v2 ++ %mul = fmul<4 x double> %v0, %v1 ++ %add = fadd<4 x double> %mul, %negv2 ++ %neg = fneg<4 x double> %add ++ store <4 x double> %neg, ptr %res ++ ret void ++} ++ ++define void @xvfnmsub_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: xvfnmsub_d_nsz: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, 
$xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: xvfnmsub_d_nsz: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-ON-NEXT: xvfsub.d $xr0, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: xvfnmsub_d_nsz: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-OFF-NEXT: xvfsub.d $xr0, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %v2 = load <4 x double>, ptr %a2 ++ %negv0 = fneg nsz<4 x double> %v0 ++ %mul = fmul nsz<4 x double> %negv0, %v1 ++ %add = fadd nsz<4 x double> %mul, %v2 ++ store <4 x double> %add, ptr %res ++ ret void ++} ++ ++;; Check that xvfnmsub.d is not emitted. ++define void @not_xvfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: not_xvfnmsub_d: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvbitrevi.d $xr2, $xr2, 63 ++; CONTRACT-FAST-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: not_xvfnmsub_d: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-ON-NEXT: xvfsub.d $xr0, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: not_xvfnmsub_d: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-OFF-NEXT: xvfsub.d $xr0, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %v2 = load <4 x double>, ptr %a2 ++ %negv0 = fneg<4 x double> %v0 ++ %mul = fmul<4 x double> %negv0, %v1 ++ %add = fadd<4 x double> %mul, %v2 ++ store <4 x double> %add, ptr %res ++ ret void ++} ++ ++define void @contract_xvfmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: contract_xvfmadd_d: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: contract_xvfmadd_d: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-ON-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: contract_xvfmadd_d: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr2, 
$a1, 0 ++; CONTRACT-OFF-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %v2 = load <4 x double>, ptr %a2 ++ %mul = fmul contract <4 x double> %v0, %v1 ++ %add = fadd contract <4 x double> %mul, %v2 ++ store <4 x double> %add, ptr %res ++ ret void ++} ++ ++define void @contract_xvfmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: contract_xvfmsub_d: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: contract_xvfmsub_d: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-ON-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: contract_xvfmsub_d: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %v2 = load <4 x double>, ptr %a2 ++ %mul = fmul contract <4 x double> %v0, %v1 ++ %sub = fsub contract <4 x double> %mul, %v2 ++ store <4 x double> %sub, ptr %res ++ ret void ++} ++ ++define void @contract_xvfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: contract_xvfnmadd_d: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: contract_xvfnmadd_d: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-ON-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: contract_xvfnmadd_d: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %v2 = load <4 x double>, ptr %a2 ++ %mul = fmul contract <4 x double> %v0, %v1 ++ %add = fadd contract <4 x double> %mul, %v2 ++ %negadd = fneg contract <4 x double> %add ++ store <4 x double> %negadd, ptr %res ++ ret void ++} ++ ++define void @contract_xvfnmadd_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: contract_xvfnmadd_d_nsz: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret 
++; ++; CONTRACT-ON-LABEL: contract_xvfnmadd_d_nsz: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-ON-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: contract_xvfnmadd_d_nsz: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %v2 = load <4 x double>, ptr %a2 ++ %negv0 = fneg contract nsz<4 x double> %v0 ++ %negv2 = fneg contract nsz<4 x double> %v2 ++ %mul = fmul contract nsz<4 x double> %negv0, %v1 ++ %add = fadd contract nsz<4 x double> %mul, %negv2 ++ store <4 x double> %add, ptr %res ++ ret void ++} ++ ++;; Check that xvfnmadd.d is not emitted. ++define void @not_contract_xvfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: not_contract_xvfnmadd_d: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvbitrevi.d $xr2, $xr2, 63 ++; CONTRACT-FAST-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: not_contract_xvfnmadd_d: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-ON-NEXT: xvbitrevi.d $xr2, $xr2, 63 ++; CONTRACT-ON-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: not_contract_xvfnmadd_d: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-OFF-NEXT: xvbitrevi.d $xr2, $xr2, 63 ++; CONTRACT-OFF-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %v2 = load <4 x double>, ptr %a2 ++ %negv0 = fneg contract <4 x double> %v0 ++ %negv2 = fneg contract <4 x double> %v2 ++ %mul = fmul contract <4 x double> %negv0, %v1 ++ %add = fadd contract <4 x double> %mul, %negv2 ++ store <4 x double> %add, ptr %res ++ ret void ++} ++ ++define void @contract_xvfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: contract_xvfnmsub_d: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: contract_xvfnmsub_d: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-ON-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: contract_xvfnmsub_d: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 ++; 
CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %v2 = load <4 x double>, ptr %a2 ++ %negv2 = fneg contract <4 x double> %v2 ++ %mul = fmul contract <4 x double> %v0, %v1 ++ %add = fadd contract <4 x double> %mul, %negv2 ++ %neg = fneg contract <4 x double> %add ++ store <4 x double> %neg, ptr %res ++ ret void ++} ++ ++define void @contract_xvfnmsub_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: contract_xvfnmsub_d_nsz: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: contract_xvfnmsub_d_nsz: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-ON-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: contract_xvfnmsub_d_nsz: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %v2 = load <4 x double>, ptr %a2 ++ %negv0 = fneg contract nsz<4 x double> %v0 ++ %mul = fmul contract nsz<4 x double> %negv0, %v1 ++ %add = fadd contract nsz<4 x double> %mul, %v2 ++ store <4 x double> %add, ptr %res ++ ret void ++} ++ ++;; Check that xvfnmsub.d is not emitted. 
++define void @not_contract_xvfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: not_contract_xvfnmsub_d: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvbitrevi.d $xr2, $xr2, 63 ++; CONTRACT-FAST-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: not_contract_xvfnmsub_d: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-ON-NEXT: xvbitrevi.d $xr2, $xr2, 63 ++; CONTRACT-ON-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: not_contract_xvfnmsub_d: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-OFF-NEXT: xvbitrevi.d $xr2, $xr2, 63 ++; CONTRACT-OFF-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %v2 = load <4 x double>, ptr %a2 ++ %negv0 = fneg contract <4 x double> %v0 ++ %mul = fmul contract <4 x double> %negv0, %v1 ++ %add = fadd contract <4 x double> %mul, %v2 ++ store <4 x double> %add, ptr %res ++ ret void ++} ++ ++define void @xvfmadd_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: xvfmadd_d_contract: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: xvfmadd_d_contract: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-ON-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: xvfmadd_d_contract: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %v2 = load <4 x double>, ptr %a2 ++ %mul = fmul contract <4 x double> %v0, %v1 ++ %add = fadd contract <4 x double> %mul, %v2 ++ store <4 x double> %add, ptr %res ++ ret void ++} ++ ++define void @xvfmsub_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: xvfmsub_d_contract: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: xvfmsub_d_contract: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-ON-NEXT: xvfmsub.d $xr0, $xr2, 
$xr1, $xr0 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: xvfmsub_d_contract: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %v2 = load <4 x double>, ptr %a2 ++ %mul = fmul contract <4 x double> %v0, %v1 ++ %sub = fsub contract <4 x double> %mul, %v2 ++ store <4 x double> %sub, ptr %res ++ ret void ++} ++ ++define void @xvfnmadd_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: xvfnmadd_d_contract: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: xvfnmadd_d_contract: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-ON-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: xvfnmadd_d_contract: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %v2 = load <4 x double>, ptr %a2 ++ %mul = fmul contract <4 x double> %v0, %v1 ++ %add = fadd contract <4 x double> %mul, %v2 ++ %negadd = fneg contract <4 x double> %add ++ store <4 x double> %negadd, ptr %res ++ ret void ++} ++ ++define void @xvfnmsub_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: xvfnmsub_d_contract: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: xvfnmsub_d_contract: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-ON-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: xvfnmsub_d_contract: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x double>, ptr %a0 ++ %v1 = load <4 x double>, ptr %a1 ++ %v2 = load <4 x double>, ptr %a2 ++ %mul = fmul contract <4 x double> %v0, %v1 ++ %negv2 = fneg contract <4 x double> %v2 ++ %add = fadd contract <4 x double> %negv2, %mul ++ %negadd = fneg contract <4 x double> %add ++ store <4 x double> %negadd, ptr %res ++ ret void ++} +diff --git 
a/llvm/test/CodeGen/LoongArch/lasx/fma-v8f32.ll b/llvm/test/CodeGen/LoongArch/lasx/fma-v8f32.ll +new file mode 100644 +index 000000000000..b7b3cb3a2e66 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/fma-v8f32.ll +@@ -0,0 +1,804 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx --fp-contract=fast < %s \ ++; RUN: | FileCheck %s --check-prefix=CONTRACT-FAST ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx --fp-contract=on < %s \ ++; RUN: | FileCheck %s --check-prefix=CONTRACT-ON ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx --fp-contract=off < %s \ ++; RUN: | FileCheck %s --check-prefix=CONTRACT-OFF ++ ++define void @xvfmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: xvfmadd_s: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: xvfmadd_s: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-ON-NEXT: xvfadd.s $xr0, $xr0, $xr1 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: xvfmadd_s: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-OFF-NEXT: xvfadd.s $xr0, $xr0, $xr1 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %v2 = load <8 x float>, ptr %a2 ++ %mul = fmul<8 x float> %v0, %v1 ++ %add = fadd<8 x float> %mul, %v2 ++ store <8 x float> %add, ptr %res ++ ret void ++} ++ ++define void @xvfmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: xvfmsub_s: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: xvfmsub_s: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-ON-NEXT: xvfsub.s $xr0, $xr0, $xr1 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: xvfmsub_s: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-OFF-NEXT: xvfsub.s $xr0, $xr0, $xr1 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %v2 = load <8 x float>, ptr %a2 ++ %mul = fmul<8 x float> %v0, %v1 ++ %sub = fsub<8 x float> %mul, %v2 ++ store <8 x float> %sub, ptr %res ++ ret void ++} ++ ++define void @xvfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: xvfnmadd_s: ++; CONTRACT-FAST: # %bb.0: 
# %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: xvfnmadd_s: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-ON-NEXT: xvfadd.s $xr0, $xr0, $xr1 ++; CONTRACT-ON-NEXT: xvbitrevi.w $xr0, $xr0, 31 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: xvfnmadd_s: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-OFF-NEXT: xvfadd.s $xr0, $xr0, $xr1 ++; CONTRACT-OFF-NEXT: xvbitrevi.w $xr0, $xr0, 31 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %v2 = load <8 x float>, ptr %a2 ++ %mul = fmul<8 x float> %v0, %v1 ++ %add = fadd<8 x float> %mul, %v2 ++ %negadd = fneg<8 x float> %add ++ store <8 x float> %negadd, ptr %res ++ ret void ++} ++ ++define void @xvfnmadd_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: xvfnmadd_s_nsz: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: xvfnmadd_s_nsz: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-ON-NEXT: xvbitrevi.w $xr1, $xr1, 31 ++; CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-ON-NEXT: xvfsub.s $xr0, $xr0, $xr1 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: xvfnmadd_s_nsz: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-OFF-NEXT: xvbitrevi.w $xr1, $xr1, 31 ++; CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-OFF-NEXT: xvfsub.s $xr0, $xr0, $xr1 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %v2 = load <8 x float>, ptr %a2 ++ %negv0 = fneg nsz<8 x float> %v0 ++ %negv2 = fneg nsz<8 x float> %v2 ++ %mul = fmul nsz<8 x float> %negv0, %v1 ++ %add = fadd nsz<8 x float> %mul, %negv2 ++ store <8 x float> %add, ptr %res ++ ret void ++} ++ ++;; Check that fnmadd.s is not emitted. 
++define void @not_xvfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: not_xvfnmadd_s: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvbitrevi.w $xr2, $xr2, 31 ++; CONTRACT-FAST-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: not_xvfnmadd_s: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-ON-NEXT: xvbitrevi.w $xr1, $xr1, 31 ++; CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-ON-NEXT: xvfsub.s $xr0, $xr0, $xr1 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: not_xvfnmadd_s: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-OFF-NEXT: xvbitrevi.w $xr1, $xr1, 31 ++; CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-OFF-NEXT: xvfsub.s $xr0, $xr0, $xr1 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %v2 = load <8 x float>, ptr %a2 ++ %negv0 = fneg<8 x float> %v0 ++ %negv2 = fneg<8 x float> %v2 ++ %mul = fmul<8 x float> %negv0, %v1 ++ %add = fadd<8 x float> %mul, %negv2 ++ store <8 x float> %add, ptr %res ++ ret void ++} ++ ++define void @xvfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: xvfnmsub_s: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: xvfnmsub_s: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-ON-NEXT: xvfsub.s $xr0, $xr0, $xr1 ++; CONTRACT-ON-NEXT: xvbitrevi.w $xr0, $xr0, 31 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: xvfnmsub_s: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-OFF-NEXT: xvfsub.s $xr0, $xr0, $xr1 ++; CONTRACT-OFF-NEXT: xvbitrevi.w $xr0, $xr0, 31 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %v2 = load <8 x float>, ptr %a2 ++ %negv2 = fneg<8 x float> %v2 ++ %mul = fmul<8 x float> %v0, %v1 ++ %add = fadd<8 x float> %mul, %negv2 ++ %neg = fneg<8 x float> %add ++ store <8 x float> %neg, ptr %res ++ ret void ++} ++ ++define void @xvfnmsub_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: xvfnmsub_s_nsz: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; 
CONTRACT-ON-LABEL: xvfnmsub_s_nsz: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-ON-NEXT: xvfsub.s $xr0, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: xvfnmsub_s_nsz: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-OFF-NEXT: xvfsub.s $xr0, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %v2 = load <8 x float>, ptr %a2 ++ %negv0 = fneg nsz<8 x float> %v0 ++ %mul = fmul nsz<8 x float> %negv0, %v1 ++ %add = fadd nsz<8 x float> %mul, %v2 ++ store <8 x float> %add, ptr %res ++ ret void ++} ++ ++;; Check that fnmsub.s is not emitted. ++define void @not_xvfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: not_xvfnmsub_s: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvbitrevi.w $xr2, $xr2, 31 ++; CONTRACT-FAST-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: not_xvfnmsub_s: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-ON-NEXT: xvfsub.s $xr0, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: not_xvfnmsub_s: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 ++; CONTRACT-OFF-NEXT: xvfsub.s $xr0, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %v2 = load <8 x float>, ptr %a2 ++ %negv0 = fneg<8 x float> %v0 ++ %mul = fmul<8 x float> %negv0, %v1 ++ %add = fadd<8 x float> %mul, %v2 ++ store <8 x float> %add, ptr %res ++ ret void ++} ++ ++define void @contract_xvfmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: contract_xvfmadd_s: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: contract_xvfmadd_s: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-ON-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: contract_xvfmadd_s: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 
0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %v2 = load <8 x float>, ptr %a2 ++ %mul = fmul contract <8 x float> %v0, %v1 ++ %add = fadd contract <8 x float> %mul, %v2 ++ store <8 x float> %add, ptr %res ++ ret void ++} ++ ++define void @contract_xvfmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: contract_xvfmsub_s: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: contract_xvfmsub_s: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-ON-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: contract_xvfmsub_s: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %v2 = load <8 x float>, ptr %a2 ++ %mul = fmul contract <8 x float> %v0, %v1 ++ %sub = fsub contract <8 x float> %mul, %v2 ++ store <8 x float> %sub, ptr %res ++ ret void ++} ++ ++define void @contract_xvfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: contract_xvfnmadd_s: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: contract_xvfnmadd_s: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-ON-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: contract_xvfnmadd_s: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %v2 = load <8 x float>, ptr %a2 ++ %mul = fmul contract <8 x float> %v0, %v1 ++ %add = fadd contract <8 x float> %mul, %v2 ++ %negadd = fneg contract <8 x float> %add ++ store <8 x float> %negadd, ptr %res ++ ret void ++} ++ ++define void @contract_xvfnmadd_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: contract_xvfnmadd_s_nsz: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: contract_xvfnmadd_s_nsz: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, 
$a3, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-ON-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: contract_xvfnmadd_s_nsz: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %v2 = load <8 x float>, ptr %a2 ++ %negv0 = fneg contract nsz<8 x float> %v0 ++ %negv2 = fneg contract nsz<8 x float> %v2 ++ %mul = fmul contract nsz<8 x float> %negv0, %v1 ++ %add = fadd contract nsz<8 x float> %mul, %negv2 ++ store <8 x float> %add, ptr %res ++ ret void ++} ++ ++;; Check that fnmadd.s is not emitted. ++define void @not_contract_xvfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: not_contract_xvfnmadd_s: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvbitrevi.w $xr2, $xr2, 31 ++; CONTRACT-FAST-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: not_contract_xvfnmadd_s: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-ON-NEXT: xvbitrevi.w $xr2, $xr2, 31 ++; CONTRACT-ON-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: not_contract_xvfnmadd_s: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-OFF-NEXT: xvbitrevi.w $xr2, $xr2, 31 ++; CONTRACT-OFF-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %v2 = load <8 x float>, ptr %a2 ++ %negv0 = fneg contract <8 x float> %v0 ++ %negv2 = fneg contract <8 x float> %v2 ++ %mul = fmul contract <8 x float> %negv0, %v1 ++ %add = fadd contract <8 x float> %mul, %negv2 ++ store <8 x float> %add, ptr %res ++ ret void ++} ++ ++define void @contract_xvfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: contract_xvfnmsub_s: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: contract_xvfnmsub_s: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-ON-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: contract_xvfnmsub_s: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0 ++; 
CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %v2 = load <8 x float>, ptr %a2 ++ %negv2 = fneg contract <8 x float> %v2 ++ %mul = fmul contract <8 x float> %v0, %v1 ++ %add = fadd contract <8 x float> %mul, %negv2 ++ %neg = fneg contract <8 x float> %add ++ store <8 x float> %neg, ptr %res ++ ret void ++} ++ ++define void @contract_xvfnmsub_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: contract_xvfnmsub_s_nsz: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: contract_xvfnmsub_s_nsz: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-ON-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: contract_xvfnmsub_s_nsz: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %v2 = load <8 x float>, ptr %a2 ++ %negv0 = fneg contract nsz<8 x float> %v0 ++ %mul = fmul contract nsz<8 x float> %negv0, %v1 ++ %add = fadd contract nsz<8 x float> %mul, %v2 ++ store <8 x float> %add, ptr %res ++ ret void ++} ++ ++;; Check that fnmsub.s is not emitted. 
++define void @not_contract_xvfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: not_contract_xvfnmsub_s: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvbitrevi.w $xr2, $xr2, 31 ++; CONTRACT-FAST-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: not_contract_xvfnmsub_s: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-ON-NEXT: xvbitrevi.w $xr2, $xr2, 31 ++; CONTRACT-ON-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: not_contract_xvfnmsub_s: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-OFF-NEXT: xvbitrevi.w $xr2, $xr2, 31 ++; CONTRACT-OFF-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %v2 = load <8 x float>, ptr %a2 ++ %negv0 = fneg contract <8 x float> %v0 ++ %mul = fmul contract <8 x float> %negv0, %v1 ++ %add = fadd contract <8 x float> %mul, %v2 ++ store <8 x float> %add, ptr %res ++ ret void ++} ++ ++define void @xvfmadd_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: xvfmadd_s_contract: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: xvfmadd_s_contract: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-ON-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: xvfmadd_s_contract: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %v2 = load <8 x float>, ptr %a2 ++ %mul = fmul contract <8 x float> %v0, %v1 ++ %add = fadd contract <8 x float> %mul, %v2 ++ store <8 x float> %add, ptr %res ++ ret void ++} ++ ++define void @xvfmsub_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: xvfmsub_s_contract: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: xvfmsub_s_contract: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-ON-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0 ++; 
CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: xvfmsub_s_contract: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %v2 = load <8 x float>, ptr %a2 ++ %mul = fmul contract <8 x float> %v0, %v1 ++ %sub = fsub contract <8 x float> %mul, %v2 ++ store <8 x float> %sub, ptr %res ++ ret void ++} ++ ++define void @xvfnmadd_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: xvfnmadd_s_contract: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: xvfnmadd_s_contract: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-ON-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: xvfnmadd_s_contract: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %v2 = load <8 x float>, ptr %a2 ++ %mul = fmul contract <8 x float> %v0, %v1 ++ %add = fadd contract <8 x float> %mul, %v2 ++ %negadd = fneg contract <8 x float> %add ++ store <8 x float> %negadd, ptr %res ++ ret void ++} ++ ++define void @xvfnmsub_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: xvfnmsub_s_contract: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-FAST-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: xvfnmsub_s_contract: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-ON-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: xvfnmsub_s_contract: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 ++; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 ++; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 ++; CONTRACT-OFF-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0 ++; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <8 x float>, ptr %a0 ++ %v1 = load <8 x float>, ptr %a1 ++ %v2 = load <8 x float>, ptr %a2 ++ %mul = fmul contract <8 x float> %v0, %v1 ++ %negv2 = fneg contract <8 x float> %v2 ++ %add = fadd contract <8 x float> %negv2, %mul ++ %negadd = fneg contract <8 x float> %add ++ store <8 x float> %negadd, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/fma-v2f64.ll 
b/llvm/test/CodeGen/LoongArch/lsx/fma-v2f64.ll +new file mode 100644 +index 000000000000..8e0459b4afab +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/fma-v2f64.ll +@@ -0,0 +1,804 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx --fp-contract=fast < %s \ ++; RUN: | FileCheck %s --check-prefix=CONTRACT-FAST ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx --fp-contract=on < %s \ ++; RUN: | FileCheck %s --check-prefix=CONTRACT-ON ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx --fp-contract=off < %s \ ++; RUN: | FileCheck %s --check-prefix=CONTRACT-OFF ++ ++define void @vfmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: vfmadd_d: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: vfmadd_d: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-ON-NEXT: vfmul.d $vr0, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-ON-NEXT: vfadd.d $vr0, $vr0, $vr1 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: vfmadd_d: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-OFF-NEXT: vfmul.d $vr0, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-OFF-NEXT: vfadd.d $vr0, $vr0, $vr1 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %v2 = load <2 x double>, ptr %a2 ++ %mul = fmul<2 x double> %v0, %v1 ++ %add = fadd<2 x double> %mul, %v2 ++ store <2 x double> %add, ptr %res ++ ret void ++} ++ ++define void @vfmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: vfmsub_d: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: vfmsub_d: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-ON-NEXT: vfmul.d $vr0, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-ON-NEXT: vfsub.d $vr0, $vr0, $vr1 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: vfmsub_d: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-OFF-NEXT: vfmul.d $vr0, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-OFF-NEXT: vfsub.d $vr0, $vr0, $vr1 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %v2 = load <2 x double>, ptr %a2 ++ %mul = fmul<2 x double> %v0, %v1 ++ %sub = fsub<2 x double> %mul, %v2 ++ store <2 x double> %sub, ptr %res ++ ret void ++} ++ ++define void @vfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: vfnmadd_d: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, 
$a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: vfnmadd_d: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-ON-NEXT: vfmul.d $vr0, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-ON-NEXT: vfadd.d $vr0, $vr0, $vr1 ++; CONTRACT-ON-NEXT: vbitrevi.d $vr0, $vr0, 63 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: vfnmadd_d: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-OFF-NEXT: vfmul.d $vr0, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-OFF-NEXT: vfadd.d $vr0, $vr0, $vr1 ++; CONTRACT-OFF-NEXT: vbitrevi.d $vr0, $vr0, 63 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %v2 = load <2 x double>, ptr %a2 ++ %mul = fmul<2 x double> %v0, %v1 ++ %add = fadd<2 x double> %mul, %v2 ++ %negadd = fneg<2 x double> %add ++ store <2 x double> %negadd, ptr %res ++ ret void ++} ++ ++define void @vfnmadd_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: vfnmadd_d_nsz: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: vfnmadd_d_nsz: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-ON-NEXT: vbitrevi.d $vr1, $vr1, 63 ++; CONTRACT-ON-NEXT: vfmul.d $vr0, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-ON-NEXT: vfsub.d $vr0, $vr0, $vr1 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: vfnmadd_d_nsz: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-OFF-NEXT: vbitrevi.d $vr1, $vr1, 63 ++; CONTRACT-OFF-NEXT: vfmul.d $vr0, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-OFF-NEXT: vfsub.d $vr0, $vr0, $vr1 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %v2 = load <2 x double>, ptr %a2 ++ %negv0 = fneg nsz<2 x double> %v0 ++ %negv2 = fneg nsz<2 x double> %v2 ++ %mul = fmul nsz<2 x double> %negv0, %v1 ++ %add = fadd nsz<2 x double> %mul, %negv2 ++ store <2 x double> %add, ptr %res ++ ret void ++} ++ ++;; Check that vfnmadd.d is not emitted. 
++define void @not_vfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: not_vfnmadd_d: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vbitrevi.d $vr2, $vr2, 63 ++; CONTRACT-FAST-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: not_vfnmadd_d: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-ON-NEXT: vbitrevi.d $vr1, $vr1, 63 ++; CONTRACT-ON-NEXT: vfmul.d $vr0, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-ON-NEXT: vfsub.d $vr0, $vr0, $vr1 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: not_vfnmadd_d: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-OFF-NEXT: vbitrevi.d $vr1, $vr1, 63 ++; CONTRACT-OFF-NEXT: vfmul.d $vr0, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-OFF-NEXT: vfsub.d $vr0, $vr0, $vr1 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %v2 = load <2 x double>, ptr %a2 ++ %negv0 = fneg<2 x double> %v0 ++ %negv2 = fneg<2 x double> %v2 ++ %mul = fmul<2 x double> %negv0, %v1 ++ %add = fadd<2 x double> %mul, %negv2 ++ store <2 x double> %add, ptr %res ++ ret void ++} ++ ++define void @vfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: vfnmsub_d: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: vfnmsub_d: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-ON-NEXT: vfmul.d $vr0, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-ON-NEXT: vfsub.d $vr0, $vr0, $vr1 ++; CONTRACT-ON-NEXT: vbitrevi.d $vr0, $vr0, 63 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: vfnmsub_d: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-OFF-NEXT: vfmul.d $vr0, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-OFF-NEXT: vfsub.d $vr0, $vr0, $vr1 ++; CONTRACT-OFF-NEXT: vbitrevi.d $vr0, $vr0, 63 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %v2 = load <2 x double>, ptr %a2 ++ %negv2 = fneg<2 x double> %v2 ++ %mul = fmul<2 x double> %v0, %v1 ++ %add = fadd<2 x double> %mul, %negv2 ++ %neg = fneg<2 x double> %add ++ store <2 x double> %neg, ptr %res ++ ret void ++} ++ ++define void @vfnmsub_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: vfnmsub_d_nsz: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: vfnmsub_d_nsz: ++; 
CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-ON-NEXT: vfmul.d $vr0, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-ON-NEXT: vfsub.d $vr0, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: vfnmsub_d_nsz: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-OFF-NEXT: vfmul.d $vr0, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-OFF-NEXT: vfsub.d $vr0, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %v2 = load <2 x double>, ptr %a2 ++ %negv0 = fneg nsz<2 x double> %v0 ++ %mul = fmul nsz<2 x double> %negv0, %v1 ++ %add = fadd nsz<2 x double> %mul, %v2 ++ store <2 x double> %add, ptr %res ++ ret void ++} ++ ++;; Check that vfnmsub.d is not emitted. ++define void @not_vfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: not_vfnmsub_d: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vbitrevi.d $vr2, $vr2, 63 ++; CONTRACT-FAST-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: not_vfnmsub_d: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-ON-NEXT: vfmul.d $vr0, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-ON-NEXT: vfsub.d $vr0, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: not_vfnmsub_d: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-OFF-NEXT: vfmul.d $vr0, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-OFF-NEXT: vfsub.d $vr0, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %v2 = load <2 x double>, ptr %a2 ++ %negv0 = fneg<2 x double> %v0 ++ %mul = fmul<2 x double> %negv0, %v1 ++ %add = fadd<2 x double> %mul, %v2 ++ store <2 x double> %add, ptr %res ++ ret void ++} ++ ++define void @contract_vfmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: contract_vfmadd_d: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: contract_vfmadd_d: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-ON-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: contract_vfmadd_d: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-OFF-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <2 x double>, ptr %a0 ++ 
%v1 = load <2 x double>, ptr %a1 ++ %v2 = load <2 x double>, ptr %a2 ++ %mul = fmul contract <2 x double> %v0, %v1 ++ %add = fadd contract <2 x double> %mul, %v2 ++ store <2 x double> %add, ptr %res ++ ret void ++} ++ ++define void @contract_vfmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: contract_vfmsub_d: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: contract_vfmsub_d: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-ON-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: contract_vfmsub_d: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-OFF-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %v2 = load <2 x double>, ptr %a2 ++ %mul = fmul contract <2 x double> %v0, %v1 ++ %sub = fsub contract <2 x double> %mul, %v2 ++ store <2 x double> %sub, ptr %res ++ ret void ++} ++ ++define void @contract_vfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: contract_vfnmadd_d: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: contract_vfnmadd_d: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-ON-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: contract_vfnmadd_d: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-OFF-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %v2 = load <2 x double>, ptr %a2 ++ %mul = fmul contract <2 x double> %v0, %v1 ++ %add = fadd contract <2 x double> %mul, %v2 ++ %negadd = fneg contract <2 x double> %add ++ store <2 x double> %negadd, ptr %res ++ ret void ++} ++ ++define void @contract_vfnmadd_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: contract_vfnmadd_d_nsz: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: contract_vfnmadd_d_nsz: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-ON-NEXT: 
vfnmadd.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: contract_vfnmadd_d_nsz: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-OFF-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %v2 = load <2 x double>, ptr %a2 ++ %negv0 = fneg contract nsz<2 x double> %v0 ++ %negv2 = fneg contract nsz<2 x double> %v2 ++ %mul = fmul contract nsz<2 x double> %negv0, %v1 ++ %add = fadd contract nsz<2 x double> %mul, %negv2 ++ store <2 x double> %add, ptr %res ++ ret void ++} ++ ++;; Check that vfnmadd.d is not emitted. ++define void @not_contract_vfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: not_contract_vfnmadd_d: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vbitrevi.d $vr2, $vr2, 63 ++; CONTRACT-FAST-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: not_contract_vfnmadd_d: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-ON-NEXT: vbitrevi.d $vr2, $vr2, 63 ++; CONTRACT-ON-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: not_contract_vfnmadd_d: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-OFF-NEXT: vbitrevi.d $vr2, $vr2, 63 ++; CONTRACT-OFF-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %v2 = load <2 x double>, ptr %a2 ++ %negv0 = fneg contract <2 x double> %v0 ++ %negv2 = fneg contract <2 x double> %v2 ++ %mul = fmul contract <2 x double> %negv0, %v1 ++ %add = fadd contract <2 x double> %mul, %negv2 ++ store <2 x double> %add, ptr %res ++ ret void ++} ++ ++define void @contract_vfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: contract_vfnmsub_d: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: contract_vfnmsub_d: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-ON-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: contract_vfnmsub_d: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-OFF-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr 
%a1 ++ %v2 = load <2 x double>, ptr %a2 ++ %negv2 = fneg contract <2 x double> %v2 ++ %mul = fmul contract <2 x double> %v0, %v1 ++ %add = fadd contract <2 x double> %mul, %negv2 ++ %neg = fneg contract <2 x double> %add ++ store <2 x double> %neg, ptr %res ++ ret void ++} ++ ++define void @contract_vfnmsub_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: contract_vfnmsub_d_nsz: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: contract_vfnmsub_d_nsz: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-ON-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: contract_vfnmsub_d_nsz: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-OFF-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %v2 = load <2 x double>, ptr %a2 ++ %negv0 = fneg contract nsz<2 x double> %v0 ++ %mul = fmul contract nsz<2 x double> %negv0, %v1 ++ %add = fadd contract nsz<2 x double> %mul, %v2 ++ store <2 x double> %add, ptr %res ++ ret void ++} ++ ++;; Check that vfnmsub.d is not emitted. ++define void @not_contract_vfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: not_contract_vfnmsub_d: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vbitrevi.d $vr2, $vr2, 63 ++; CONTRACT-FAST-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: not_contract_vfnmsub_d: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-ON-NEXT: vbitrevi.d $vr2, $vr2, 63 ++; CONTRACT-ON-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: not_contract_vfnmsub_d: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-OFF-NEXT: vbitrevi.d $vr2, $vr2, 63 ++; CONTRACT-OFF-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %v2 = load <2 x double>, ptr %a2 ++ %negv0 = fneg contract <2 x double> %v0 ++ %mul = fmul contract <2 x double> %negv0, %v1 ++ %add = fadd contract <2 x double> %mul, %v2 ++ store <2 x double> %add, ptr %res ++ ret void ++} ++ ++define void @vfmadd_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: vfmadd_d_contract: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: 
vfmadd.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: vfmadd_d_contract: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-ON-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: vfmadd_d_contract: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-OFF-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %v2 = load <2 x double>, ptr %a2 ++ %mul = fmul contract <2 x double> %v0, %v1 ++ %add = fadd contract <2 x double> %mul, %v2 ++ store <2 x double> %add, ptr %res ++ ret void ++} ++ ++define void @vfmsub_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: vfmsub_d_contract: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: vfmsub_d_contract: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-ON-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: vfmsub_d_contract: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-OFF-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %v2 = load <2 x double>, ptr %a2 ++ %mul = fmul contract <2 x double> %v0, %v1 ++ %sub = fsub contract <2 x double> %mul, %v2 ++ store <2 x double> %sub, ptr %res ++ ret void ++} ++ ++define void @vfnmadd_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: vfnmadd_d_contract: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: vfnmadd_d_contract: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-ON-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: vfnmadd_d_contract: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-OFF-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %v2 = load <2 x double>, ptr %a2 ++ %mul = fmul contract <2 x double> %v0, %v1 ++ 
%add = fadd contract <2 x double> %mul, %v2 ++ %negadd = fneg contract <2 x double> %add ++ store <2 x double> %negadd, ptr %res ++ ret void ++} ++ ++define void @vfnmsub_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: vfnmsub_d_contract: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: vfnmsub_d_contract: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-ON-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: vfnmsub_d_contract: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-OFF-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <2 x double>, ptr %a0 ++ %v1 = load <2 x double>, ptr %a1 ++ %v2 = load <2 x double>, ptr %a2 ++ %mul = fmul contract <2 x double> %v0, %v1 ++ %negv2 = fneg contract <2 x double> %v2 ++ %add = fadd contract <2 x double> %negv2, %mul ++ %negadd = fneg contract <2 x double> %add ++ store <2 x double> %negadd, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/fma-v4f32.ll b/llvm/test/CodeGen/LoongArch/lsx/fma-v4f32.ll +new file mode 100644 +index 000000000000..7efbd61c0c4f +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/fma-v4f32.ll +@@ -0,0 +1,804 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx --fp-contract=fast < %s \ ++; RUN: | FileCheck %s --check-prefix=CONTRACT-FAST ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx --fp-contract=on < %s \ ++; RUN: | FileCheck %s --check-prefix=CONTRACT-ON ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx --fp-contract=off < %s \ ++; RUN: | FileCheck %s --check-prefix=CONTRACT-OFF ++ ++define void @vfmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: vfmadd_s: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: vfmadd_s: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-ON-NEXT: vfmul.s $vr0, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-ON-NEXT: vfadd.s $vr0, $vr0, $vr1 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: vfmadd_s: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-OFF-NEXT: vfmul.s $vr0, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-OFF-NEXT: vfadd.s $vr0, $vr0, $vr1 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %v2 = load <4 x float>, ptr %a2 ++ %mul = fmul<4 x float> %v0, %v1 ++ %add = fadd<4 x float> %mul, %v2 ++ 
store <4 x float> %add, ptr %res ++ ret void ++} ++ ++define void @vfmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: vfmsub_s: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: vfmsub_s: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-ON-NEXT: vfmul.s $vr0, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-ON-NEXT: vfsub.s $vr0, $vr0, $vr1 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: vfmsub_s: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-OFF-NEXT: vfmul.s $vr0, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-OFF-NEXT: vfsub.s $vr0, $vr0, $vr1 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %v2 = load <4 x float>, ptr %a2 ++ %mul = fmul<4 x float> %v0, %v1 ++ %sub = fsub<4 x float> %mul, %v2 ++ store <4 x float> %sub, ptr %res ++ ret void ++} ++ ++define void @vfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: vfnmadd_s: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: vfnmadd_s: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-ON-NEXT: vfmul.s $vr0, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-ON-NEXT: vfadd.s $vr0, $vr0, $vr1 ++; CONTRACT-ON-NEXT: vbitrevi.w $vr0, $vr0, 31 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: vfnmadd_s: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-OFF-NEXT: vfmul.s $vr0, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-OFF-NEXT: vfadd.s $vr0, $vr0, $vr1 ++; CONTRACT-OFF-NEXT: vbitrevi.w $vr0, $vr0, 31 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %v2 = load <4 x float>, ptr %a2 ++ %mul = fmul<4 x float> %v0, %v1 ++ %add = fadd<4 x float> %mul, %v2 ++ %negadd = fneg<4 x float> %add ++ store <4 x float> %negadd, ptr %res ++ ret void ++} ++ ++define void @vfnmadd_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: vfnmadd_s_nsz: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: vfnmadd_s_nsz: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-ON-NEXT: vbitrevi.w $vr1, $vr1, 31 ++; CONTRACT-ON-NEXT: vfmul.s $vr0, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vld $vr1, $a3, 
0 ++; CONTRACT-ON-NEXT: vfsub.s $vr0, $vr0, $vr1 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: vfnmadd_s_nsz: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-OFF-NEXT: vbitrevi.w $vr1, $vr1, 31 ++; CONTRACT-OFF-NEXT: vfmul.s $vr0, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-OFF-NEXT: vfsub.s $vr0, $vr0, $vr1 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %v2 = load <4 x float>, ptr %a2 ++ %negv0 = fneg nsz<4 x float> %v0 ++ %negv2 = fneg nsz<4 x float> %v2 ++ %mul = fmul nsz<4 x float> %negv0, %v1 ++ %add = fadd nsz<4 x float> %mul, %negv2 ++ store <4 x float> %add, ptr %res ++ ret void ++} ++ ++;; Check that vfnmadd.s is not emitted. ++define void @not_vfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: not_vfnmadd_s: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vbitrevi.w $vr2, $vr2, 31 ++; CONTRACT-FAST-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: not_vfnmadd_s: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-ON-NEXT: vbitrevi.w $vr1, $vr1, 31 ++; CONTRACT-ON-NEXT: vfmul.s $vr0, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-ON-NEXT: vfsub.s $vr0, $vr0, $vr1 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: not_vfnmadd_s: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-OFF-NEXT: vbitrevi.w $vr1, $vr1, 31 ++; CONTRACT-OFF-NEXT: vfmul.s $vr0, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-OFF-NEXT: vfsub.s $vr0, $vr0, $vr1 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %v2 = load <4 x float>, ptr %a2 ++ %negv0 = fneg<4 x float> %v0 ++ %negv2 = fneg<4 x float> %v2 ++ %mul = fmul<4 x float> %negv0, %v1 ++ %add = fadd<4 x float> %mul, %negv2 ++ store <4 x float> %add, ptr %res ++ ret void ++} ++ ++define void @vfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: vfnmsub_s: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: vfnmsub_s: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-ON-NEXT: vfmul.s $vr0, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-ON-NEXT: vfsub.s $vr0, $vr0, $vr1 ++; CONTRACT-ON-NEXT: vbitrevi.w $vr0, $vr0, 31 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: vfnmsub_s: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-OFF-NEXT: vfmul.s $vr0, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-OFF-NEXT: vfsub.s $vr0, $vr0, $vr1 ++; 
CONTRACT-OFF-NEXT: vbitrevi.w $vr0, $vr0, 31 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %v2 = load <4 x float>, ptr %a2 ++ %negv2 = fneg<4 x float> %v2 ++ %mul = fmul<4 x float> %v0, %v1 ++ %add = fadd<4 x float> %mul, %negv2 ++ %neg = fneg<4 x float> %add ++ store <4 x float> %neg, ptr %res ++ ret void ++} ++ ++define void @vfnmsub_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: vfnmsub_s_nsz: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: vfnmsub_s_nsz: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-ON-NEXT: vfmul.s $vr0, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-ON-NEXT: vfsub.s $vr0, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: vfnmsub_s_nsz: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-OFF-NEXT: vfmul.s $vr0, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-OFF-NEXT: vfsub.s $vr0, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %v2 = load <4 x float>, ptr %a2 ++ %negv0 = fneg nsz<4 x float> %v0 ++ %mul = fmul nsz<4 x float> %negv0, %v1 ++ %add = fadd nsz<4 x float> %mul, %v2 ++ store <4 x float> %add, ptr %res ++ ret void ++} ++ ++;; Check that vfnmsub.s is not emitted. 
++define void @not_vfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: not_vfnmsub_s: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vbitrevi.w $vr2, $vr2, 31 ++; CONTRACT-FAST-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: not_vfnmsub_s: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-ON-NEXT: vfmul.s $vr0, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-ON-NEXT: vfsub.s $vr0, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: not_vfnmsub_s: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 ++; CONTRACT-OFF-NEXT: vfmul.s $vr0, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 ++; CONTRACT-OFF-NEXT: vfsub.s $vr0, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %v2 = load <4 x float>, ptr %a2 ++ %negv0 = fneg<4 x float> %v0 ++ %mul = fmul<4 x float> %negv0, %v1 ++ %add = fadd<4 x float> %mul, %v2 ++ store <4 x float> %add, ptr %res ++ ret void ++} ++ ++define void @contract_vfmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: contract_vfmadd_s: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: contract_vfmadd_s: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-ON-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: contract_vfmadd_s: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-OFF-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %v2 = load <4 x float>, ptr %a2 ++ %mul = fmul contract <4 x float> %v0, %v1 ++ %add = fadd contract <4 x float> %mul, %v2 ++ store <4 x float> %add, ptr %res ++ ret void ++} ++ ++define void @contract_vfmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: contract_vfmsub_s: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: contract_vfmsub_s: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-ON-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: contract_vfmsub_s: ++; CONTRACT-OFF: # %bb.0: 
# %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-OFF-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %v2 = load <4 x float>, ptr %a2 ++ %mul = fmul contract <4 x float> %v0, %v1 ++ %sub = fsub contract <4 x float> %mul, %v2 ++ store <4 x float> %sub, ptr %res ++ ret void ++} ++ ++define void @contract_vfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: contract_vfnmadd_s: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: contract_vfnmadd_s: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-ON-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: contract_vfnmadd_s: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-OFF-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %v2 = load <4 x float>, ptr %a2 ++ %mul = fmul contract <4 x float> %v0, %v1 ++ %add = fadd contract <4 x float> %mul, %v2 ++ %negadd = fneg contract <4 x float> %add ++ store <4 x float> %negadd, ptr %res ++ ret void ++} ++ ++define void @contract_vfnmadd_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: contract_vfnmadd_s_nsz: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: contract_vfnmadd_s_nsz: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-ON-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: contract_vfnmadd_s_nsz: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-OFF-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %v2 = load <4 x float>, ptr %a2 ++ %negv0 = fneg contract nsz<4 x float> %v0 ++ %negv2 = fneg contract nsz<4 x float> %v2 ++ %mul = fmul contract nsz<4 x float> %negv0, %v1 ++ %add = fadd contract nsz<4 x float> %mul, %negv2 ++ store <4 x float> %add, ptr %res ++ ret void ++} ++ ++;; Check that vfnmadd.s is not emitted. 
++define void @not_contract_vfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: not_contract_vfnmadd_s: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vbitrevi.w $vr2, $vr2, 31 ++; CONTRACT-FAST-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: not_contract_vfnmadd_s: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-ON-NEXT: vbitrevi.w $vr2, $vr2, 31 ++; CONTRACT-ON-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: not_contract_vfnmadd_s: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-OFF-NEXT: vbitrevi.w $vr2, $vr2, 31 ++; CONTRACT-OFF-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %v2 = load <4 x float>, ptr %a2 ++ %negv0 = fneg contract <4 x float> %v0 ++ %negv2 = fneg contract <4 x float> %v2 ++ %mul = fmul contract <4 x float> %negv0, %v1 ++ %add = fadd contract <4 x float> %mul, %negv2 ++ store <4 x float> %add, ptr %res ++ ret void ++} ++ ++define void @contract_vfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: contract_vfnmsub_s: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: contract_vfnmsub_s: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-ON-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: contract_vfnmsub_s: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-OFF-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %v2 = load <4 x float>, ptr %a2 ++ %negv2 = fneg contract <4 x float> %v2 ++ %mul = fmul contract <4 x float> %v0, %v1 ++ %add = fadd contract <4 x float> %mul, %negv2 ++ %neg = fneg contract <4 x float> %add ++ store <4 x float> %neg, ptr %res ++ ret void ++} ++ ++define void @contract_vfnmsub_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: contract_vfnmsub_s_nsz: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: contract_vfnmsub_s_nsz: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-ON-NEXT: vld $vr1, 
$a2, 0 ++; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-ON-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: contract_vfnmsub_s_nsz: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-OFF-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %v2 = load <4 x float>, ptr %a2 ++ %negv0 = fneg contract nsz<4 x float> %v0 ++ %mul = fmul contract nsz<4 x float> %negv0, %v1 ++ %add = fadd contract nsz<4 x float> %mul, %v2 ++ store <4 x float> %add, ptr %res ++ ret void ++} ++ ++;; Check that vfnmsub.s is not emitted. ++define void @not_contract_vfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: not_contract_vfnmsub_s: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vbitrevi.w $vr2, $vr2, 31 ++; CONTRACT-FAST-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: not_contract_vfnmsub_s: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-ON-NEXT: vbitrevi.w $vr2, $vr2, 31 ++; CONTRACT-ON-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: not_contract_vfnmsub_s: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-OFF-NEXT: vbitrevi.w $vr2, $vr2, 31 ++; CONTRACT-OFF-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %v2 = load <4 x float>, ptr %a2 ++ %negv0 = fneg contract <4 x float> %v0 ++ %mul = fmul contract <4 x float> %negv0, %v1 ++ %add = fadd contract <4 x float> %mul, %v2 ++ store <4 x float> %add, ptr %res ++ ret void ++} ++ ++define void @vfmadd_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: vfmadd_s_contract: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: vfmadd_s_contract: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-ON-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: vfmadd_s_contract: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-OFF-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %v2 = load <4 x float>, ptr %a2 ++ %mul = 
fmul contract <4 x float> %v0, %v1 ++ %add = fadd contract <4 x float> %mul, %v2 ++ store <4 x float> %add, ptr %res ++ ret void ++} ++ ++define void @vfmsub_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: vfmsub_s_contract: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: vfmsub_s_contract: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-ON-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: vfmsub_s_contract: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-OFF-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %v2 = load <4 x float>, ptr %a2 ++ %mul = fmul contract <4 x float> %v0, %v1 ++ %sub = fsub contract <4 x float> %mul, %v2 ++ store <4 x float> %sub, ptr %res ++ ret void ++} ++ ++define void @vfnmadd_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: vfnmadd_s_contract: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: vfnmadd_s_contract: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-ON-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; CONTRACT-OFF-LABEL: vfnmadd_s_contract: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-OFF-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %v2 = load <4 x float>, ptr %a2 ++ %mul = fmul contract <4 x float> %v0, %v1 ++ %add = fadd contract <4 x float> %mul, %v2 ++ %negadd = fneg contract <4 x float> %add ++ store <4 x float> %negadd, ptr %res ++ ret void ++} ++ ++define void @vfnmsub_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ++; CONTRACT-FAST-LABEL: vfnmsub_s_contract: ++; CONTRACT-FAST: # %bb.0: # %entry ++; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-FAST-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-FAST-NEXT: ret ++; ++; CONTRACT-ON-LABEL: vfnmsub_s_contract: ++; CONTRACT-ON: # %bb.0: # %entry ++; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-ON-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-ON-NEXT: ret ++; ++; 
CONTRACT-OFF-LABEL: vfnmsub_s_contract: ++; CONTRACT-OFF: # %bb.0: # %entry ++; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 ++; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 ++; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 ++; CONTRACT-OFF-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 ++; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 ++; CONTRACT-OFF-NEXT: ret ++entry: ++ %v0 = load <4 x float>, ptr %a0 ++ %v1 = load <4 x float>, ptr %a1 ++ %v2 = load <4 x float>, ptr %a2 ++ %mul = fmul contract <4 x float> %v0, %v1 ++ %negv2 = fneg contract <4 x float> %v2 ++ %add = fadd contract <4 x float> %negv2, %mul ++ %negadd = fneg contract <4 x float> %add ++ store <4 x float> %negadd, ptr %res ++ ret void ++} +-- +2.20.1 + diff --git a/0037-LoongArch-Fix-LASX-vector_extract-codegen.patch b/0037-LoongArch-Fix-LASX-vector_extract-codegen.patch new file mode 100644 index 0000000000000000000000000000000000000000..0af6ed40e1f03c8dd63abd2adb71844a752462f2 --- /dev/null +++ b/0037-LoongArch-Fix-LASX-vector_extract-codegen.patch @@ -0,0 +1,329 @@ +From 0c21388d176b7f6d9249f47487c4368eec0ae508 Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Mon, 25 Dec 2023 10:09:20 +0800 +Subject: [PATCH 37/66] [LoongArch] Fix LASX vector_extract codegen + +Custom lowering `ISD::EXTRACT_VECTOR_ELT` with lasx. + +(cherry picked from commit 47c88bcd5de91522241cca1aaa1b7762ceb01394) + +Change-Id: Ia52cfd2091663eb04274f01409c8d7436552a359 +--- + .../LoongArch/LoongArchISelLowering.cpp | 21 +++- + .../Target/LoongArch/LoongArchISelLowering.h | 1 + + .../LoongArch/LoongArchLASXInstrInfo.td | 40 ++---- + .../lasx/ir-instruction/extractelement.ll | 114 ++++++++++++++---- + 4 files changed, 119 insertions(+), 57 deletions(-) + +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +index c7f4b1d24f07..cf881ce720a6 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +@@ -277,7 +277,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + setOperationAction(ISD::UNDEF, VT, Legal); + + setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); +- setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal); ++ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + + setOperationAction(ISD::SETCC, VT, Legal); +@@ -395,6 +395,8 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op, + return lowerWRITE_REGISTER(Op, DAG); + case ISD::INSERT_VECTOR_ELT: + return lowerINSERT_VECTOR_ELT(Op, DAG); ++ case ISD::EXTRACT_VECTOR_ELT: ++ return lowerEXTRACT_VECTOR_ELT(Op, DAG); + case ISD::BUILD_VECTOR: + return lowerBUILD_VECTOR(Op, DAG); + case ISD::VECTOR_SHUFFLE: +@@ -502,6 +504,23 @@ SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op, + return SDValue(); + } + ++SDValue ++LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, ++ SelectionDAG &DAG) const { ++ EVT VecTy = Op->getOperand(0)->getValueType(0); ++ SDValue Idx = Op->getOperand(1); ++ EVT EltTy = VecTy.getVectorElementType(); ++ unsigned NumElts = VecTy.getVectorNumElements(); ++ ++ if (isa(Idx) && ++ (EltTy == MVT::i32 || EltTy == MVT::i64 || EltTy == MVT::f32 || ++ EltTy == MVT::f64 || ++ cast(Idx)->getZExtValue() < NumElts / 2)) ++ return Op; ++ ++ return SDValue(); ++} ++ + SDValue + LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, + SelectionDAG &DAG) const { +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +index 
2c35f9e5d378..6b5a851ec55d 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +@@ -279,6 +279,7 @@ private: + SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; +diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +index ec6983d0f487..9b7a34688811 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +@@ -1590,38 +1590,14 @@ def : Pat<(i64 (vector_extract v32i8:$xj, uimm4:$imm)), + (VPICKVE2GR_B (EXTRACT_SUBREG v32i8:$xj, sub_128), uimm4:$imm)>; + def : Pat<(i64 (vector_extract v16i16:$xj, uimm3:$imm)), + (VPICKVE2GR_H (EXTRACT_SUBREG v16i16:$xj, sub_128), uimm3:$imm)>; +-def : Pat<(i64 (vector_extract v8i32:$xj, uimm2:$imm)), +- (VPICKVE2GR_W (EXTRACT_SUBREG v8i32:$xj, sub_128), uimm2:$imm)>; +-def : Pat<(i64 (vector_extract v4i64:$xj, uimm1:$imm)), +- (VPICKVE2GR_D (EXTRACT_SUBREG v4i64:$xj, sub_128), uimm1:$imm)>; +-def : Pat<(f32 (vector_extract v8f32:$xj, uimm2:$imm)), +- (f32 (EXTRACT_SUBREG (XVREPL128VEI_W v8f32:$xj, uimm2:$imm), sub_32))>; +-def : Pat<(f64 (vector_extract v4f64:$xj, uimm1:$imm)), +- (f64 (EXTRACT_SUBREG (XVREPL128VEI_D v4f64:$xj, uimm1:$imm), sub_64))>; +- +-// Vector extraction with variable index. +-def : Pat<(i64 (vector_extract v32i8:$xj, i64:$rk)), +- (SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (XVREPLVE_B v32i8:$xj, +- i64:$rk), +- sub_32)), +- GPR), (i64 24))>; +-def : Pat<(i64 (vector_extract v16i16:$xj, i64:$rk)), +- (SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (XVREPLVE_H v16i16:$xj, +- i64:$rk), +- sub_32)), +- GPR), (i64 16))>; +-def : Pat<(i64 (vector_extract v8i32:$xj, i64:$rk)), +- (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (XVREPLVE_W v8i32:$xj, i64:$rk), +- sub_32)), +- GPR)>; +-def : Pat<(i64 (vector_extract v4i64:$xj, i64:$rk)), +- (COPY_TO_REGCLASS (f64 (EXTRACT_SUBREG (XVREPLVE_D v4i64:$xj, i64:$rk), +- sub_64)), +- GPR)>; +-def : Pat<(f32 (vector_extract v8f32:$xj, i64:$rk)), +- (f32 (EXTRACT_SUBREG (XVREPLVE_W v8f32:$xj, i64:$rk), sub_32))>; +-def : Pat<(f64 (vector_extract v4f64:$xj, i64:$rk)), +- (f64 (EXTRACT_SUBREG (XVREPLVE_D v4f64:$xj, i64:$rk), sub_64))>; ++def : Pat<(i64 (vector_extract v8i32:$xj, uimm3:$imm)), ++ (XVPICKVE2GR_W v8i32:$xj, uimm3:$imm)>; ++def : Pat<(i64 (vector_extract v4i64:$xj, uimm2:$imm)), ++ (XVPICKVE2GR_D v4i64:$xj, uimm2:$imm)>; ++def : Pat<(f32 (vector_extract v8f32:$xj, uimm3:$imm)), ++ (MOVGR2FR_W (XVPICKVE2GR_W v8f32:$xj, uimm3:$imm))>; ++def : Pat<(f64 (vector_extract v4f64:$xj, uimm2:$imm)), ++ (MOVGR2FR_D (XVPICKVE2GR_D v4f64:$xj, uimm2:$imm))>; + + // vselect + def : Pat<(v32i8 (vselect LASX256:$xj, LASX256:$xd, +diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll +index 78f584cd09a8..02b76bf75b75 100644 +--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll ++++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll +@@ -31,7 +31,7 @@ define void 
@extract_8xi32(ptr %src, ptr %dst) nounwind { + ; CHECK-LABEL: extract_8xi32: + ; CHECK: # %bb.0: + ; CHECK-NEXT: xvld $xr0, $a0, 0 +-; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 1 ++; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 1 + ; CHECK-NEXT: st.w $a0, $a1, 0 + ; CHECK-NEXT: ret + %v = load volatile <8 x i32>, ptr %src +@@ -44,7 +44,7 @@ define void @extract_4xi64(ptr %src, ptr %dst) nounwind { + ; CHECK-LABEL: extract_4xi64: + ; CHECK: # %bb.0: + ; CHECK-NEXT: xvld $xr0, $a0, 0 +-; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 1 ++; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1 + ; CHECK-NEXT: st.d $a0, $a1, 0 + ; CHECK-NEXT: ret + %v = load volatile <4 x i64>, ptr %src +@@ -57,8 +57,8 @@ define void @extract_8xfloat(ptr %src, ptr %dst) nounwind { + ; CHECK-LABEL: extract_8xfloat: + ; CHECK: # %bb.0: + ; CHECK-NEXT: xvld $xr0, $a0, 0 +-; CHECK-NEXT: ori $a0, $zero, 7 +-; CHECK-NEXT: xvreplve.w $xr0, $xr0, $a0 ++; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 7 ++; CHECK-NEXT: movgr2fr.w $fa0, $a0 + ; CHECK-NEXT: fst.s $fa0, $a1, 0 + ; CHECK-NEXT: ret + %v = load volatile <8 x float>, ptr %src +@@ -71,8 +71,8 @@ define void @extract_4xdouble(ptr %src, ptr %dst) nounwind { + ; CHECK-LABEL: extract_4xdouble: + ; CHECK: # %bb.0: + ; CHECK-NEXT: xvld $xr0, $a0, 0 +-; CHECK-NEXT: ori $a0, $zero, 3 +-; CHECK-NEXT: xvreplve.d $xr0, $xr0, $a0 ++; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 3 ++; CHECK-NEXT: movgr2fr.d $fa0, $a0 + ; CHECK-NEXT: fst.d $fa0, $a1, 0 + ; CHECK-NEXT: ret + %v = load volatile <4 x double>, ptr %src +@@ -84,12 +84,22 @@ define void @extract_4xdouble(ptr %src, ptr %dst) nounwind { + define void @extract_32xi8_idx(ptr %src, ptr %dst, i32 %idx) nounwind { + ; CHECK-LABEL: extract_32xi8_idx: + ; CHECK: # %bb.0: +-; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 ++; CHECK-NEXT: addi.d $sp, $sp, -64 ++; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: addi.d $fp, $sp, 64 ++; CHECK-NEXT: srli.d $a3, $sp, 5 ++; CHECK-NEXT: slli.d $sp, $a3, 5 + ; CHECK-NEXT: xvld $xr0, $a0, 0 +-; CHECK-NEXT: xvreplve.b $xr0, $xr0, $a2 +-; CHECK-NEXT: movfr2gr.s $a0, $fa0 +-; CHECK-NEXT: srai.w $a0, $a0, 24 ++; CHECK-NEXT: xvst $xr0, $sp, 0 ++; CHECK-NEXT: addi.d $a0, $sp, 0 ++; CHECK-NEXT: bstrins.d $a0, $a2, 4, 0 ++; CHECK-NEXT: ld.b $a0, $a0, 0 + ; CHECK-NEXT: st.b $a0, $a1, 0 ++; CHECK-NEXT: addi.d $sp, $fp, -64 ++; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 64 + ; CHECK-NEXT: ret + %v = load volatile <32 x i8>, ptr %src + %e = extractelement <32 x i8> %v, i32 %idx +@@ -100,12 +110,22 @@ define void @extract_32xi8_idx(ptr %src, ptr %dst, i32 %idx) nounwind { + define void @extract_16xi16_idx(ptr %src, ptr %dst, i32 %idx) nounwind { + ; CHECK-LABEL: extract_16xi16_idx: + ; CHECK: # %bb.0: +-; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 ++; CHECK-NEXT: addi.d $sp, $sp, -64 ++; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: addi.d $fp, $sp, 64 ++; CHECK-NEXT: srli.d $a3, $sp, 5 ++; CHECK-NEXT: slli.d $sp, $a3, 5 + ; CHECK-NEXT: xvld $xr0, $a0, 0 +-; CHECK-NEXT: xvreplve.h $xr0, $xr0, $a2 +-; CHECK-NEXT: movfr2gr.s $a0, $fa0 +-; CHECK-NEXT: srai.w $a0, $a0, 16 ++; CHECK-NEXT: xvst $xr0, $sp, 0 ++; CHECK-NEXT: addi.d $a0, $sp, 0 ++; CHECK-NEXT: bstrins.d $a0, $a2, 4, 1 ++; CHECK-NEXT: ld.h $a0, $a0, 0 + ; CHECK-NEXT: st.h $a0, $a1, 0 ++; CHECK-NEXT: addi.d $sp, $fp, -64 ++; CHECK-NEXT: ld.d $fp, $sp, 48 # 
8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 64 + ; CHECK-NEXT: ret + %v = load volatile <16 x i16>, ptr %src + %e = extractelement <16 x i16> %v, i32 %idx +@@ -116,11 +136,22 @@ define void @extract_16xi16_idx(ptr %src, ptr %dst, i32 %idx) nounwind { + define void @extract_8xi32_idx(ptr %src, ptr %dst, i32 %idx) nounwind { + ; CHECK-LABEL: extract_8xi32_idx: + ; CHECK: # %bb.0: +-; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 ++; CHECK-NEXT: addi.d $sp, $sp, -64 ++; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: addi.d $fp, $sp, 64 ++; CHECK-NEXT: srli.d $a3, $sp, 5 ++; CHECK-NEXT: slli.d $sp, $a3, 5 + ; CHECK-NEXT: xvld $xr0, $a0, 0 +-; CHECK-NEXT: xvreplve.w $xr0, $xr0, $a2 +-; CHECK-NEXT: movfr2gr.s $a0, $fa0 ++; CHECK-NEXT: xvst $xr0, $sp, 0 ++; CHECK-NEXT: addi.d $a0, $sp, 0 ++; CHECK-NEXT: bstrins.d $a0, $a2, 4, 2 ++; CHECK-NEXT: ld.w $a0, $a0, 0 + ; CHECK-NEXT: st.w $a0, $a1, 0 ++; CHECK-NEXT: addi.d $sp, $fp, -64 ++; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 64 + ; CHECK-NEXT: ret + %v = load volatile <8 x i32>, ptr %src + %e = extractelement <8 x i32> %v, i32 %idx +@@ -131,11 +162,22 @@ define void @extract_8xi32_idx(ptr %src, ptr %dst, i32 %idx) nounwind { + define void @extract_4xi64_idx(ptr %src, ptr %dst, i32 %idx) nounwind { + ; CHECK-LABEL: extract_4xi64_idx: + ; CHECK: # %bb.0: +-; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 ++; CHECK-NEXT: addi.d $sp, $sp, -64 ++; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: addi.d $fp, $sp, 64 ++; CHECK-NEXT: srli.d $a3, $sp, 5 ++; CHECK-NEXT: slli.d $sp, $a3, 5 + ; CHECK-NEXT: xvld $xr0, $a0, 0 +-; CHECK-NEXT: xvreplve.d $xr0, $xr0, $a2 +-; CHECK-NEXT: movfr2gr.d $a0, $fa0 ++; CHECK-NEXT: xvst $xr0, $sp, 0 ++; CHECK-NEXT: addi.d $a0, $sp, 0 ++; CHECK-NEXT: bstrins.d $a0, $a2, 4, 3 ++; CHECK-NEXT: ld.d $a0, $a0, 0 + ; CHECK-NEXT: st.d $a0, $a1, 0 ++; CHECK-NEXT: addi.d $sp, $fp, -64 ++; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 64 + ; CHECK-NEXT: ret + %v = load volatile <4 x i64>, ptr %src + %e = extractelement <4 x i64> %v, i32 %idx +@@ -146,10 +188,22 @@ define void @extract_4xi64_idx(ptr %src, ptr %dst, i32 %idx) nounwind { + define void @extract_8xfloat_idx(ptr %src, ptr %dst, i32 %idx) nounwind { + ; CHECK-LABEL: extract_8xfloat_idx: + ; CHECK: # %bb.0: +-; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 ++; CHECK-NEXT: addi.d $sp, $sp, -64 ++; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: addi.d $fp, $sp, 64 ++; CHECK-NEXT: srli.d $a3, $sp, 5 ++; CHECK-NEXT: slli.d $sp, $a3, 5 + ; CHECK-NEXT: xvld $xr0, $a0, 0 +-; CHECK-NEXT: xvreplve.w $xr0, $xr0, $a2 ++; CHECK-NEXT: xvst $xr0, $sp, 0 ++; CHECK-NEXT: addi.d $a0, $sp, 0 ++; CHECK-NEXT: bstrins.d $a0, $a2, 4, 2 ++; CHECK-NEXT: fld.s $fa0, $a0, 0 + ; CHECK-NEXT: fst.s $fa0, $a1, 0 ++; CHECK-NEXT: addi.d $sp, $fp, -64 ++; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 64 + ; CHECK-NEXT: ret + %v = load volatile <8 x float>, ptr %src + %e = extractelement <8 x float> %v, i32 %idx +@@ -160,10 +214,22 @@ define void 
@extract_8xfloat_idx(ptr %src, ptr %dst, i32 %idx) nounwind { + define void @extract_4xdouble_idx(ptr %src, ptr %dst, i32 %idx) nounwind { + ; CHECK-LABEL: extract_4xdouble_idx: + ; CHECK: # %bb.0: +-; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 ++; CHECK-NEXT: addi.d $sp, $sp, -64 ++; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: addi.d $fp, $sp, 64 ++; CHECK-NEXT: srli.d $a3, $sp, 5 ++; CHECK-NEXT: slli.d $sp, $a3, 5 + ; CHECK-NEXT: xvld $xr0, $a0, 0 +-; CHECK-NEXT: xvreplve.d $xr0, $xr0, $a2 ++; CHECK-NEXT: xvst $xr0, $sp, 0 ++; CHECK-NEXT: addi.d $a0, $sp, 0 ++; CHECK-NEXT: bstrins.d $a0, $a2, 4, 3 ++; CHECK-NEXT: fld.d $fa0, $a0, 0 + ; CHECK-NEXT: fst.d $fa0, $a1, 0 ++; CHECK-NEXT: addi.d $sp, $fp, -64 ++; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 64 + ; CHECK-NEXT: ret + %v = load volatile <4 x double>, ptr %src + %e = extractelement <4 x double> %v, i32 %idx +-- +2.20.1 + diff --git a/0038-LoongArch-Fix-incorrect-pattern-XVREPL128VEI_-W-D-in.patch b/0038-LoongArch-Fix-incorrect-pattern-XVREPL128VEI_-W-D-in.patch new file mode 100644 index 0000000000000000000000000000000000000000..10cf8159c68aa95b83af9b233044597b81fe494b --- /dev/null +++ b/0038-LoongArch-Fix-incorrect-pattern-XVREPL128VEI_-W-D-in.patch @@ -0,0 +1,61 @@ +From 487d08e5156f328b1f2d8da7b51c4c04c4e117d6 Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Wed, 27 Dec 2023 16:31:49 +0800 +Subject: [PATCH 38/66] [LoongArch] Fix incorrect pattern XVREPL128VEI_{W/D} + instructions + +Remove the incorrect patterns for `XVREPL128VEI_{W/D}` instructions, +and add correct patterns for XVREPLVE0_{W/D} instructions + +(cherry picked from commit c7367f985e0d27aeb8bc993406d1b9f4ca307399) + +Change-Id: Id9c36f5009941748f696a9a65bebf2c20de0c052 +--- + llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td | 6 +++--- + llvm/test/CodeGen/LoongArch/lasx/build-vector.ll | 4 ++-- + 2 files changed, 5 insertions(+), 5 deletions(-) + +diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +index 9b7a34688811..059689cef840 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +@@ -1571,11 +1571,11 @@ def : Pat<(loongarch_vreplve v8i32:$xj, GRLenVT:$rk), + def : Pat<(loongarch_vreplve v4i64:$xj, GRLenVT:$rk), + (XVREPLVE_D v4i64:$xj, GRLenVT:$rk)>; + +-// XVREPL128VEI_{W/D} ++// XVREPLVE0_{W/D} + def : Pat<(lasxsplatf32 FPR32:$fj), +- (XVREPL128VEI_W (SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32), 0)>; ++ (XVREPLVE0_W (SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32))>; + def : Pat<(lasxsplatf64 FPR64:$fj), +- (XVREPL128VEI_D (SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64), 0)>; ++ (XVREPLVE0_D (SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64))>; + + // Loads/Stores + foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in { +diff --git a/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll +index 6824ab5cda8d..ae6f31aaec64 100644 +--- a/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll ++++ b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll +@@ -57,7 +57,7 @@ define void @buildvector_v8f32_splat(ptr %dst, float %a0) nounwind { + ; CHECK-LABEL: buildvector_v8f32_splat: + ; CHECK: # %bb.0: # %entry + ; CHECK-NEXT: # kill: def $f0 killed $f0 def $xr0 +-; CHECK-NEXT: xvrepl128vei.w $xr0, $xr0, 0 ++; CHECK-NEXT: xvreplve0.w 
$xr0, $xr0 + ; CHECK-NEXT: xvst $xr0, $a0, 0 + ; CHECK-NEXT: ret + entry: +@@ -71,7 +71,7 @@ define void @buildvector_v4f64_splat(ptr %dst, double %a0) nounwind { + ; CHECK-LABEL: buildvector_v4f64_splat: + ; CHECK: # %bb.0: # %entry + ; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0 +-; CHECK-NEXT: xvrepl128vei.d $xr0, $xr0, 0 ++; CHECK-NEXT: xvreplve0.d $xr0, $xr0 + ; CHECK-NEXT: xvst $xr0, $a0, 0 + ; CHECK-NEXT: ret + entry: +-- +2.20.1 + diff --git a/0039-LoongArch-Fix-incorrect-pattern-X-VBITSELI_B-instruc.patch b/0039-LoongArch-Fix-incorrect-pattern-X-VBITSELI_B-instruc.patch new file mode 100644 index 0000000000000000000000000000000000000000..54723ba75989bf3064e7f7c298ba65a3fd88ea88 --- /dev/null +++ b/0039-LoongArch-Fix-incorrect-pattern-X-VBITSELI_B-instruc.patch @@ -0,0 +1,89 @@ +From 5766f4f5b78d1a5e07022eda3fc46657bbfbe3a4 Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Thu, 28 Dec 2023 20:56:32 +0800 +Subject: [PATCH 39/66] [LoongArch] Fix incorrect pattern [X]VBITSELI_B + instructions + +Adjusted the operand order of [X]VBITSELI_B to correctly match vselect. + +(cherry picked from commit da5378e87e11689d05a58198d6e15e9551916794) + +Change-Id: I60a3be6b52d03f4873c2ce4d41866afd510f3a7a +--- + llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td | 4 ++-- + llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td | 4 ++-- + llvm/test/CodeGen/LoongArch/lasx/vselect.ll | 6 +++--- + llvm/test/CodeGen/LoongArch/lsx/vselect.ll | 6 +++--- + 4 files changed, 10 insertions(+), 10 deletions(-) + +diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +index 059689cef840..b3c11bc5423d 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +@@ -1600,8 +1600,8 @@ def : Pat<(f64 (vector_extract v4f64:$xj, uimm2:$imm)), + (MOVGR2FR_D (XVPICKVE2GR_D v4f64:$xj, uimm2:$imm))>; + + // vselect +-def : Pat<(v32i8 (vselect LASX256:$xj, LASX256:$xd, +- (v32i8 (SplatPat_uimm8 uimm8:$imm)))), ++def : Pat<(v32i8 (vselect LASX256:$xd, (v32i8 (SplatPat_uimm8 uimm8:$imm)), ++ LASX256:$xj)), + (XVBITSELI_B LASX256:$xd, LASX256:$xj, uimm8:$imm)>; + foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in + def : Pat<(vt (vselect LASX256:$xa, LASX256:$xk, LASX256:$xj)), +diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +index e468176885d7..5569c2cd15b5 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +@@ -1731,8 +1731,8 @@ def : Pat<(f64 (vector_extract v2f64:$vj, i64:$rk)), + (f64 (EXTRACT_SUBREG (VREPLVE_D v2f64:$vj, i64:$rk), sub_64))>; + + // vselect +-def : Pat<(v16i8 (vselect LSX128:$vj, LSX128:$vd, +- (v16i8 (SplatPat_uimm8 uimm8:$imm)))), ++def : Pat<(v16i8 (vselect LSX128:$vd, (v16i8 (SplatPat_uimm8 uimm8:$imm)), ++ LSX128:$vj)), + (VBITSELI_B LSX128:$vd, LSX128:$vj, uimm8:$imm)>; + foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in + def : Pat<(vt (vselect LSX128:$va, LSX128:$vk, LSX128:$vj)), +diff --git a/llvm/test/CodeGen/LoongArch/lasx/vselect.ll b/llvm/test/CodeGen/LoongArch/lasx/vselect.ll +index 24f4bcf752d3..ec2fc28db33c 100644 +--- a/llvm/test/CodeGen/LoongArch/lasx/vselect.ll ++++ b/llvm/test/CodeGen/LoongArch/lasx/vselect.ll +@@ -6,11 +6,11 @@ define void @select_v32i8_imm(ptr %res, ptr %a0) nounwind { + ; CHECK: # %bb.0: + ; CHECK-NEXT: xvld $xr0, $a1, 0 + ; CHECK-NEXT: xvrepli.h $xr1, -256 +-; CHECK-NEXT: 
xvbitseli.b $xr0, $xr1, 1 +-; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: xvbitseli.b $xr1, $xr0, 1 ++; CHECK-NEXT: xvst $xr1, $a0, 0 + ; CHECK-NEXT: ret + %v0 = load <32 x i8>, ptr %a0 +- %sel = select <32 x i1> , <32 x i8> %v0, <32 x i8> ++ %sel = select <32 x i1> , <32 x i8> , <32 x i8> %v0 + store <32 x i8> %sel, ptr %res + ret void + } +diff --git a/llvm/test/CodeGen/LoongArch/lsx/vselect.ll b/llvm/test/CodeGen/LoongArch/lsx/vselect.ll +index 00e3d9313f13..746152f0f026 100644 +--- a/llvm/test/CodeGen/LoongArch/lsx/vselect.ll ++++ b/llvm/test/CodeGen/LoongArch/lsx/vselect.ll +@@ -6,11 +6,11 @@ define void @select_v16i8_imm(ptr %res, ptr %a0) nounwind { + ; CHECK: # %bb.0: + ; CHECK-NEXT: vld $vr0, $a1, 0 + ; CHECK-NEXT: vrepli.h $vr1, -256 +-; CHECK-NEXT: vbitseli.b $vr0, $vr1, 255 +-; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: vbitseli.b $vr1, $vr0, 255 ++; CHECK-NEXT: vst $vr1, $a0, 0 + ; CHECK-NEXT: ret + %v0 = load <16 x i8>, ptr %a0 +- %sel = select <16 x i1> , <16 x i8> %v0, <16 x i8> ++ %sel = select <16 x i1> , <16 x i8> , <16 x i8> %v0 + store <16 x i8> %sel, ptr %res + ret void + } +-- +2.20.1 + diff --git a/0040-LoongArch-Permit-auto-vectorization-using-LSX-LASX-w.patch b/0040-LoongArch-Permit-auto-vectorization-using-LSX-LASX-w.patch new file mode 100644 index 0000000000000000000000000000000000000000..fc18f772db5f793b0a14962cc8e52bae6302e21f --- /dev/null +++ b/0040-LoongArch-Permit-auto-vectorization-using-LSX-LASX-w.patch @@ -0,0 +1,189 @@ +From 959a4cd22a727480621a4dfbbdc2d2a61905dbe8 Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Tue, 23 Jan 2024 09:06:35 +0800 +Subject: [PATCH 40/66] [LoongArch] Permit auto-vectorization using LSX/LASX + with `auto-vec` feature (#78943) + +With enough codegen complete, we can now correctly report the size of +vector registers for LSX/LASX, allowing auto vectorization (The +`auto-vec` feature needs to be enabled simultaneously). + +As described, the `auto-vec` feature is an experimental one. To ensure +that automatic vectorization is not enabled by default, because the +information provided by the current `TTI` cannot yield additional +benefits for automatic vectorization. + +(cherry picked from commit fcff4582f01db2f5a99e3acf452aec9f2d8a126a) + +Change-Id: I57b7e112bdf900edeb545990a6940bd19bd7bde4 +--- + llvm/lib/Target/LoongArch/LoongArch.td | 4 ++ + .../lib/Target/LoongArch/LoongArchSubtarget.h | 2 + + .../LoongArchTargetTransformInfo.cpp | 18 +++++ + .../LoongArch/LoongArchTargetTransformInfo.h | 2 + + .../LoopVectorize/LoongArch/defaults.ll | 66 +++++++++++++++++++ + .../LoopVectorize/LoongArch/lit.local.cfg | 4 ++ + 6 files changed, 96 insertions(+) + create mode 100644 llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll + create mode 100644 llvm/test/Transforms/LoopVectorize/LoongArch/lit.local.cfg + +diff --git a/llvm/lib/Target/LoongArch/LoongArch.td b/llvm/lib/Target/LoongArch/LoongArch.td +index 75b65fe69f26..2a4c991a43b0 100644 +--- a/llvm/lib/Target/LoongArch/LoongArch.td ++++ b/llvm/lib/Target/LoongArch/LoongArch.td +@@ -105,6 +105,10 @@ def FeatureUAL + def FeatureRelax + : SubtargetFeature<"relax", "HasLinkerRelax", "true", + "Enable Linker relaxation">; ++// Experimental auto vectorization ++def FeatureAutoVec ++ : SubtargetFeature<"auto-vec", "HasExpAutoVec", "true", ++ "Experimental auto vectorization">; + + //===----------------------------------------------------------------------===// + // Registers, instruction descriptions ... 
+diff --git a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h +index 5c173675cca4..174e4cba8326 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h ++++ b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h +@@ -44,6 +44,7 @@ class LoongArchSubtarget : public LoongArchGenSubtargetInfo { + bool HasLaLocalWithAbs = false; + bool HasUAL = false; + bool HasLinkerRelax = false; ++ bool HasExpAutoVec = false; + unsigned GRLen = 32; + MVT GRLenVT = MVT::i32; + LoongArchABI::ABI TargetABI = LoongArchABI::ABI_Unknown; +@@ -102,6 +103,7 @@ public: + bool hasLaLocalWithAbs() const { return HasLaLocalWithAbs; } + bool hasUAL() const { return HasUAL; } + bool hasLinkerRelax() const { return HasLinkerRelax; } ++ bool hasExpAutoVec() const { return HasExpAutoVec; } + MVT getGRLenVT() const { return GRLenVT; } + unsigned getGRLen() const { return GRLen; } + LoongArchABI::ABI getTargetABI() const { return TargetABI; } +diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp +index a6de86eea116..04349aa52b54 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp +@@ -19,4 +19,22 @@ using namespace llvm; + + #define DEBUG_TYPE "loongarchtti" + ++TypeSize LoongArchTTIImpl::getRegisterBitWidth( ++ TargetTransformInfo::RegisterKind K) const { ++ switch (K) { ++ case TargetTransformInfo::RGK_Scalar: ++ return TypeSize::getFixed(ST->is64Bit() ? 64 : 32); ++ case TargetTransformInfo::RGK_FixedWidthVector: ++ if (ST->hasExtLASX() && ST->hasExpAutoVec()) ++ return TypeSize::getFixed(256); ++ if (ST->hasExtLSX() && ST->hasExpAutoVec()) ++ return TypeSize::getFixed(128); ++ return TypeSize::getFixed(0); ++ case TargetTransformInfo::RGK_ScalableVector: ++ return TypeSize::getScalable(0); ++ } ++ ++ llvm_unreachable("Unsupported register kind"); ++} ++ + // TODO: Implement more hooks to provide TTI machinery for LoongArch. +diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h +index 9e02f793ba8a..d296c9ed576f 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h ++++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h +@@ -39,6 +39,8 @@ public: + : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), + TLI(ST->getTargetLowering()) {} + ++ TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const; ++ + // TODO: Implement more hooks to provide TTI machinery for LoongArch. + }; + +diff --git a/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll b/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll +new file mode 100644 +index 000000000000..a8ac2411dd82 +--- /dev/null ++++ b/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll +@@ -0,0 +1,66 @@ ++; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 ++; RUN: opt < %s -passes=loop-vectorize -mtriple loongarch64-linux-gnu -mattr=+lasx,+auto-vec -S | FileCheck %s ++ ++;; This is a collection of tests whose only purpose is to show changes in the ++;; default configuration. Please keep these tests minimal - if you're testing ++;; functionality of some specific configuration, please place that in a ++;; seperate test file with a hard coded configuration (even if that ++;; configuration is the current default). 
++ ++target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" ++target triple = "loongarch64" ++ ++define void @vector_add(ptr noalias nocapture %a, i64 %v) { ++; CHECK-LABEL: define void @vector_add ++; CHECK-SAME: (ptr noalias nocapture [[A:%.*]], i64 [[V:%.*]]) #[[ATTR0:[0-9]+]] { ++; CHECK-NEXT: entry: ++; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ++; CHECK: vector.ph: ++; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V]], i64 0 ++; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ++; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ++; CHECK: vector.body: ++; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ++; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ++; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]] ++; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 ++; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 ++; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] ++; CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[TMP2]], align 8 ++; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ++; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ++; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ++; CHECK: middle.block: ++; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024 ++; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ++; CHECK: scalar.ph: ++; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ++; CHECK-NEXT: br label [[FOR_BODY:%.*]] ++; CHECK: for.body: ++; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] ++; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ++; CHECK-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 ++; CHECK-NEXT: [[ADD:%.*]] = add i64 [[ELEM]], [[V]] ++; CHECK-NEXT: store i64 [[ADD]], ptr [[ARRAYIDX]], align 8 ++; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ++; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 ++; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ++; CHECK: for.end: ++; CHECK-NEXT: ret void ++; ++entry: ++ br label %for.body ++ ++for.body: ++ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] ++ %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv ++ %elem = load i64, ptr %arrayidx ++ %add = add i64 %elem, %v ++ store i64 %add, ptr %arrayidx ++ %iv.next = add nuw nsw i64 %iv, 1 ++ %exitcond.not = icmp eq i64 %iv.next, 1024 ++ br i1 %exitcond.not, label %for.end, label %for.body ++ ++for.end: ++ ret void ++} +diff --git a/llvm/test/Transforms/LoopVectorize/LoongArch/lit.local.cfg b/llvm/test/Transforms/LoopVectorize/LoongArch/lit.local.cfg +new file mode 100644 +index 000000000000..9570af17fe5f +--- /dev/null ++++ b/llvm/test/Transforms/LoopVectorize/LoongArch/lit.local.cfg +@@ -0,0 +1,4 @@ ++config.suffixes = [".ll"] ++ ++if not "LoongArch" in config.root.targets: ++ config.unsupported = True +-- +2.20.1 + diff --git a/0041-CodeGen-LoongArch-Set-SINT_TO_FP-UINT_TO_FP-to-legal.patch b/0041-CodeGen-LoongArch-Set-SINT_TO_FP-UINT_TO_FP-to-legal.patch new file mode 100644 index 
0000000000000000000000000000000000000000..0ddcbdef0776e6e6f592161ccfc6537a2328e6c0 --- /dev/null +++ b/0041-CodeGen-LoongArch-Set-SINT_TO_FP-UINT_TO_FP-to-legal.patch @@ -0,0 +1,299 @@ +From a5bc7ef181511a199bf6e042f02c431ad667b52a Mon Sep 17 00:00:00 2001 +From: yjijd +Date: Tue, 23 Jan 2024 15:16:23 +0800 +Subject: [PATCH 41/66] [CodeGen][LoongArch] Set SINT_TO_FP/UINT_TO_FP to legal + for vector types (#78924) + +Support the following conversions: +v4i32->v4f32, v2i64->v2f64(LSX) +v8i32->v8f32, v4i64->v4f64(LASX) +v4i32->v4f64, v4i64->v4f32(LASX) + +(cherry picked from commit f799f936929c232a16abc7c520a10fecadbf05f9) + +Change-Id: Ib8460a68a74d9d69edbdf6abc8bf193dcaeb7c13 +--- + .../LoongArch/LoongArchISelLowering.cpp | 4 ++ + .../LoongArch/LoongArchLASXInstrInfo.td | 22 +++++++ + .../Target/LoongArch/LoongArchLSXInstrInfo.td | 8 +++ + .../LoongArch/lasx/ir-instruction/sitofp.ll | 57 +++++++++++++++++++ + .../LoongArch/lasx/ir-instruction/uitofp.ll | 57 +++++++++++++++++++ + .../LoongArch/lsx/ir-instruction/sitofp.ll | 28 +++++++++ + .../LoongArch/lsx/ir-instruction/uitofp.ll | 28 +++++++++ + 7 files changed, 204 insertions(+) + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sitofp.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/uitofp.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sitofp.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/uitofp.ll + +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +index cf881ce720a6..7a360b42e15d 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +@@ -256,6 +256,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT, + Expand); + } ++ setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, ++ {MVT::v4i32, MVT::v2i64}, Legal); + for (MVT VT : {MVT::v4f32, MVT::v2f64}) { + setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); + setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal); +@@ -298,6 +300,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT, + Expand); + } ++ setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, ++ {MVT::v8i32, MVT::v4i32, MVT::v4i64}, Legal); + for (MVT VT : {MVT::v8f32, MVT::v4f64}) { + setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); + setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal); +diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +index b3c11bc5423d..b3e74b480922 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +@@ -1611,6 +1611,28 @@ foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in + def : Pat<(fneg (v8f32 LASX256:$xj)), (XVBITREVI_W LASX256:$xj, 31)>; + def : Pat<(fneg (v4f64 LASX256:$xj)), (XVBITREVI_D LASX256:$xj, 63)>; + ++// XVFFINT_{S_W/D_L} ++def : Pat<(v8f32 (sint_to_fp v8i32:$vj)), (XVFFINT_S_W v8i32:$vj)>; ++def : Pat<(v4f64 (sint_to_fp v4i64:$vj)), (XVFFINT_D_L v4i64:$vj)>; ++def : Pat<(v4f64 (sint_to_fp v4i32:$vj)), ++ (XVFFINT_D_L (VEXT2XV_D_W (SUBREG_TO_REG (i64 0), v4i32:$vj, ++ sub_128)))>; ++def : Pat<(v4f32 (sint_to_fp v4i64:$vj)), ++ (EXTRACT_SUBREG (XVFCVT_S_D (XVPERMI_D (XVFFINT_D_L v4i64:$vj), 238), ++ (XVFFINT_D_L 
v4i64:$vj)), ++ sub_128)>; ++ ++// XVFFINT_{S_WU/D_LU} ++def : Pat<(v8f32 (uint_to_fp v8i32:$vj)), (XVFFINT_S_WU v8i32:$vj)>; ++def : Pat<(v4f64 (uint_to_fp v4i64:$vj)), (XVFFINT_D_LU v4i64:$vj)>; ++def : Pat<(v4f64 (uint_to_fp v4i32:$vj)), ++ (XVFFINT_D_LU (VEXT2XV_DU_WU (SUBREG_TO_REG (i64 0), v4i32:$vj, ++ sub_128)))>; ++def : Pat<(v4f32 (uint_to_fp v4i64:$vj)), ++ (EXTRACT_SUBREG (XVFCVT_S_D (XVPERMI_D (XVFFINT_D_LU v4i64:$vj), 238), ++ (XVFFINT_D_LU v4i64:$vj)), ++ sub_128)>; ++ + } // Predicates = [HasExtLASX] + + /// Intrinsic pattern +diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +index 5569c2cd15b5..63eac4d1aeb7 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +@@ -1742,6 +1742,14 @@ foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in + def : Pat<(fneg (v4f32 LSX128:$vj)), (VBITREVI_W LSX128:$vj, 31)>; + def : Pat<(fneg (v2f64 LSX128:$vj)), (VBITREVI_D LSX128:$vj, 63)>; + ++// VFFINT_{S_W/D_L} ++def : Pat<(v4f32 (sint_to_fp v4i32:$vj)), (VFFINT_S_W v4i32:$vj)>; ++def : Pat<(v2f64 (sint_to_fp v2i64:$vj)), (VFFINT_D_L v2i64:$vj)>; ++ ++// VFFINT_{S_WU/D_LU} ++def : Pat<(v4f32 (uint_to_fp v4i32:$vj)), (VFFINT_S_WU v4i32:$vj)>; ++def : Pat<(v2f64 (uint_to_fp v2i64:$vj)), (VFFINT_D_LU v2i64:$vj)>; ++ + } // Predicates = [HasExtLSX] + + /// Intrinsic pattern +diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sitofp.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sitofp.ll +new file mode 100644 +index 000000000000..208a758ea4e9 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sitofp.ll +@@ -0,0 +1,57 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++define void @sitofp_v8i32_v8f32(ptr %res, ptr %in){ ++; CHECK-LABEL: sitofp_v8i32_v8f32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvffint.s.w $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i32>, ptr %in ++ %v1 = sitofp <8 x i32> %v0 to <8 x float> ++ store <8 x float> %v1, ptr %res ++ ret void ++} ++ ++define void @sitofp_v4f64_v4f64(ptr %res, ptr %in){ ++; CHECK-LABEL: sitofp_v4f64_v4f64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvffint.d.l $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i64>, ptr %in ++ %v1 = sitofp <4 x i64> %v0 to <4 x double> ++ store <4 x double> %v1, ptr %res ++ ret void ++} ++ ++define void @sitofp_v4i64_v4f32(ptr %res, ptr %in){ ++; CHECK-LABEL: sitofp_v4i64_v4f32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvffint.d.l $xr0, $xr0 ++; CHECK-NEXT: xvpermi.d $xr1, $xr0, 238 ++; CHECK-NEXT: xvfcvt.s.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i64>, ptr %in ++ %v1 = sitofp <4 x i64> %v0 to <4 x float> ++ store <4 x float> %v1, ptr %res ++ ret void ++} ++ ++define void @sitofp_v4i32_v4f64(ptr %res, ptr %in){ ++; CHECK-LABEL: sitofp_v4i32_v4f64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vext2xv.d.w $xr0, $xr0 ++; CHECK-NEXT: xvffint.d.l $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i32>, ptr %in ++ %v1 = sitofp <4 x i32> %v0 to <4 x double> ++ store <4 x double> %v1, ptr %res ++ ret void ++} +diff --git 
a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/uitofp.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/uitofp.ll +new file mode 100644 +index 000000000000..70cf71c4cec2 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/uitofp.ll +@@ -0,0 +1,57 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++define void @uitofp_v8i32_v8f32(ptr %res, ptr %in){ ++; CHECK-LABEL: uitofp_v8i32_v8f32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvffint.s.wu $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x i32>, ptr %in ++ %v1 = uitofp <8 x i32> %v0 to <8 x float> ++ store <8 x float> %v1, ptr %res ++ ret void ++} ++ ++define void @uitofp_v4f64_v4f64(ptr %res, ptr %in){ ++; CHECK-LABEL: uitofp_v4f64_v4f64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvffint.d.lu $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i64>, ptr %in ++ %v1 = uitofp <4 x i64> %v0 to <4 x double> ++ store <4 x double> %v1, ptr %res ++ ret void ++} ++ ++define void @uitofp_v4i64_v4f32(ptr %res, ptr %in){ ++; CHECK-LABEL: uitofp_v4i64_v4f32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvffint.d.lu $xr0, $xr0 ++; CHECK-NEXT: xvpermi.d $xr1, $xr0, 238 ++; CHECK-NEXT: xvfcvt.s.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i64>, ptr %in ++ %v1 = uitofp <4 x i64> %v0 to <4 x float> ++ store <4 x float> %v1, ptr %res ++ ret void ++} ++ ++define void @uitofp_v4i32_v4f64(ptr %res, ptr %in){ ++; CHECK-LABEL: uitofp_v4i32_v4f64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vext2xv.du.wu $xr0, $xr0 ++; CHECK-NEXT: xvffint.d.lu $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i32>, ptr %in ++ %v1 = uitofp <4 x i32> %v0 to <4 x double> ++ store <4 x double> %v1, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sitofp.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sitofp.ll +new file mode 100644 +index 000000000000..1e820a37a240 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sitofp.ll +@@ -0,0 +1,28 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++define void @sitofp_v4i32_v4f32(ptr %res, ptr %in){ ++; CHECK-LABEL: sitofp_v4i32_v4f32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vffint.s.w $vr0, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i32>, ptr %in ++ %v1 = sitofp <4 x i32> %v0 to <4 x float> ++ store <4 x float> %v1, ptr %res ++ ret void ++} ++ ++define void @sitofp_v2i64_v2f64(ptr %res, ptr %in){ ++; CHECK-LABEL: sitofp_v2i64_v2f64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vffint.d.l $vr0, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x i64>, ptr %in ++ %v1 = sitofp <2 x i64> %v0 to <2 x double> ++ store <2 x double> %v1, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/uitofp.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/uitofp.ll +new file mode 100644 +index 000000000000..3d4913f12e57 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/uitofp.ll +@@ -0,0 +1,28 @@ ++; NOTE: Assertions 
have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++define void @uitofp_v4i32_v4f32(ptr %res, ptr %in){ ++; CHECK-LABEL: uitofp_v4i32_v4f32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vffint.s.wu $vr0, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x i32>, ptr %in ++ %v1 = uitofp <4 x i32> %v0 to <4 x float> ++ store <4 x float> %v1, ptr %res ++ ret void ++} ++ ++define void @uitofp_v2i64_v2f64(ptr %res, ptr %in){ ++; CHECK-LABEL: uitofp_v2i64_v2f64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vffint.d.lu $vr0, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x i64>, ptr %in ++ %v1 = uitofp <2 x i64> %v0 to <2 x double> ++ store <2 x double> %v1, ptr %res ++ ret void ++} +-- +2.20.1 + diff --git a/0042-CodeGen-LoongArch-Set-FP_TO_SINT-FP_TO_UINT-to-legal.patch b/0042-CodeGen-LoongArch-Set-FP_TO_SINT-FP_TO_UINT-to-legal.patch new file mode 100644 index 0000000000000000000000000000000000000000..fb267d2fde2ffad6b2d32610c22470b1342afdd1 --- /dev/null +++ b/0042-CodeGen-LoongArch-Set-FP_TO_SINT-FP_TO_UINT-to-legal.patch @@ -0,0 +1,307 @@ +From 8f6d308508cd7e8568df5268230cf60d47cd9dbe Mon Sep 17 00:00:00 2001 +From: yjijd +Date: Tue, 23 Jan 2024 15:57:06 +0800 +Subject: [PATCH 42/66] [CodeGen][LoongArch] Set FP_TO_SINT/FP_TO_UINT to legal + for vector types (#79107) + +Support the following conversions: +v4f32->v4i32, v2f64->v2i64(LSX) +v8f32->v8i32, v4f64->v4i64(LASX) +v4f32->v4i64, v4f64->v4i32(LASX) + +(cherry picked from commit 44ba6ebc999d6e9b27bedfe04a993adfd204dc6a) + +Change-Id: I879a480fb9afa730144edf2a3ce4304793ef60ec +--- + .../LoongArch/LoongArchISelLowering.cpp | 12 ++-- + .../LoongArch/LoongArchLASXInstrInfo.td | 22 +++++++ + .../Target/LoongArch/LoongArchLSXInstrInfo.td | 8 +++ + .../LoongArch/lasx/ir-instruction/fptosi.ll | 57 +++++++++++++++++++ + .../LoongArch/lasx/ir-instruction/fptoui.ll | 57 +++++++++++++++++++ + .../LoongArch/lsx/ir-instruction/fptosi.ll | 28 +++++++++ + .../LoongArch/lsx/ir-instruction/fptoui.ll | 28 +++++++++ + 7 files changed, 208 insertions(+), 4 deletions(-) + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptosi.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptoui.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fptosi.ll + create mode 100644 llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fptoui.ll + +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +index 7a360b42e15d..f7eacd56c542 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +@@ -256,8 +256,10 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT, + Expand); + } +- setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, +- {MVT::v4i32, MVT::v2i64}, Legal); ++ for (MVT VT : {MVT::v4i32, MVT::v2i64}) { ++ setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Legal); ++ setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Legal); ++ } + for (MVT VT : {MVT::v4f32, MVT::v2f64}) { + setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); + setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal); +@@ -300,8 +302,10 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + 
{ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT, + Expand); + } +- setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, +- {MVT::v8i32, MVT::v4i32, MVT::v4i64}, Legal); ++ for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) { ++ setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Legal); ++ setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Legal); ++ } + for (MVT VT : {MVT::v8f32, MVT::v4f64}) { + setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); + setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal); +diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +index b3e74b480922..492b62da6ce7 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +@@ -1633,6 +1633,28 @@ def : Pat<(v4f32 (uint_to_fp v4i64:$vj)), + (XVFFINT_D_LU v4i64:$vj)), + sub_128)>; + ++// XVFTINTRZ_{W_S/L_D} ++def : Pat<(v8i32 (fp_to_sint v8f32:$vj)), (XVFTINTRZ_W_S v8f32:$vj)>; ++def : Pat<(v4i64 (fp_to_sint v4f64:$vj)), (XVFTINTRZ_L_D v4f64:$vj)>; ++def : Pat<(v4i64 (fp_to_sint v4f32:$vj)), ++ (VEXT2XV_D_W (SUBREG_TO_REG (i64 0), (VFTINTRZ_W_S v4f32:$vj), ++ sub_128))>; ++def : Pat<(v4i32 (fp_to_sint (v4f64 LASX256:$vj))), ++ (EXTRACT_SUBREG (XVFTINTRZ_W_S (XVFCVT_S_D (XVPERMI_D v4f64:$vj, 238), ++ v4f64:$vj)), ++ sub_128)>; ++ ++// XVFTINTRZ_{W_SU/L_DU} ++def : Pat<(v8i32 (fp_to_uint v8f32:$vj)), (XVFTINTRZ_WU_S v8f32:$vj)>; ++def : Pat<(v4i64 (fp_to_uint v4f64:$vj)), (XVFTINTRZ_LU_D v4f64:$vj)>; ++def : Pat<(v4i64 (fp_to_uint v4f32:$vj)), ++ (VEXT2XV_DU_WU (SUBREG_TO_REG (i64 0), (VFTINTRZ_WU_S v4f32:$vj), ++ sub_128))>; ++def : Pat<(v4i32 (fp_to_uint (v4f64 LASX256:$vj))), ++ (EXTRACT_SUBREG (XVFTINTRZ_W_S (XVFCVT_S_D (XVPERMI_D v4f64:$vj, 238), ++ v4f64:$vj)), ++ sub_128)>; ++ + } // Predicates = [HasExtLASX] + + /// Intrinsic pattern +diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +index 63eac4d1aeb7..99ac2f3c162f 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +@@ -1750,6 +1750,14 @@ def : Pat<(v2f64 (sint_to_fp v2i64:$vj)), (VFFINT_D_L v2i64:$vj)>; + def : Pat<(v4f32 (uint_to_fp v4i32:$vj)), (VFFINT_S_WU v4i32:$vj)>; + def : Pat<(v2f64 (uint_to_fp v2i64:$vj)), (VFFINT_D_LU v2i64:$vj)>; + ++// VFTINTRZ_{W_S/L_D} ++def : Pat<(v4i32 (fp_to_sint v4f32:$vj)), (VFTINTRZ_W_S v4f32:$vj)>; ++def : Pat<(v2i64 (fp_to_sint v2f64:$vj)), (VFTINTRZ_L_D v2f64:$vj)>; ++ ++// VFTINTRZ_{W_SU/L_DU} ++def : Pat<(v4i32 (fp_to_uint v4f32:$vj)), (VFTINTRZ_WU_S v4f32:$vj)>; ++def : Pat<(v2i64 (fp_to_uint v2f64:$vj)), (VFTINTRZ_LU_D v2f64:$vj)>; ++ + } // Predicates = [HasExtLSX] + + /// Intrinsic pattern +diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptosi.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptosi.ll +new file mode 100644 +index 000000000000..0d9f57b57ffa +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptosi.ll +@@ -0,0 +1,57 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++define void @fptosi_v8f32_v8i32(ptr %res, ptr %in){ ++; CHECK-LABEL: fptosi_v8f32_v8i32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvftintrz.w.s $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x float>, ptr 
%in ++ %v1 = fptosi <8 x float> %v0 to <8 x i32> ++ store <8 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @fptosi_v4f64_v4i64(ptr %res, ptr %in){ ++; CHECK-LABEL: fptosi_v4f64_v4i64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvftintrz.l.d $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x double>, ptr %in ++ %v1 = fptosi <4 x double> %v0 to <4 x i64> ++ store <4 x i64> %v1, ptr %res ++ ret void ++} ++ ++define void @fptosi_v4f64_v4i32(ptr %res, ptr %in){ ++; CHECK-LABEL: fptosi_v4f64_v4i32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvpermi.d $xr1, $xr0, 238 ++; CHECK-NEXT: xvfcvt.s.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvftintrz.w.s $xr0, $xr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x double>, ptr %in ++ %v1 = fptosi <4 x double> %v0 to <4 x i32> ++ store <4 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @fptosi_v4f32_v4i64(ptr %res, ptr %in){ ++; CHECK-LABEL: fptosi_v4f32_v4i64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vftintrz.w.s $vr0, $vr0 ++; CHECK-NEXT: vext2xv.d.w $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x float>, ptr %in ++ %v1 = fptosi <4 x float> %v0 to <4 x i64> ++ store <4 x i64> %v1, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptoui.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptoui.ll +new file mode 100644 +index 000000000000..27d70f33cd34 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptoui.ll +@@ -0,0 +1,57 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++define void @fptoui_v8f32_v8i32(ptr %res, ptr %in){ ++; CHECK-LABEL: fptoui_v8f32_v8i32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvftintrz.wu.s $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <8 x float>, ptr %in ++ %v1 = fptoui <8 x float> %v0 to <8 x i32> ++ store <8 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @fptoui_v4f64_v4i64(ptr %res, ptr %in){ ++; CHECK-LABEL: fptoui_v4f64_v4i64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvftintrz.lu.d $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x double>, ptr %in ++ %v1 = fptoui <4 x double> %v0 to <4 x i64> ++ store <4 x i64> %v1, ptr %res ++ ret void ++} ++ ++define void @fptoui_v4f64_v4i32(ptr %res, ptr %in){ ++; CHECK-LABEL: fptoui_v4f64_v4i32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $a1, 0 ++; CHECK-NEXT: xvpermi.d $xr1, $xr0, 238 ++; CHECK-NEXT: xvfcvt.s.d $xr0, $xr1, $xr0 ++; CHECK-NEXT: xvftintrz.w.s $xr0, $xr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x double>, ptr %in ++ %v1 = fptoui <4 x double> %v0 to <4 x i32> ++ store <4 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @fptoui_v4f32_v4i64(ptr %res, ptr %in){ ++; CHECK-LABEL: fptoui_v4f32_v4i64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vftintrz.wu.s $vr0, $vr0 ++; CHECK-NEXT: vext2xv.du.wu $xr0, $xr0 ++; CHECK-NEXT: xvst $xr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x float>, ptr %in ++ %v1 = fptoui <4 x float> %v0 to <4 x i64> ++ store <4 x i64> %v1, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fptosi.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fptosi.ll +new 
file mode 100644 +index 000000000000..c3008fe96e47 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fptosi.ll +@@ -0,0 +1,28 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++define void @fptosi_v4f32_v4i32(ptr %res, ptr %in){ ++; CHECK-LABEL: fptosi_v4f32_v4i32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vftintrz.w.s $vr0, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x float>, ptr %in ++ %v1 = fptosi <4 x float> %v0 to <4 x i32> ++ store <4 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @fptosi_v2f64_v2i64(ptr %res, ptr %in){ ++; CHECK-LABEL: fptosi_v2f64_v2i64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vftintrz.l.d $vr0, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x double>, ptr %in ++ %v1 = fptosi <2 x double> %v0 to <2 x i64> ++ store <2 x i64> %v1, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fptoui.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fptoui.ll +new file mode 100644 +index 000000000000..f0aeb0bd14e7 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fptoui.ll +@@ -0,0 +1,28 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++define void @fptoui_v4f32_v4i32(ptr %res, ptr %in){ ++; CHECK-LABEL: fptoui_v4f32_v4i32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vftintrz.wu.s $vr0, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <4 x float>, ptr %in ++ %v1 = fptoui <4 x float> %v0 to <4 x i32> ++ store <4 x i32> %v1, ptr %res ++ ret void ++} ++ ++define void @fptoui_v2f64_v2i64(ptr %res, ptr %in){ ++; CHECK-LABEL: fptoui_v2f64_v2i64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $a1, 0 ++; CHECK-NEXT: vftintrz.lu.d $vr0, $vr0 ++; CHECK-NEXT: vst $vr0, $a0, 0 ++; CHECK-NEXT: ret ++ %v0 = load <2 x double>, ptr %in ++ %v1 = fptoui <2 x double> %v0 to <2 x i64> ++ store <2 x i64> %v1, ptr %res ++ ret void ++} +-- +2.20.1 + diff --git a/0043-LoongArch-Fixing-the-incorrect-return-value-of-Loong.patch b/0043-LoongArch-Fixing-the-incorrect-return-value-of-Loong.patch new file mode 100644 index 0000000000000000000000000000000000000000..daa10419d12d993a2ae776d96e073473065a3a2f --- /dev/null +++ b/0043-LoongArch-Fixing-the-incorrect-return-value-of-Loong.patch @@ -0,0 +1,53 @@ +From 8da55d54445ddecf8a751368828d46425afc4ce6 Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Fri, 26 Jan 2024 10:24:07 +0800 +Subject: [PATCH 43/66] [LoongArch] Fixing the incorrect return value of + LoongArchTTIImpl::getRegisterBitWidth (#79441) + +When we do not enable vector features, we should return the default +value (`TargetTransformInfoImplBase::getRegisterBitWidth`) instead of +zero. 
+ +This should fix the LoongArch [buildbot +breakage](https://lab.llvm.org/staging/#/builders/5/builds/486) from + +(cherry picked from commit 1e9924c1f248bbddcb95d82a59708d617297dad3) +(cherry picked from commit 900e7cbfdee09c94d022e4dae923b3c7827f95e3) + +Change-Id: I6e6a5cd987e1129872980c09441f5ca176069ddd +--- + .../Target/LoongArch/LoongArchTargetTransformInfo.cpp | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp +index 04349aa52b54..d47dded9ea6e 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp +@@ -21,17 +21,20 @@ using namespace llvm; + + TypeSize LoongArchTTIImpl::getRegisterBitWidth( + TargetTransformInfo::RegisterKind K) const { ++ TypeSize DefSize = TargetTransformInfoImplBase::getRegisterBitWidth(K); + switch (K) { + case TargetTransformInfo::RGK_Scalar: + return TypeSize::getFixed(ST->is64Bit() ? 64 : 32); + case TargetTransformInfo::RGK_FixedWidthVector: +- if (ST->hasExtLASX() && ST->hasExpAutoVec()) ++ if (!ST->hasExpAutoVec()) ++ return DefSize; ++ if (ST->hasExtLASX()) + return TypeSize::getFixed(256); +- if (ST->hasExtLSX() && ST->hasExpAutoVec()) ++ if (ST->hasExtLSX()) + return TypeSize::getFixed(128); +- return TypeSize::getFixed(0); ++ [[fallthrough]]; + case TargetTransformInfo::RGK_ScalableVector: +- return TypeSize::getScalable(0); ++ return DefSize; + } + + llvm_unreachable("Unsupported register kind"); +-- +2.20.1 + diff --git a/0044-LoongArch-Pre-commit-test-for-76913.-NFC.patch b/0044-LoongArch-Pre-commit-test-for-76913.-NFC.patch new file mode 100644 index 0000000000000000000000000000000000000000..03d3c6db7bdb7064dfc3b57da8dd268ba2d643b3 --- /dev/null +++ b/0044-LoongArch-Pre-commit-test-for-76913.-NFC.patch @@ -0,0 +1,72 @@ +From c27980fffb9082f2dfed259f0ef441badf3d7cb7 Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Tue, 9 Jan 2024 20:32:20 +0800 +Subject: [PATCH 44/66] [LoongArch] Pre-commit test for #76913. NFC + +This test will crash with expensive check. + +Crash message: +``` +*** Bad machine code: Using an undefined physical register *** +- function: main +- basic block: %bb.0 entry (0x20fee70) +- instruction: $r3 = frame-destroy ADDI_D $r22, -288 +- operand 1: $r22 +``` + +(cherry picked from commit f499472de3e1184b83fc6cd78bc244a55f2cac7d) + +Change-Id: I91aa251ada69ef63ce6f80ff203540b0cb55871a +--- + .../LoongArch/can-not-realign-stack.ll | 39 +++++++++++++++++++ + 1 file changed, 39 insertions(+) + create mode 100644 llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll + +diff --git a/llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll b/llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll +new file mode 100644 +index 000000000000..526821076498 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll +@@ -0,0 +1,39 @@ ++; REQUIRES: expensive_checks ++; RUN: llc --mtriple=loongarch64 --frame-pointer=none --mattr=+lasx < %s ++ ++; XFAIL: * ++ ++;; FIXME: This test will crash with expensive check. The subsequent patch will ++;; address and fix this issue. 
++ ++%struct.S = type { [64 x i16] } ++ ++define dso_local noundef signext i32 @main() nounwind { ++entry: ++ %s = alloca %struct.S, align 2 ++ call void @llvm.lifetime.start.p0(i64 128, ptr nonnull %s) ++ store <16 x i16> , ptr %s, align 2 ++ %0 = getelementptr inbounds [64 x i16], ptr %s, i64 0, i64 16 ++ store <16 x i16> , ptr %0, align 2 ++ %1 = getelementptr inbounds [64 x i16], ptr %s, i64 0, i64 32 ++ store <16 x i16> , ptr %1, align 2 ++ %2 = getelementptr inbounds [64 x i16], ptr %s, i64 0, i64 48 ++ store <16 x i16> , ptr %2, align 2 ++ call void @foo(ptr noundef nonnull %s) ++ store <16 x i16> , ptr %s, align 2 ++ %3 = getelementptr inbounds [64 x i16], ptr %s, i64 0, i64 16 ++ store <16 x i16> , ptr %3, align 2 ++ %4 = getelementptr inbounds [64 x i16], ptr %s, i64 0, i64 32 ++ store <16 x i16> , ptr %4, align 2 ++ %5 = getelementptr inbounds [64 x i16], ptr %s, i64 0, i64 48 ++ store <16 x i16> , ptr %5, align 2 ++ call void @bar(ptr noundef nonnull %s) ++ call void @llvm.lifetime.end.p0(i64 128, ptr nonnull %s) ++ ret i32 0 ++} ++ ++declare void @foo(ptr nocapture noundef) ++declare void @bar(ptr nocapture noundef) ++ ++declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) ++declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) +-- +2.20.1 + diff --git a/0045-LoongArch-Implement-LoongArchRegisterInfo-canRealign.patch b/0045-LoongArch-Implement-LoongArchRegisterInfo-canRealign.patch new file mode 100644 index 0000000000000000000000000000000000000000..b0d5506652a8d3c4011c2c28efb31ebd98defc5e --- /dev/null +++ b/0045-LoongArch-Implement-LoongArchRegisterInfo-canRealign.patch @@ -0,0 +1,151 @@ +From 4f2d73c1506aa56fddea5ad41d2acea97e1b36e3 Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Tue, 9 Jan 2024 20:35:49 +0800 +Subject: [PATCH 45/66] [LoongArch] Implement + LoongArchRegisterInfo::canRealignStack() (#76913) + +This patch fixes the crash issue in the test: +CodeGen/LoongArch/can-not-realign-stack.ll + +Register allocator may spill virtual registers to the stack, which +introduces stack alignment requirements (when the size of spilled + registers exceeds the default alignment size of the stack). If a +function does not have stack alignment requirements before register +allocation, registers used for stack alignment will not be preserved. + +Therefore, we should implement `canRealignStack()` to inform the +register allocator whether it is allowed to perform stack realignment +operations. 
+ +(cherry picked from commit 98c6aa72299caeff6b188e1ff2fc1b39c5b893b6) + +Change-Id: I43d38bf321b595a1f6a4899111db4f71660a2d9a +--- + .../LoongArch/LoongArchRegisterInfo.cpp | 23 ++++++++ + .../Target/LoongArch/LoongArchRegisterInfo.h | 1 + + .../LoongArch/can-not-realign-stack.ll | 56 +++++++++++++++++-- + 3 files changed, 75 insertions(+), 5 deletions(-) + +diff --git a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp +index 257b947a3ce4..092b5f1fb442 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp +@@ -15,6 +15,7 @@ + #include "LoongArch.h" + #include "LoongArchInstrInfo.h" + #include "LoongArchSubtarget.h" ++#include "MCTargetDesc/LoongArchBaseInfo.h" + #include "MCTargetDesc/LoongArchMCTargetDesc.h" + #include "llvm/CodeGen/MachineFrameInfo.h" + #include "llvm/CodeGen/MachineFunction.h" +@@ -194,3 +195,25 @@ bool LoongArchRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, + MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset.getFixed()); + return false; + } ++ ++bool LoongArchRegisterInfo::canRealignStack(const MachineFunction &MF) const { ++ if (!TargetRegisterInfo::canRealignStack(MF)) ++ return false; ++ ++ const MachineRegisterInfo *MRI = &MF.getRegInfo(); ++ const LoongArchFrameLowering *TFI = getFrameLowering(MF); ++ ++ // Stack realignment requires a frame pointer. If we already started ++ // register allocation with frame pointer elimination, it is too late now. ++ if (!MRI->canReserveReg(LoongArch::R22)) ++ return false; ++ ++ // We may also need a base pointer if there are dynamic allocas or stack ++ // pointer adjustments around calls. ++ if (TFI->hasReservedCallFrame(MF)) ++ return true; ++ ++ // A base pointer is required and allowed. Check that it isn't too late to ++ // reserve it. ++ return MRI->canReserveReg(LoongArchABI::getBPReg()); ++} +diff --git a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h +index 7e8f26b14097..d1e40254c297 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h ++++ b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h +@@ -51,6 +51,7 @@ struct LoongArchRegisterInfo : public LoongArchGenRegisterInfo { + bool requiresFrameIndexScavenging(const MachineFunction &MF) const override { + return true; + } ++ bool canRealignStack(const MachineFunction &MF) const override; + }; + } // end namespace llvm + +diff --git a/llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll b/llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll +index 526821076498..af24ae64b7c7 100644 +--- a/llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll ++++ b/llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll +@@ -1,14 +1,60 @@ +-; REQUIRES: expensive_checks +-; RUN: llc --mtriple=loongarch64 --frame-pointer=none --mattr=+lasx < %s ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ++; RUN: llc --mtriple=loongarch64 --frame-pointer=none --mattr=+lasx < %s | FileCheck %s + +-; XFAIL: * ++;; This test is checking that when a function allows stack realignment and ++;; realignment needs were not detected before register allocation (at this ++;; point, fp is not preserved), but realignment is required during register ++;; allocation, the stack should not undergo realignment. + +-;; FIXME: This test will crash with expensive check. The subsequent patch will +-;; address and fix this issue. 
++;; Ensure that the `bstrins.d $sp, $zero, n, 0` instruction is not generated. ++;; n = log2(realign_size) - 1 + + %struct.S = type { [64 x i16] } + + define dso_local noundef signext i32 @main() nounwind { ++; CHECK-LABEL: main: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: addi.d $sp, $sp, -272 ++; CHECK-NEXT: st.d $ra, $sp, 264 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $fp, $sp, 256 # 8-byte Folded Spill ++; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_0) ++; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI0_0) ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: xvst $xr0, $sp, 96 # 32-byte Folded Spill ++; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_1) ++; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI0_1) ++; CHECK-NEXT: xvld $xr1, $a0, 0 ++; CHECK-NEXT: xvst $xr1, $sp, 64 # 32-byte Folded Spill ++; CHECK-NEXT: xvst $xr1, $sp, 224 ++; CHECK-NEXT: xvst $xr0, $sp, 192 ++; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_2) ++; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI0_2) ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill ++; CHECK-NEXT: xvst $xr0, $sp, 160 ++; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_3) ++; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI0_3) ++; CHECK-NEXT: xvld $xr0, $a0, 0 ++; CHECK-NEXT: xvst $xr0, $sp, 0 # 32-byte Folded Spill ++; CHECK-NEXT: xvst $xr0, $sp, 128 ++; CHECK-NEXT: addi.d $fp, $sp, 128 ++; CHECK-NEXT: move $a0, $fp ++; CHECK-NEXT: bl %plt(foo) ++; CHECK-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload ++; CHECK-NEXT: xvst $xr0, $sp, 224 ++; CHECK-NEXT: xvld $xr0, $sp, 96 # 32-byte Folded Reload ++; CHECK-NEXT: xvst $xr0, $sp, 192 ++; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload ++; CHECK-NEXT: xvst $xr0, $sp, 160 ++; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload ++; CHECK-NEXT: xvst $xr0, $sp, 128 ++; CHECK-NEXT: move $a0, $fp ++; CHECK-NEXT: bl %plt(bar) ++; CHECK-NEXT: move $a0, $zero ++; CHECK-NEXT: ld.d $fp, $sp, 256 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 264 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 272 ++; CHECK-NEXT: ret + entry: + %s = alloca %struct.S, align 2 + call void @llvm.lifetime.start.p0(i64 128, ptr nonnull %s) +-- +2.20.1 + diff --git a/0046-sanitizer-msan-VarArgHelper-for-loongarch64.patch b/0046-sanitizer-msan-VarArgHelper-for-loongarch64.patch new file mode 100644 index 0000000000000000000000000000000000000000..a5ee4c679785b534794dd2df96a2e9a6c059d499 --- /dev/null +++ b/0046-sanitizer-msan-VarArgHelper-for-loongarch64.patch @@ -0,0 +1,139 @@ +From 857dc000141b237da73a43d59e22672750501559 Mon Sep 17 00:00:00 2001 +From: zhanglimin +Date: Tue, 12 Sep 2023 09:51:16 +0800 +Subject: [PATCH 46/66] [sanitizer][msan] VarArgHelper for loongarch64 + +This patch adds support for variadic argument for loongarch64, +which is based on MIPS64. And `check-msan` all pass. 
+ +Reviewed By: vitalybuka + +Differential Revision: https://reviews.llvm.org/D158587 + +(cherry picked from commit ec42c78cc43ac1e8364e5a0941aa5fc91b813dd3) +Change-Id: I8dbd46aa86ddc847538bc70fc556424dc9a8e23b +--- + .../Instrumentation/MemorySanitizer.cpp | 7 ++ + .../LoongArch/vararg-loongarch64.ll | 78 +++++++++++++++++++ + 2 files changed, 85 insertions(+) + create mode 100644 llvm/test/Instrumentation/MemorySanitizer/LoongArch/vararg-loongarch64.ll + +diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +index 83d90049abc3..362fd6e4151f 100644 +--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp ++++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +@@ -4945,6 +4945,7 @@ struct VarArgAMD64Helper : public VarArgHelper { + }; + + /// MIPS64-specific implementation of VarArgHelper. ++/// NOTE: This is also used for LoongArch64. + struct VarArgMIPS64Helper : public VarArgHelper { + Function &F; + MemorySanitizer &MS; +@@ -5836,6 +5837,10 @@ struct VarArgSystemZHelper : public VarArgHelper { + } + }; + ++// Loongarch64 is not a MIPS, but the current vargs calling convention matches ++// the MIPS. ++using VarArgLoongArch64Helper = VarArgMIPS64Helper; ++ + /// A no-op implementation of VarArgHelper. + struct VarArgNoOpHelper : public VarArgHelper { + VarArgNoOpHelper(Function &F, MemorySanitizer &MS, +@@ -5868,6 +5873,8 @@ static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan, + return new VarArgPowerPC64Helper(Func, Msan, Visitor); + else if (TargetTriple.getArch() == Triple::systemz) + return new VarArgSystemZHelper(Func, Msan, Visitor); ++ else if (TargetTriple.isLoongArch64()) ++ return new VarArgLoongArch64Helper(Func, Msan, Visitor); + else + return new VarArgNoOpHelper(Func, Msan, Visitor); + } +diff --git a/llvm/test/Instrumentation/MemorySanitizer/LoongArch/vararg-loongarch64.ll b/llvm/test/Instrumentation/MemorySanitizer/LoongArch/vararg-loongarch64.ll +new file mode 100644 +index 000000000000..8a4ab59588ad +--- /dev/null ++++ b/llvm/test/Instrumentation/MemorySanitizer/LoongArch/vararg-loongarch64.ll +@@ -0,0 +1,78 @@ ++; RUN: opt < %s -S -passes=msan 2>&1 | FileCheck %s ++ ++target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" ++target triple = "loongarch64-unknown-linux-gnu" ++ ++;; First, check allocation of the save area. ++declare void @llvm.lifetime.start.p0(i64, ptr nocapture) #1 ++declare void @llvm.va_start(ptr) #2 ++declare void @llvm.va_end(ptr) #2 ++declare void @llvm.lifetime.end.p0(i64, ptr nocapture) #1 ++define i32 @foo(i32 %guard, ...) { ++; CHECK-LABEL: @foo ++; CHECK: [[TMP1:%.*]] = load {{.*}} @__msan_va_arg_overflow_size_tls ++; CHECK: [[TMP2:%.*]] = add i64 0, [[TMP1]] ++; CHECK: [[TMP3:%.*]] = alloca {{.*}} [[TMP2]] ++; CHECK: call void @llvm.memset.p0.i64(ptr align 8 [[TMP3]], i8 0, i64 [[TMP2]], i1 false) ++; CHECK: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP2]], i64 800) ++; CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP3]], ptr align 8 @__msan_va_arg_tls, i64 [[TMP4]], i1 false) ++; ++ %vl = alloca ptr, align 8 ++ call void @llvm.lifetime.start.p0(i64 32, ptr %vl) ++ call void @llvm.va_start(ptr %vl) ++ call void @llvm.va_end(ptr %vl) ++ call void @llvm.lifetime.end.p0(i64 32, ptr %vl) ++ ret i32 0 ++} ++ ++;; Save the incoming shadow value from the arguments in the __msan_va_arg_tls ++;; array. 
++define i32 @bar() { ++; CHECK-LABEL: @bar ++; CHECK: store i32 0, ptr @__msan_va_arg_tls, align 8 ++; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 ++; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 16) to ptr), align 8 ++; CHECK: store {{.*}} 24, {{.*}} @__msan_va_arg_overflow_size_tls ++; ++ %1 = call i32 (i32, ...) @foo(i32 0, i32 1, i64 2, double 3.000000e+00) ++ ret i32 %1 ++} ++ ++;; Check multiple fixed arguments. ++declare i32 @foo2(i32 %g1, i32 %g2, ...) ++define i32 @bar2() { ++; CHECK-LABEL: @bar2 ++; CHECK: store i64 0, ptr @__msan_va_arg_tls, align 8 ++; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 ++; CHECK: store {{.*}} 16, {{.*}} @__msan_va_arg_overflow_size_tls ++; ++ %1 = call i32 (i32, i32, ...) @foo2(i32 0, i32 1, i64 2, double 3.000000e+00) ++ ret i32 %1 ++} ++ ++;; Test that MSan doesn't generate code overflowing __msan_va_arg_tls when too many arguments are ++;; passed to a variadic function. ++declare i64 @sum(i64 %n, ...) ++define dso_local i64 @many_args() { ++;; If the size of __msan_va_arg_tls changes the second argument of `add` must also be changed. ++; CHECK-LABEL: @many_args ++; CHECK: i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 792) ++; CHECK-NOT: i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 800) ++; ++entry: ++ %ret = call i64 (i64, ...) @sum(i64 120, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1 ++ ) ++ ret i64 %ret ++} +-- +2.20.1 + diff --git a/0047-LowerTypeTests-Add-loongarch64-to-CFI-jumptables-673.patch b/0047-LowerTypeTests-Add-loongarch64-to-CFI-jumptables-673.patch new file mode 100644 index 0000000000000000000000000000000000000000..56290a7d49a21f56a7bf39384b260c537498330a --- /dev/null +++ b/0047-LowerTypeTests-Add-loongarch64-to-CFI-jumptables-673.patch @@ -0,0 +1,128 @@ +From 866e3ec267eb98445441ba8d9e9fb2129a23c4a1 Mon Sep 17 00:00:00 2001 +From: Ami-zhang <96056515+Ami-zhang@users.noreply.github.com> +Date: Thu, 28 Sep 2023 15:26:18 +0800 +Subject: [PATCH 47/66] [LowerTypeTests] Add loongarch64 to CFI jumptables + (#67312) + +This patch implements jump tables for loongarch64. 
+ +(cherry picked from commit 0e8a8c85f8765c086c573f36e60c895920381e18) +Change-Id: I917165f7d28d3c360f5598e46372e871d98117b2 +--- + llvm/lib/Transforms/IPO/LowerTypeTests.cpp | 9 ++++++++- + llvm/test/Transforms/LowerTypeTests/function-weak.ll | 2 ++ + llvm/test/Transforms/LowerTypeTests/function.ll | 9 +++++++++ + 3 files changed, 19 insertions(+), 1 deletion(-) + +diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp +index 9b4b3efd7283..a89d57d12615 100644 +--- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp ++++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp +@@ -1196,6 +1196,7 @@ static const unsigned kARMJumpTableEntrySize = 4; + static const unsigned kARMBTIJumpTableEntrySize = 8; + static const unsigned kARMv6MJumpTableEntrySize = 16; + static const unsigned kRISCVJumpTableEntrySize = 8; ++static const unsigned kLOONGARCH64JumpTableEntrySize = 8; + + unsigned LowerTypeTestsModule::getJumpTableEntrySize() { + switch (JumpTableArch) { +@@ -1222,6 +1223,8 @@ unsigned LowerTypeTestsModule::getJumpTableEntrySize() { + case Triple::riscv32: + case Triple::riscv64: + return kRISCVJumpTableEntrySize; ++ case Triple::loongarch64: ++ return kLOONGARCH64JumpTableEntrySize; + default: + report_fatal_error("Unsupported architecture for jump tables"); + } +@@ -1286,6 +1289,9 @@ void LowerTypeTestsModule::createJumpTableEntry( + } else if (JumpTableArch == Triple::riscv32 || + JumpTableArch == Triple::riscv64) { + AsmOS << "tail $" << ArgIndex << "@plt\n"; ++ } else if (JumpTableArch == Triple::loongarch64) { ++ AsmOS << "pcalau12i $$t0, %pc_hi20($" << ArgIndex << ")\n" ++ << "jirl $$r0, $$t0, %pc_lo12($" << ArgIndex << ")\n"; + } else { + report_fatal_error("Unsupported architecture for jump tables"); + } +@@ -1304,7 +1310,8 @@ void LowerTypeTestsModule::buildBitSetsFromFunctions( + ArrayRef TypeIds, ArrayRef Functions) { + if (Arch == Triple::x86 || Arch == Triple::x86_64 || Arch == Triple::arm || + Arch == Triple::thumb || Arch == Triple::aarch64 || +- Arch == Triple::riscv32 || Arch == Triple::riscv64) ++ Arch == Triple::riscv32 || Arch == Triple::riscv64 || ++ Arch == Triple::loongarch64) + buildBitSetsFromFunctionsNative(TypeIds, Functions); + else if (Arch == Triple::wasm32 || Arch == Triple::wasm64) + buildBitSetsFromFunctionsWASM(TypeIds, Functions); +diff --git a/llvm/test/Transforms/LowerTypeTests/function-weak.ll b/llvm/test/Transforms/LowerTypeTests/function-weak.ll +index ff69abacc8e9..c765937f1991 100644 +--- a/llvm/test/Transforms/LowerTypeTests/function-weak.ll ++++ b/llvm/test/Transforms/LowerTypeTests/function-weak.ll +@@ -4,6 +4,7 @@ + ; RUN: opt -S -passes=lowertypetests -mtriple=aarch64-unknown-linux-gnu %s | FileCheck --check-prefixes=CHECK,ARM %s + ; RUN: opt -S -passes=lowertypetests -mtriple=riscv32-unknown-linux-gnu %s | FileCheck --check-prefixes=CHECK,RISCV %s + ; RUN: opt -S -passes=lowertypetests -mtriple=riscv64-unknown-linux-gnu %s | FileCheck --check-prefixes=CHECK,RISCV %s ++; RUN: opt -S -passes=lowertypetests -mtriple=loongarch64-unknown-linux-gnu %s | FileCheck --check-prefixes=CHECK,LOONGARCH64 %s + + target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + target triple = "x86_64-unknown-linux-gnu" +@@ -116,6 +117,7 @@ define i1 @foo(ptr %p) { + ; X86: define private void @[[JT]]() #{{.*}} align 8 { + ; ARM: define private void @[[JT]]() #{{.*}} align 4 { + ; RISCV: define private void @[[JT]]() #{{.*}} align 8 { ++; LOONGARCH64: define private void @[[JT]]() #{{.*}} align 8 { + + ; CHECK: define internal void 
@__cfi_global_var_init() section ".text.startup" { + ; CHECK-NEXT: entry: +diff --git a/llvm/test/Transforms/LowerTypeTests/function.ll b/llvm/test/Transforms/LowerTypeTests/function.ll +index 968c9d434eb2..802b88d92977 100644 +--- a/llvm/test/Transforms/LowerTypeTests/function.ll ++++ b/llvm/test/Transforms/LowerTypeTests/function.ll +@@ -5,6 +5,7 @@ + ; RUN: opt -S -passes=lowertypetests -mtriple=riscv32-unknown-linux-gnu %s | FileCheck --check-prefixes=RISCV,NATIVE %s + ; RUN: opt -S -passes=lowertypetests -mtriple=riscv64-unknown-linux-gnu %s | FileCheck --check-prefixes=RISCV,NATIVE %s + ; RUN: opt -S -passes=lowertypetests -mtriple=wasm32-unknown-unknown %s | FileCheck --check-prefix=WASM32 %s ++; RUN: opt -S -passes=lowertypetests -mtriple=loongarch64-unknown-linux-gnu %s | FileCheck --check-prefixes=LOONGARCH64,NATIVE %s + + ; The right format for Arm jump tables depends on the selected + ; subtarget, so we can't get these tests right without the Arm target +@@ -34,6 +35,7 @@ target datalayout = "e-p:64:64" + ; THUMB: @g = internal alias void (), getelementptr inbounds ([2 x [4 x i8]], ptr @[[JT]], i64 0, i64 1) + ; THUMBV6M: @g = internal alias void (), getelementptr inbounds ([2 x [16 x i8]], ptr @[[JT]], i64 0, i64 1) + ; RISCV: @g = internal alias void (), getelementptr inbounds ([2 x [8 x i8]], ptr @[[JT]], i64 0, i64 1) ++; LOONGARCH64: @g = internal alias void (), getelementptr inbounds ([2 x [8 x i8]], ptr @[[JT]], i64 0, i64 1) + + ; NATIVE: define hidden void @f.cfi() + ; WASM32: define void @f() !type !{{[0-9]+}} !wasm.index ![[I0:[0-9]+]] +@@ -65,6 +67,7 @@ define i1 @foo(ptr %p) { + ; THUMB: define private void @[[JT]]() #[[ATTR:.*]] align 4 { + ; THUMBV6M: define private void @[[JT]]() #[[ATTR:.*]] align 16 { + ; RISCV: define private void @[[JT]]() #[[ATTR:.*]] align 8 { ++; LOONGARCH64: define private void @[[JT]]() #[[ATTR:.*]] align 8 { + + ; X86: jmp ${0:c}@plt + ; X86-SAME: int3 +@@ -99,6 +102,11 @@ define i1 @foo(ptr %p) { + ; RISCV: tail $0@plt + ; RISCV-SAME: tail $1@plt + ++; LOONGARCH64: pcalau12i $$t0, %pc_hi20($0) ++; LOONGARCH64-SAME: jirl $$r0, $$t0, %pc_lo12($0) ++; LOONGARCH64-SAME: pcalau12i $$t0, %pc_hi20($1) ++; LOONGARCH64-SAME: jirl $$r0, $$t0, %pc_lo12($1) ++ + ; NATIVE-SAME: "s,s"(ptr @f.cfi, ptr @g.cfi) + + ; X86-LINUX: attributes #[[ATTR]] = { naked nocf_check nounwind } +@@ -107,6 +115,7 @@ define i1 @foo(ptr %p) { + ; THUMB: attributes #[[ATTR]] = { naked nounwind "target-cpu"="cortex-a8" "target-features"="+thumb-mode" } + ; THUMBV6M: attributes #[[ATTR]] = { naked nounwind "target-features"="+thumb-mode" } + ; RISCV: attributes #[[ATTR]] = { naked nounwind "target-features"="-c,-relax" } ++; LOONGARCH64: attributes #[[ATTR]] = { naked nounwind } + + ; WASM32: ![[I0]] = !{i64 1} + ; WASM32: ![[I1]] = !{i64 2} +-- +2.20.1 + diff --git a/0048-LoongArch-Fix-td-pattern-for-CACOP-LDPTE-and-LDDIR.patch b/0048-LoongArch-Fix-td-pattern-for-CACOP-LDPTE-and-LDDIR.patch new file mode 100644 index 0000000000000000000000000000000000000000..2caf2e7687ebfcdb508c99935ee818271bf08bfa --- /dev/null +++ b/0048-LoongArch-Fix-td-pattern-for-CACOP-LDPTE-and-LDDIR.patch @@ -0,0 +1,44 @@ +From 1708e19c1f2ad47bb8bd83c0a234ed7843fa4e8d Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Fri, 20 Oct 2023 10:44:55 +0800 +Subject: [PATCH 48/66] [LoongArch] Fix td pattern for CACOP LDPTE and LDDIR + +The immediate argument should be a target constant (`timm`). 
+ +(cherry picked from commit 47826b3f148996767ebd2c67ee41c329cb364fef) +Change-Id: I1f2bf8d48c40d35762fad3c9e638c2e756b0da19 +--- + llvm/lib/Target/LoongArch/LoongArchInstrInfo.td | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +index b2c4bb812ba5..166379d7d592 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +@@ -1857,9 +1857,9 @@ defm : PseudoBinPat<"atomic_load_xor_32", PseudoAtomicLoadXor32>; + /// Intrinsics + + def : Pat<(int_loongarch_cacop_d timm:$op, i64:$rj, timm:$imm12), +- (CACOP uimm5:$op, GPR:$rj, simm12:$imm12)>; ++ (CACOP timm:$op, GPR:$rj, timm:$imm12)>; + def : Pat<(int_loongarch_cacop_w i32:$op, i32:$rj, i32:$imm12), +- (CACOP uimm5:$op, GPR:$rj, simm12:$imm12)>; ++ (CACOP timm:$op, GPR:$rj, timm:$imm12)>; + def : Pat<(loongarch_dbar uimm15:$imm15), (DBAR uimm15:$imm15)>; + def : Pat<(loongarch_ibar uimm15:$imm15), (IBAR uimm15:$imm15)>; + def : Pat<(loongarch_break uimm15:$imm15), (BREAK uimm15:$imm15)>; +@@ -2023,9 +2023,9 @@ def : Pat<(int_loongarch_asrtle_d GPR:$rj, GPR:$rk), + def : Pat<(int_loongarch_asrtgt_d GPR:$rj, GPR:$rk), + (ASRTGT_D GPR:$rj, GPR:$rk)>; + def : Pat<(int_loongarch_lddir_d GPR:$rj, timm:$imm8), +- (LDDIR GPR:$rj, uimm8:$imm8)>; ++ (LDDIR GPR:$rj, timm:$imm8)>; + def : Pat<(int_loongarch_ldpte_d GPR:$rj, timm:$imm8), +- (LDPTE GPR:$rj, uimm8:$imm8)>; ++ (LDPTE GPR:$rj, timm:$imm8)>; + } // Predicates = [IsLA64] + + //===----------------------------------------------------------------------===// +-- +2.20.1 + diff --git a/0049-LoongArch-MC-Refine-MCInstrAnalysis-based-on-registe.patch b/0049-LoongArch-MC-Refine-MCInstrAnalysis-based-on-registe.patch new file mode 100644 index 0000000000000000000000000000000000000000..61c9e2568f6d9ae2754740541f3a6685ed351406 --- /dev/null +++ b/0049-LoongArch-MC-Refine-MCInstrAnalysis-based-on-registe.patch @@ -0,0 +1,241 @@ +From 0098ff513a67219cc9f647bf50e18505b264195c Mon Sep 17 00:00:00 2001 +From: ZhaoQi +Date: Fri, 10 Nov 2023 15:54:33 +0800 +Subject: [PATCH 49/66] [LoongArch][MC] Refine MCInstrAnalysis based on + registers used (#71276) + +MCInstrAnalysis can return properties of instructions (e.g., isCall(), +isBranch(),...) based on the informations that MCInstrDesc can get from +*InstrInfo*.td files. These infos are based on opcodes only, but JIRL +can have different properties based on different registers used. + +So this patch refines several MCInstrAnalysis methods: isTerminator, +isCall,isReturn,isBranch,isUnconditionalBranch and isIndirectBranch. + +This patch also allows BOLT which will be supported on LoongArch later +to get right instruction infos. 
+ +(cherry picked from commit f7d784709673ca185f6fb0633fd53c72e81f2ae1) +Change-Id: Ia1be413dd828826951a1b9909a5db2013c300441 +--- + .../MCTargetDesc/LoongArchMCTargetDesc.cpp | 76 +++++++++++++ + .../unittests/Target/LoongArch/CMakeLists.txt | 1 + + .../Target/LoongArch/MCInstrAnalysisTest.cpp | 107 ++++++++++++++++++ + 3 files changed, 184 insertions(+) + create mode 100644 llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp + +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp +index 942e667bc261..d580c3457fec 100644 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp +@@ -104,6 +104,82 @@ public: + + return false; + } ++ ++ bool isTerminator(const MCInst &Inst) const override { ++ if (MCInstrAnalysis::isTerminator(Inst)) ++ return true; ++ ++ switch (Inst.getOpcode()) { ++ default: ++ return false; ++ case LoongArch::JIRL: ++ return Inst.getOperand(0).getReg() == LoongArch::R0; ++ } ++ } ++ ++ bool isCall(const MCInst &Inst) const override { ++ if (MCInstrAnalysis::isCall(Inst)) ++ return true; ++ ++ switch (Inst.getOpcode()) { ++ default: ++ return false; ++ case LoongArch::JIRL: ++ return Inst.getOperand(0).getReg() != LoongArch::R0; ++ } ++ } ++ ++ bool isReturn(const MCInst &Inst) const override { ++ if (MCInstrAnalysis::isReturn(Inst)) ++ return true; ++ ++ switch (Inst.getOpcode()) { ++ default: ++ return false; ++ case LoongArch::JIRL: ++ return Inst.getOperand(0).getReg() == LoongArch::R0 && ++ Inst.getOperand(1).getReg() == LoongArch::R1; ++ } ++ } ++ ++ bool isBranch(const MCInst &Inst) const override { ++ if (MCInstrAnalysis::isBranch(Inst)) ++ return true; ++ ++ switch (Inst.getOpcode()) { ++ default: ++ return false; ++ case LoongArch::JIRL: ++ return Inst.getOperand(0).getReg() == LoongArch::R0 && ++ Inst.getOperand(1).getReg() != LoongArch::R1; ++ } ++ } ++ ++ bool isUnconditionalBranch(const MCInst &Inst) const override { ++ if (MCInstrAnalysis::isUnconditionalBranch(Inst)) ++ return true; ++ ++ switch (Inst.getOpcode()) { ++ default: ++ return false; ++ case LoongArch::JIRL: ++ return Inst.getOperand(0).getReg() == LoongArch::R0 && ++ Inst.getOperand(1).getReg() != LoongArch::R1; ++ } ++ } ++ ++ bool isIndirectBranch(const MCInst &Inst) const override { ++ if (MCInstrAnalysis::isIndirectBranch(Inst)) ++ return true; ++ ++ switch (Inst.getOpcode()) { ++ default: ++ return false; ++ case LoongArch::JIRL: ++ return Inst.getOperand(0).getReg() == LoongArch::R0 && ++ Inst.getOperand(1).getReg() != LoongArch::R1; ++ } ++ } + }; + + } // end namespace +diff --git a/llvm/unittests/Target/LoongArch/CMakeLists.txt b/llvm/unittests/Target/LoongArch/CMakeLists.txt +index fef4f8e15461..e6f8ec073721 100644 +--- a/llvm/unittests/Target/LoongArch/CMakeLists.txt ++++ b/llvm/unittests/Target/LoongArch/CMakeLists.txt +@@ -20,6 +20,7 @@ set(LLVM_LINK_COMPONENTS + + add_llvm_target_unittest(LoongArchTests + InstSizes.cpp ++ MCInstrAnalysisTest.cpp + ) + + set_property(TARGET LoongArchTests PROPERTY FOLDER "Tests/UnitTests/TargetTests") +diff --git a/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp b/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp +new file mode 100644 +index 000000000000..6a208d274a0d +--- /dev/null ++++ b/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp +@@ -0,0 +1,107 @@ ++//===- MCInstrAnalysisTest.cpp - LoongArchMCInstrAnalysis unit tests ------===// ++// ++// 
Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#include "llvm/MC/MCInstrAnalysis.h" ++#include "MCTargetDesc/LoongArchMCTargetDesc.h" ++#include "llvm/MC/MCInstBuilder.h" ++#include "llvm/MC/TargetRegistry.h" ++#include "llvm/Support/TargetSelect.h" ++ ++#include "gtest/gtest.h" ++ ++#include ++ ++using namespace llvm; ++ ++namespace { ++ ++class InstrAnalysisTest : public testing::TestWithParam { ++protected: ++ std::unique_ptr Info; ++ std::unique_ptr Analysis; ++ ++ static void SetUpTestSuite() { ++ LLVMInitializeLoongArchTargetInfo(); ++ LLVMInitializeLoongArchTarget(); ++ LLVMInitializeLoongArchTargetMC(); ++ } ++ ++ InstrAnalysisTest() { ++ std::string Error; ++ const Target *TheTarget = ++ TargetRegistry::lookupTarget(Triple::normalize(GetParam()), Error); ++ Info = std::unique_ptr(TheTarget->createMCInstrInfo()); ++ Analysis = std::unique_ptr( ++ TheTarget->createMCInstrAnalysis(Info.get())); ++ } ++}; ++ ++} // namespace ++ ++static MCInst beq() { ++ return MCInstBuilder(LoongArch::BEQ) ++ .addReg(LoongArch::R0) ++ .addReg(LoongArch::R1) ++ .addImm(32); ++} ++ ++static MCInst bl() { return MCInstBuilder(LoongArch::BL).addImm(32); } ++ ++static MCInst jirl(unsigned RD, unsigned RJ = LoongArch::R10) { ++ return MCInstBuilder(LoongArch::JIRL).addReg(RD).addReg(RJ).addImm(16); ++} ++ ++TEST_P(InstrAnalysisTest, IsTerminator) { ++ EXPECT_TRUE(Analysis->isTerminator(beq())); ++ EXPECT_FALSE(Analysis->isTerminator(bl())); ++ EXPECT_TRUE(Analysis->isTerminator(jirl(LoongArch::R0))); ++ EXPECT_FALSE(Analysis->isTerminator(jirl(LoongArch::R5))); ++} ++ ++TEST_P(InstrAnalysisTest, IsCall) { ++ EXPECT_FALSE(Analysis->isCall(beq())); ++ EXPECT_TRUE(Analysis->isCall(bl())); ++ EXPECT_TRUE(Analysis->isCall(jirl(LoongArch::R1))); ++ EXPECT_FALSE(Analysis->isCall(jirl(LoongArch::R0))); ++} ++ ++TEST_P(InstrAnalysisTest, IsReturn) { ++ EXPECT_FALSE(Analysis->isReturn(beq())); ++ EXPECT_FALSE(Analysis->isReturn(bl())); ++ EXPECT_TRUE(Analysis->isReturn(jirl(LoongArch::R0, LoongArch::R1))); ++ EXPECT_FALSE(Analysis->isReturn(jirl(LoongArch::R0))); ++ EXPECT_FALSE(Analysis->isReturn(jirl(LoongArch::R1))); ++} ++ ++TEST_P(InstrAnalysisTest, IsBranch) { ++ EXPECT_TRUE(Analysis->isBranch(beq())); ++ EXPECT_FALSE(Analysis->isBranch(bl())); ++ EXPECT_TRUE(Analysis->isBranch(jirl(LoongArch::R0))); ++ EXPECT_FALSE(Analysis->isBranch(jirl(LoongArch::R1))); ++ EXPECT_FALSE(Analysis->isBranch(jirl(LoongArch::R0, LoongArch::R1))); ++} ++ ++TEST_P(InstrAnalysisTest, IsUnconditionalBranch) { ++ EXPECT_FALSE(Analysis->isUnconditionalBranch(beq())); ++ EXPECT_FALSE(Analysis->isUnconditionalBranch(bl())); ++ EXPECT_TRUE(Analysis->isUnconditionalBranch(jirl(LoongArch::R0))); ++ EXPECT_FALSE(Analysis->isUnconditionalBranch(jirl(LoongArch::R1))); ++ EXPECT_FALSE( ++ Analysis->isUnconditionalBranch(jirl(LoongArch::R0, LoongArch::R1))); ++} ++ ++TEST_P(InstrAnalysisTest, IsIndirectBranch) { ++ EXPECT_FALSE(Analysis->isIndirectBranch(beq())); ++ EXPECT_FALSE(Analysis->isIndirectBranch(bl())); ++ EXPECT_TRUE(Analysis->isIndirectBranch(jirl(LoongArch::R0))); ++ EXPECT_FALSE(Analysis->isIndirectBranch(jirl(LoongArch::R1))); ++ EXPECT_FALSE(Analysis->isIndirectBranch(jirl(LoongArch::R0, LoongArch::R1))); ++} ++ ++INSTANTIATE_TEST_SUITE_P(LA32And64, InstrAnalysisTest, ++ 
testing::Values("loongarch32", "loongarch64")); +-- +2.20.1 + diff --git a/0050-LoongArch-NFC-Pre-commit-MCInstrAnalysis-tests-for-i.patch b/0050-LoongArch-NFC-Pre-commit-MCInstrAnalysis-tests-for-i.patch new file mode 100644 index 0000000000000000000000000000000000000000..fd0d29a8ee68ac7e1a485a1da1585101ddd0437a --- /dev/null +++ b/0050-LoongArch-NFC-Pre-commit-MCInstrAnalysis-tests-for-i.patch @@ -0,0 +1,90 @@ +From 5ac9529fad623693c70a0e47cd52e1b48243fc7d Mon Sep 17 00:00:00 2001 +From: ZhaoQi +Date: Wed, 15 Nov 2023 11:12:30 +0800 +Subject: [PATCH 50/66] [LoongArch][NFC] Pre-commit MCInstrAnalysis tests for + instruction 'b' (#71903) + +The tests for 'b' which commented with FIXME are incorrect, the +following patch will fix it. + +(cherry picked from commit f6c4bb07eaa94bcd5d02ba7a46850225b6ed50d4) +Change-Id: Ie43b06a9d8f03477f62546c8a9a5f99b60239c97 +--- + .../Target/LoongArch/MCInstrAnalysisTest.cpp | 18 ++++++++++++++++++ + 1 file changed, 18 insertions(+) + +diff --git a/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp b/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp +index 6a208d274a0d..6e1919fc2261 100644 +--- a/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp ++++ b/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp +@@ -50,6 +50,8 @@ static MCInst beq() { + .addImm(32); + } + ++static MCInst b() { return MCInstBuilder(LoongArch::B).addImm(32); } ++ + static MCInst bl() { return MCInstBuilder(LoongArch::BL).addImm(32); } + + static MCInst jirl(unsigned RD, unsigned RJ = LoongArch::R10) { +@@ -58,6 +60,7 @@ static MCInst jirl(unsigned RD, unsigned RJ = LoongArch::R10) { + + TEST_P(InstrAnalysisTest, IsTerminator) { + EXPECT_TRUE(Analysis->isTerminator(beq())); ++ EXPECT_TRUE(Analysis->isTerminator(b())); + EXPECT_FALSE(Analysis->isTerminator(bl())); + EXPECT_TRUE(Analysis->isTerminator(jirl(LoongArch::R0))); + EXPECT_FALSE(Analysis->isTerminator(jirl(LoongArch::R5))); +@@ -65,6 +68,7 @@ TEST_P(InstrAnalysisTest, IsTerminator) { + + TEST_P(InstrAnalysisTest, IsCall) { + EXPECT_FALSE(Analysis->isCall(beq())); ++ EXPECT_FALSE(Analysis->isCall(b())); + EXPECT_TRUE(Analysis->isCall(bl())); + EXPECT_TRUE(Analysis->isCall(jirl(LoongArch::R1))); + EXPECT_FALSE(Analysis->isCall(jirl(LoongArch::R0))); +@@ -72,6 +76,7 @@ TEST_P(InstrAnalysisTest, IsCall) { + + TEST_P(InstrAnalysisTest, IsReturn) { + EXPECT_FALSE(Analysis->isReturn(beq())); ++ EXPECT_FALSE(Analysis->isReturn(b())); + EXPECT_FALSE(Analysis->isReturn(bl())); + EXPECT_TRUE(Analysis->isReturn(jirl(LoongArch::R0, LoongArch::R1))); + EXPECT_FALSE(Analysis->isReturn(jirl(LoongArch::R0))); +@@ -80,14 +85,26 @@ TEST_P(InstrAnalysisTest, IsReturn) { + + TEST_P(InstrAnalysisTest, IsBranch) { + EXPECT_TRUE(Analysis->isBranch(beq())); ++ EXPECT_TRUE(Analysis->isBranch(b())); + EXPECT_FALSE(Analysis->isBranch(bl())); + EXPECT_TRUE(Analysis->isBranch(jirl(LoongArch::R0))); + EXPECT_FALSE(Analysis->isBranch(jirl(LoongArch::R1))); + EXPECT_FALSE(Analysis->isBranch(jirl(LoongArch::R0, LoongArch::R1))); + } + ++TEST_P(InstrAnalysisTest, IsConditionalBranch) { ++ EXPECT_TRUE(Analysis->isConditionalBranch(beq())); ++ // FIXME: Instr 'b' is not a ConditionalBranch, so the analysis here is ++ // wrong. The following patch will fix it. 
++ EXPECT_TRUE(Analysis->isConditionalBranch(b())); ++ EXPECT_FALSE(Analysis->isConditionalBranch(bl())); ++} ++ + TEST_P(InstrAnalysisTest, IsUnconditionalBranch) { + EXPECT_FALSE(Analysis->isUnconditionalBranch(beq())); ++ // FIXME: Instr 'b' is an UnconditionalBranch, so the analysis here is ++ // wrong. The following patch will fix it. ++ EXPECT_FALSE(Analysis->isUnconditionalBranch(b())); + EXPECT_FALSE(Analysis->isUnconditionalBranch(bl())); + EXPECT_TRUE(Analysis->isUnconditionalBranch(jirl(LoongArch::R0))); + EXPECT_FALSE(Analysis->isUnconditionalBranch(jirl(LoongArch::R1))); +@@ -97,6 +114,7 @@ TEST_P(InstrAnalysisTest, IsUnconditionalBranch) { + + TEST_P(InstrAnalysisTest, IsIndirectBranch) { + EXPECT_FALSE(Analysis->isIndirectBranch(beq())); ++ EXPECT_FALSE(Analysis->isIndirectBranch(b())); + EXPECT_FALSE(Analysis->isIndirectBranch(bl())); + EXPECT_TRUE(Analysis->isIndirectBranch(jirl(LoongArch::R0))); + EXPECT_FALSE(Analysis->isIndirectBranch(jirl(LoongArch::R1))); +-- +2.20.1 + diff --git a/0051-LoongArch-Set-isBarrier-to-true-for-instruction-b-72.patch b/0051-LoongArch-Set-isBarrier-to-true-for-instruction-b-72.patch new file mode 100644 index 0000000000000000000000000000000000000000..babc1c352e6f0330a1a762a37774d31d35264c85 --- /dev/null +++ b/0051-LoongArch-Set-isBarrier-to-true-for-instruction-b-72.patch @@ -0,0 +1,58 @@ +From 009145168af549b02d81777430bb4ed7cea3d1f5 Mon Sep 17 00:00:00 2001 +From: ZhaoQi +Date: Thu, 16 Nov 2023 14:01:58 +0800 +Subject: [PATCH 51/66] [LoongArch] Set isBarrier to true for instruction 'b' + (#72339) + +Instr "b offs26" represent to an unconditional branch in LoongArch. Set +isBarrier to 1 in tablegen for it, so that MCInstrAnalysis can return +correctly. + +Fixes https://github.com/llvm/llvm-project/pull/71903. + +(cherry picked from commit 42a4d5e8cab1537515d92ed56d6e17b673ed352f) +Change-Id: I1c43568374fa884f1dfa97b4f97483343812a24f +--- + llvm/lib/Target/LoongArch/LoongArchInstrInfo.td | 1 + + llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp | 8 ++------ + 2 files changed, 3 insertions(+), 6 deletions(-) + +diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +index 166379d7d592..05ae36a9781d 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +@@ -586,6 +586,7 @@ class Br_I26 op> + : FmtI26 { + let isBranch = 1; + let isTerminator = 1; ++ let isBarrier = 1; + } + } // hasSideEffects = 0, mayLoad = 0, mayStore = 0 + +diff --git a/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp b/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp +index 6e1919fc2261..468ee79615d6 100644 +--- a/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp ++++ b/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp +@@ -94,17 +94,13 @@ TEST_P(InstrAnalysisTest, IsBranch) { + + TEST_P(InstrAnalysisTest, IsConditionalBranch) { + EXPECT_TRUE(Analysis->isConditionalBranch(beq())); +- // FIXME: Instr 'b' is not a ConditionalBranch, so the analysis here is +- // wrong. The following patch will fix it. +- EXPECT_TRUE(Analysis->isConditionalBranch(b())); ++ EXPECT_FALSE(Analysis->isConditionalBranch(b())); + EXPECT_FALSE(Analysis->isConditionalBranch(bl())); + } + + TEST_P(InstrAnalysisTest, IsUnconditionalBranch) { + EXPECT_FALSE(Analysis->isUnconditionalBranch(beq())); +- // FIXME: Instr 'b' is an UnconditionalBranch, so the analysis here is +- // wrong. The following patch will fix it. 
+- EXPECT_FALSE(Analysis->isUnconditionalBranch(b())); ++ EXPECT_TRUE(Analysis->isUnconditionalBranch(b())); + EXPECT_FALSE(Analysis->isUnconditionalBranch(bl())); + EXPECT_TRUE(Analysis->isUnconditionalBranch(jirl(LoongArch::R0))); + EXPECT_FALSE(Analysis->isUnconditionalBranch(jirl(LoongArch::R1))); +-- +2.20.1 + diff --git a/0052-LoongArch-MC-Pre-commit-tests-for-instr-bl-fixupkind.patch b/0052-LoongArch-MC-Pre-commit-tests-for-instr-bl-fixupkind.patch new file mode 100644 index 0000000000000000000000000000000000000000..2839fea1fe7391960b32053bafe6b234759d03a8 --- /dev/null +++ b/0052-LoongArch-MC-Pre-commit-tests-for-instr-bl-fixupkind.patch @@ -0,0 +1,93 @@ +From 8e96f7bd7f5ef8a767bb92253c20ce4f997bec37 Mon Sep 17 00:00:00 2001 +From: ZhaoQi +Date: Tue, 21 Nov 2023 08:34:52 +0800 +Subject: [PATCH 52/66] [LoongArch][MC] Pre-commit tests for instr bl fixupkind + testing (#72826) + +This patch is used to test whether fixupkind for bl can be returned +correctly. When BL has target-flags(loongarch-call), there is no error. +But without this flag, an assertion error will appear. So the test is +just tagged as "Expectedly Failed" now until the following patch fix it. + +(cherry picked from commit 2ca028ce7c6de5f1350440012355a65383b8729a) +Change-Id: Ie120263561663d2d0ce8a70f2c372e1347128132 +--- + .../CodeGen/LoongArch/test_bl_fixupkind.mir | 66 +++++++++++++++++++ + 1 file changed, 66 insertions(+) + create mode 100644 llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir + +diff --git a/llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir b/llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir +new file mode 100644 +index 000000000000..2c1d41be7711 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir +@@ -0,0 +1,66 @@ ++## Tagged as "Expectedly Failed" until the following patch fix it ++# XFAIL: * ++# RUN: llc --mtriple=loongarch64 --filetype=obj %s -o - | \ ++# RUN: llvm-objdump -d - | FileCheck %s ++ ++# REQUIRES: asserts ++ ++## Check that bl can get fixupkind correctly. ++## When BL has target-flags(loongarch-call), there is no error. But without ++## this flag, an assertion error will appear: ++## Assertion `FixupKind != LoongArch::fixup_loongarch_invalid && "Unhandled expression!"' failed. ++ ++--- | ++ target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" ++ target triple = "loongarch64" ++ ++ define dso_local void @test_bl_fixupkind_with_flag() { ++ ; CHECK-LABEL: test_bl_fixupkind_with_flag ++ ; CHECK: addi.d $sp, $sp, -16 ++ ; CHECK-NEXT: st.d $ra, $sp, 8 ++ ; CHECK-NEXT: bl 0 ++ ; CHECK-NEXT: ld.d $ra, $sp, 8 ++ ; CHECK-NEXT: addi.d $sp, $sp, 16 ++ ; CHECK-NEXT: ret ++ entry: ++ call void @foo() ++ ret void ++ } ++ ++ define dso_local void @test_bl_fixupkind_without_flag() { ++ ; CHECK-LABEL: test_bl_fixupkind_without_flag ++ ; CHECK: addi.d $sp, $sp, -16 ++ ; CHECK-NEXT: st.d $ra, $sp, 8 ++ ; CHECK-NEXT: bl 0 ++ ; CHECK-NEXT: ld.d $ra, $sp, 8 ++ ; CHECK-NEXT: addi.d $sp, $sp, 16 ++ ; CHECK-NEXT: ret ++ entry: ++ call void @foo() ++ ret void ++ } ++ ++ declare dso_local void @foo(...) ++... ++--- ++name: test_bl_fixupkind_with_flag ++tracksRegLiveness: true ++body: | ++ bb.0.entry: ++ ADJCALLSTACKDOWN 0, 0, implicit-def dead $r3, implicit $r3 ++ BL target-flags(loongarch-call) @foo, csr_ilp32d_lp64d, implicit-def $r1, implicit-def dead $r1, implicit-def $r3 ++ ADJCALLSTACKUP 0, 0, implicit-def dead $r3, implicit $r3 ++ PseudoRET ++ ++... 
++--- ++name: test_bl_fixupkind_without_flag ++tracksRegLiveness: true ++body: | ++ bb.0.entry: ++ ADJCALLSTACKDOWN 0, 0, implicit-def dead $r3, implicit $r3 ++ BL @foo, csr_ilp32d_lp64d, implicit-def $r1, implicit-def dead $r1, implicit-def $r3 ++ ADJCALLSTACKUP 0, 0, implicit-def dead $r3, implicit $r3 ++ PseudoRET ++ ++... +-- +2.20.1 + diff --git a/0053-LoongArch-MC-Support-to-get-the-FixupKind-for-BL-729.patch b/0053-LoongArch-MC-Support-to-get-the-FixupKind-for-BL-729.patch new file mode 100644 index 0000000000000000000000000000000000000000..3525118c8adbe5d8da2aa9daf1f3de8414702f4e --- /dev/null +++ b/0053-LoongArch-MC-Support-to-get-the-FixupKind-for-BL-729.patch @@ -0,0 +1,54 @@ +From ca4406226dec4a681a51aaee06dc1d6ba132c8a5 Mon Sep 17 00:00:00 2001 +From: ZhaoQi +Date: Tue, 21 Nov 2023 19:00:29 +0800 +Subject: [PATCH 53/66] [LoongArch][MC] Support to get the FixupKind for BL + (#72938) + +Previously, bolt could not get FixupKind for BL correctly, because bolt +cannot get target-flags for BL. Here just add support in MCCodeEmitter. + +Fixes https://github.com/llvm/llvm-project/pull/72826. + +(cherry picked from commit 775d2f3201cf7fb657aaf58d1b37c130bd9eb8f9) +Change-Id: Ia7e4174f6fcc4a64f7d1d3b10ed24e457fc08886 +--- + .../LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp | 1 + + llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir | 8 ++------ + 2 files changed, 3 insertions(+), 6 deletions(-) + +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp +index 08c0820cb862..09d92ac9aa3a 100644 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp +@@ -263,6 +263,7 @@ LoongArchMCCodeEmitter::getExprOpValue(const MCInst &MI, const MCOperand &MO, + FixupKind = LoongArch::fixup_loongarch_b21; + break; + case LoongArch::B: ++ case LoongArch::BL: + FixupKind = LoongArch::fixup_loongarch_b26; + break; + } +diff --git a/llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir b/llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir +index 2c1d41be7711..70cd5fb8d7eb 100644 +--- a/llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir ++++ b/llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir +@@ -1,14 +1,10 @@ +-## Tagged as "Expectedly Failed" until the following patch fix it +-# XFAIL: * + # RUN: llc --mtriple=loongarch64 --filetype=obj %s -o - | \ + # RUN: llvm-objdump -d - | FileCheck %s + + # REQUIRES: asserts + +-## Check that bl can get fixupkind correctly. +-## When BL has target-flags(loongarch-call), there is no error. But without +-## this flag, an assertion error will appear: +-## Assertion `FixupKind != LoongArch::fixup_loongarch_invalid && "Unhandled expression!"' failed. ++## Check that bl can get fixupkind correctly, whether BL contains ++## target-flags(loongarch-call) or not. 
+ + --- | + target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" +-- +2.20.1 + diff --git a/0054-LoongArch-MC-Modify-branch-evaluation-for-MCInstrAna.patch b/0054-LoongArch-MC-Modify-branch-evaluation-for-MCInstrAna.patch new file mode 100644 index 0000000000000000000000000000000000000000..d4f5b549a96a42e5b341f2335584d8bd558972a3 --- /dev/null +++ b/0054-LoongArch-MC-Modify-branch-evaluation-for-MCInstrAna.patch @@ -0,0 +1,135 @@ +From 12275717d661ac4692edaaa48da15622e27493ca Mon Sep 17 00:00:00 2001 +From: ZhaoQi +Date: Thu, 23 Nov 2023 16:38:41 +0800 +Subject: [PATCH 54/66] [LoongArch][MC] Modify branch evaluation for + MCInstrAnalysis (#73205) + +Function evaluateBranch() is used to compute target address for a given +branch instruction and return true on success. But target address of +indirect branch cannot be simply added, so rule it out and just return +false. + +This patch also add objdump tests which capture the current state of +support for printing branch targets. Without this patch, the result of +"jirl $zero, $a0, 4" is "jirl $zero, $a0, 4 ". It is obviously +incorrect, because this instruction represents an indirect branch whose +target address depends on both the register value and the imm. After +this patch, it will be right despite loss of details. + +(cherry picked from commit 1c68c4c57a65a67963264878bc4646be8b58854c) +Change-Id: I326faea78b4c5fa68dc74cefa07e6195c7de6b51 +--- + .../MCTargetDesc/LoongArchMCTargetDesc.cpp | 3 +- + .../llvm-objdump/ELF/LoongArch/branches.s | 76 +++++++++++++++++++ + .../llvm-objdump/ELF/LoongArch/lit.local.cfg | 2 + + 3 files changed, 80 insertions(+), 1 deletion(-) + create mode 100644 llvm/test/tools/llvm-objdump/ELF/LoongArch/branches.s + create mode 100644 llvm/test/tools/llvm-objdump/ELF/LoongArch/lit.local.cfg + +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp +index d580c3457fec..a4e6a09863e6 100644 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp +@@ -97,7 +97,8 @@ public: + bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size, + uint64_t &Target) const override { + unsigned NumOps = Inst.getNumOperands(); +- if (isBranch(Inst) || Inst.getOpcode() == LoongArch::BL) { ++ if ((isBranch(Inst) && !isIndirectBranch(Inst)) || ++ Inst.getOpcode() == LoongArch::BL) { + Target = Addr + Inst.getOperand(NumOps - 1).getImm(); + return true; + } +diff --git a/llvm/test/tools/llvm-objdump/ELF/LoongArch/branches.s b/llvm/test/tools/llvm-objdump/ELF/LoongArch/branches.s +new file mode 100644 +index 000000000000..8cb00aef9954 +--- /dev/null ++++ b/llvm/test/tools/llvm-objdump/ELF/LoongArch/branches.s +@@ -0,0 +1,76 @@ ++# RUN: llvm-mc --triple=loongarch32 --filetype=obj < %s | \ ++# RUN: llvm-objdump -d --no-show-raw-insn - | FileCheck %s ++# RUN: llvm-mc --triple=loongarch64 --filetype=obj < %s | \ ++# RUN: llvm-objdump -d --no-show-raw-insn - | FileCheck %s ++ ++# CHECK-LABEL: : ++foo: ++# CHECK: beq $a0, $a1, 108 ++beq $a0, $a1, .Llocal ++# CHECK: bne $a0, $a1, 104 ++bne $a0, $a1, .Llocal ++# CHECK: blt $a0, $a1, 100 ++blt $a0, $a1, .Llocal ++# CHECK: bltu $a0, $a1, 96 ++bltu $a0, $a1, .Llocal ++# CHECK: bge $a0, $a1, 92 ++bge $a0, $a1, .Llocal ++# CHECK: bgeu $a0, $a1, 88 ++bgeu $a0, $a1, .Llocal ++# CHECK: beqz $a0, 84 ++beqz $a0, .Llocal ++# CHECK: bnez $a0, 80 ++bnez $a0, .Llocal ++# CHECK: bceqz $fcc6, 76 ++bceqz $fcc6, .Llocal 
++# CHECK: bcnez $fcc6, 72 ++bcnez $fcc6, .Llocal ++ ++# CHECK: beq $a0, $a1, 76 ++beq $a0, $a1, bar ++# CHECK: bne $a0, $a1, 72 ++bne $a0, $a1, bar ++# CHECK: blt $a0, $a1, 68 ++blt $a0, $a1, bar ++# CHECK: bltu $a0, $a1, 64 ++bltu $a0, $a1, bar ++# CHECK: bge $a0, $a1, 60 ++bge $a0, $a1, bar ++# CHECK: bgeu $a0, $a1, 56 ++bgeu $a0, $a1, bar ++# CHECK: beqz $a0, 52 ++beqz $a0, bar ++# CHECK: bnez $a0, 48 ++bnez $a0, bar ++# CHECK: bceqz $fcc6, 44 ++bceqz $fcc6, bar ++# CHECK: bcnez $fcc6, 40 ++bcnez $fcc6, bar ++ ++# CHECK: b 28 ++b .Llocal ++# CHECK: b 32 ++b bar ++ ++# CHECK: bl 20 ++bl .Llocal ++# CHECK: bl 24 ++bl bar ++ ++# CHECK: jirl $zero, $a0, 4{{$}} ++jirl $zero, $a0, 4 ++# CHECK: jirl $ra, $a0, 4{{$}} ++jirl $ra, $a0, 4 ++# CHECK: ret ++ret ++ ++.Llocal: ++# CHECK: 6c: nop ++# CHECK: nop ++nop ++nop ++ ++# CHECK-LABEL: : ++bar: ++# CHECK: 74: nop ++nop +diff --git a/llvm/test/tools/llvm-objdump/ELF/LoongArch/lit.local.cfg b/llvm/test/tools/llvm-objdump/ELF/LoongArch/lit.local.cfg +new file mode 100644 +index 000000000000..cc24278acbb4 +--- /dev/null ++++ b/llvm/test/tools/llvm-objdump/ELF/LoongArch/lit.local.cfg +@@ -0,0 +1,2 @@ ++if not "LoongArch" in config.root.targets: ++ config.unsupported = True +-- +2.20.1 + diff --git a/0055-LoongArch-Precommit-a-test-for-smul-with-overflow-NF.patch b/0055-LoongArch-Precommit-a-test-for-smul-with-overflow-NF.patch new file mode 100644 index 0000000000000000000000000000000000000000..67f41d52d2b781f33240c32b5b38c733fed82ecb --- /dev/null +++ b/0055-LoongArch-Precommit-a-test-for-smul-with-overflow-NF.patch @@ -0,0 +1,140 @@ +From 91fcc287c4a1a267f20c459177cf6203a8c6c3ed Mon Sep 17 00:00:00 2001 +From: hev +Date: Thu, 23 Nov 2023 15:15:26 +0800 +Subject: [PATCH 55/66] [LoongArch] Precommit a test for smul with overflow + (NFC) (#73212) + +(cherry picked from commit 7414c0db962f8a5029fd44c3e0bc93d9ce20be71) +Change-Id: I20a719b4732893b9ef268e0b643fdecab7f1712d +--- + .../CodeGen/LoongArch/smul-with-overflow.ll | 118 ++++++++++++++++++ + 1 file changed, 118 insertions(+) + create mode 100644 llvm/test/CodeGen/LoongArch/smul-with-overflow.ll + +diff --git a/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll b/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll +new file mode 100644 +index 000000000000..a53e77e5aa4b +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll +@@ -0,0 +1,118 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 ++; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 ++ ++define zeroext i1 @smuloi64(i64 %v1, i64 %v2, ptr %res) { ++; LA32-LABEL: smuloi64: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $sp, $sp, -16 ++; LA32-NEXT: .cfi_def_cfa_offset 16 ++; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill ++; LA32-NEXT: st.w $fp, $sp, 8 # 4-byte Folded Spill ++; LA32-NEXT: .cfi_offset 1, -4 ++; LA32-NEXT: .cfi_offset 22, -8 ++; LA32-NEXT: move $fp, $a4 ++; LA32-NEXT: st.w $zero, $sp, 4 ++; LA32-NEXT: addi.w $a4, $sp, 4 ++; LA32-NEXT: bl %plt(__mulodi4) ++; LA32-NEXT: st.w $a1, $fp, 4 ++; LA32-NEXT: st.w $a0, $fp, 0 ++; LA32-NEXT: ld.w $a0, $sp, 4 ++; LA32-NEXT: sltu $a0, $zero, $a0 ++; LA32-NEXT: ld.w $fp, $sp, 8 # 4-byte Folded Reload ++; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ++; LA32-NEXT: addi.w $sp, $sp, 16 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: smuloi64: ++; LA64: # %bb.0: ++; LA64-NEXT: mul.d $a3, $a0, $a1 ++; LA64-NEXT: st.d $a3, $a2, 0 ++; LA64-NEXT: mulh.d $a0, $a0, 
$a1 ++; LA64-NEXT: srai.d $a1, $a3, 63 ++; LA64-NEXT: xor $a0, $a0, $a1 ++; LA64-NEXT: sltu $a0, $zero, $a0 ++; LA64-NEXT: ret ++ %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2) ++ %val = extractvalue {i64, i1} %t, 0 ++ %obit = extractvalue {i64, i1} %t, 1 ++ store i64 %val, ptr %res ++ ret i1 %obit ++} ++ ++define zeroext i1 @smuloi128(i128 %v1, i128 %v2, ptr %res) { ++; LA32-LABEL: smuloi128: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $sp, $sp, -64 ++; LA32-NEXT: .cfi_def_cfa_offset 64 ++; LA32-NEXT: st.w $ra, $sp, 60 # 4-byte Folded Spill ++; LA32-NEXT: st.w $fp, $sp, 56 # 4-byte Folded Spill ++; LA32-NEXT: .cfi_offset 1, -4 ++; LA32-NEXT: .cfi_offset 22, -8 ++; LA32-NEXT: move $fp, $a2 ++; LA32-NEXT: st.w $zero, $sp, 52 ++; LA32-NEXT: ld.w $a2, $a1, 12 ++; LA32-NEXT: st.w $a2, $sp, 12 ++; LA32-NEXT: ld.w $a2, $a1, 8 ++; LA32-NEXT: st.w $a2, $sp, 8 ++; LA32-NEXT: ld.w $a2, $a1, 4 ++; LA32-NEXT: st.w $a2, $sp, 4 ++; LA32-NEXT: ld.w $a1, $a1, 0 ++; LA32-NEXT: st.w $a1, $sp, 0 ++; LA32-NEXT: ld.w $a1, $a0, 12 ++; LA32-NEXT: st.w $a1, $sp, 28 ++; LA32-NEXT: ld.w $a1, $a0, 8 ++; LA32-NEXT: st.w $a1, $sp, 24 ++; LA32-NEXT: ld.w $a1, $a0, 4 ++; LA32-NEXT: st.w $a1, $sp, 20 ++; LA32-NEXT: ld.w $a0, $a0, 0 ++; LA32-NEXT: st.w $a0, $sp, 16 ++; LA32-NEXT: addi.w $a0, $sp, 32 ++; LA32-NEXT: addi.w $a1, $sp, 16 ++; LA32-NEXT: addi.w $a2, $sp, 0 ++; LA32-NEXT: addi.w $a3, $sp, 52 ++; LA32-NEXT: bl %plt(__muloti4) ++; LA32-NEXT: ld.w $a0, $sp, 44 ++; LA32-NEXT: st.w $a0, $fp, 12 ++; LA32-NEXT: ld.w $a0, $sp, 40 ++; LA32-NEXT: st.w $a0, $fp, 8 ++; LA32-NEXT: ld.w $a0, $sp, 36 ++; LA32-NEXT: st.w $a0, $fp, 4 ++; LA32-NEXT: ld.w $a0, $sp, 32 ++; LA32-NEXT: st.w $a0, $fp, 0 ++; LA32-NEXT: ld.w $a0, $sp, 52 ++; LA32-NEXT: sltu $a0, $zero, $a0 ++; LA32-NEXT: ld.w $fp, $sp, 56 # 4-byte Folded Reload ++; LA32-NEXT: ld.w $ra, $sp, 60 # 4-byte Folded Reload ++; LA32-NEXT: addi.w $sp, $sp, 64 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: smuloi128: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.d $sp, $sp, -32 ++; LA64-NEXT: .cfi_def_cfa_offset 32 ++; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill ++; LA64-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill ++; LA64-NEXT: .cfi_offset 1, -8 ++; LA64-NEXT: .cfi_offset 22, -16 ++; LA64-NEXT: move $fp, $a4 ++; LA64-NEXT: st.d $zero, $sp, 8 ++; LA64-NEXT: addi.d $a4, $sp, 8 ++; LA64-NEXT: bl %plt(__muloti4) ++; LA64-NEXT: st.d $a1, $fp, 8 ++; LA64-NEXT: st.d $a0, $fp, 0 ++; LA64-NEXT: ld.d $a0, $sp, 8 ++; LA64-NEXT: sltu $a0, $zero, $a0 ++; LA64-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload ++; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload ++; LA64-NEXT: addi.d $sp, $sp, 32 ++; LA64-NEXT: ret ++ %t = call {i128, i1} @llvm.smul.with.overflow.i128(i128 %v1, i128 %v2) ++ %val = extractvalue {i128, i1} %t, 0 ++ %obit = extractvalue {i128, i1} %t, 1 ++ store i128 %val, ptr %res ++ ret i1 %obit ++} ++ ++declare {i64, i1} @llvm.smul.with.overflow.i64(i64, i64) nounwind readnone ++declare {i128, i1} @llvm.smul.with.overflow.i128(i128, i128) nounwind readnone +-- +2.20.1 + diff --git a/0056-LoongArch-Disable-mulodi4-and-muloti4-libcalls-73199.patch b/0056-LoongArch-Disable-mulodi4-and-muloti4-libcalls-73199.patch new file mode 100644 index 0000000000000000000000000000000000000000..d17b75b6ed457b3250b4949f2e4940a1be523a44 --- /dev/null +++ b/0056-LoongArch-Disable-mulodi4-and-muloti4-libcalls-73199.patch @@ -0,0 +1,529 @@ +From a506e3a165025f8f4088132f2f1082cd31e47b6c Mon Sep 17 00:00:00 2001 +From: hev +Date: Thu, 23 Nov 2023 19:34:50 +0800 +Subject: [PATCH 56/66] 
[LoongArch] Disable mulodi4 and muloti4 libcalls + (#73199) + +This library function only exists in compiler-rt not libgcc. So this +would fail to link unless we were linking with compiler-rt. + +Fixes https://github.com/ClangBuiltLinux/linux/issues/1958 + +(cherry picked from commit 0d9f557b6c36da3aa92daff4c0d37ea821d7ae1e) +Change-Id: Ie95d3bccc70b3b9a7aedd590864dd4e8f9cd7d6b +--- + .../LoongArch/LoongArchISelLowering.cpp | 5 + + .../CodeGen/LoongArch/smul-with-overflow.ll | 463 +++++++++++++++--- + 2 files changed, 397 insertions(+), 71 deletions(-) + +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +index f7eacd56c542..ed106cb766bc 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +@@ -152,8 +152,13 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + + // Set libcalls. + setLibcallName(RTLIB::MUL_I128, nullptr); ++ // The MULO libcall is not part of libgcc, only compiler-rt. ++ setLibcallName(RTLIB::MULO_I64, nullptr); + } + ++ // The MULO libcall is not part of libgcc, only compiler-rt. ++ setLibcallName(RTLIB::MULO_I128, nullptr); ++ + static const ISD::CondCode FPCCToExpand[] = { + ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE, + ISD::SETGE, ISD::SETNE, ISD::SETGT}; +diff --git a/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll b/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll +index a53e77e5aa4b..6cba4108d63c 100644 +--- a/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll ++++ b/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll +@@ -5,23 +5,53 @@ + define zeroext i1 @smuloi64(i64 %v1, i64 %v2, ptr %res) { + ; LA32-LABEL: smuloi64: + ; LA32: # %bb.0: +-; LA32-NEXT: addi.w $sp, $sp, -16 +-; LA32-NEXT: .cfi_def_cfa_offset 16 +-; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +-; LA32-NEXT: st.w $fp, $sp, 8 # 4-byte Folded Spill +-; LA32-NEXT: .cfi_offset 1, -4 +-; LA32-NEXT: .cfi_offset 22, -8 +-; LA32-NEXT: move $fp, $a4 +-; LA32-NEXT: st.w $zero, $sp, 4 +-; LA32-NEXT: addi.w $a4, $sp, 4 +-; LA32-NEXT: bl %plt(__mulodi4) +-; LA32-NEXT: st.w $a1, $fp, 4 +-; LA32-NEXT: st.w $a0, $fp, 0 +-; LA32-NEXT: ld.w $a0, $sp, 4 ++; LA32-NEXT: srai.w $a5, $a1, 31 ++; LA32-NEXT: mul.w $a6, $a2, $a5 ++; LA32-NEXT: mulh.wu $a7, $a2, $a5 ++; LA32-NEXT: add.w $a7, $a7, $a6 ++; LA32-NEXT: mul.w $a5, $a3, $a5 ++; LA32-NEXT: add.w $a5, $a7, $a5 ++; LA32-NEXT: srai.w $a7, $a3, 31 ++; LA32-NEXT: mul.w $t0, $a7, $a1 ++; LA32-NEXT: mulh.wu $t1, $a7, $a0 ++; LA32-NEXT: add.w $t0, $t1, $t0 ++; LA32-NEXT: mul.w $a7, $a7, $a0 ++; LA32-NEXT: add.w $t0, $t0, $a7 ++; LA32-NEXT: add.w $a5, $t0, $a5 ++; LA32-NEXT: mulh.wu $t0, $a0, $a2 ++; LA32-NEXT: mul.w $t1, $a1, $a2 ++; LA32-NEXT: add.w $t0, $t1, $t0 ++; LA32-NEXT: sltu $t1, $t0, $t1 ++; LA32-NEXT: mulh.wu $t2, $a1, $a2 ++; LA32-NEXT: add.w $t1, $t2, $t1 ++; LA32-NEXT: mul.w $t2, $a0, $a3 ++; LA32-NEXT: add.w $t0, $t2, $t0 ++; LA32-NEXT: sltu $t2, $t0, $t2 ++; LA32-NEXT: mulh.wu $t3, $a0, $a3 ++; LA32-NEXT: add.w $t2, $t3, $t2 ++; LA32-NEXT: add.w $a6, $a7, $a6 ++; LA32-NEXT: sltu $a7, $a6, $a7 ++; LA32-NEXT: add.w $a5, $a5, $a7 ++; LA32-NEXT: mul.w $a0, $a0, $a2 ++; LA32-NEXT: mul.w $a2, $a1, $a3 ++; LA32-NEXT: mulh.wu $a1, $a1, $a3 ++; LA32-NEXT: add.w $a3, $t1, $t2 ++; LA32-NEXT: sltu $a7, $a3, $t1 ++; LA32-NEXT: add.w $a1, $a1, $a7 ++; LA32-NEXT: st.w $a0, $a4, 0 ++; LA32-NEXT: add.w $a0, $a2, $a3 ++; LA32-NEXT: sltu $a2, $a0, $a2 ++; LA32-NEXT: add.w $a1, $a1, $a2 ++; 
LA32-NEXT: st.w $t0, $a4, 4 ++; LA32-NEXT: add.w $a1, $a1, $a5 ++; LA32-NEXT: add.w $a2, $a0, $a6 ++; LA32-NEXT: sltu $a0, $a2, $a0 ++; LA32-NEXT: add.w $a0, $a1, $a0 ++; LA32-NEXT: srai.w $a1, $t0, 31 ++; LA32-NEXT: xor $a0, $a0, $a1 ++; LA32-NEXT: xor $a1, $a2, $a1 ++; LA32-NEXT: or $a0, $a1, $a0 + ; LA32-NEXT: sltu $a0, $zero, $a0 +-; LA32-NEXT: ld.w $fp, $sp, 8 # 4-byte Folded Reload +-; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +-; LA32-NEXT: addi.w $sp, $sp, 16 + ; LA32-NEXT: ret + ; + ; LA64-LABEL: smuloi64: +@@ -43,69 +73,360 @@ define zeroext i1 @smuloi64(i64 %v1, i64 %v2, ptr %res) { + define zeroext i1 @smuloi128(i128 %v1, i128 %v2, ptr %res) { + ; LA32-LABEL: smuloi128: + ; LA32: # %bb.0: +-; LA32-NEXT: addi.w $sp, $sp, -64 +-; LA32-NEXT: .cfi_def_cfa_offset 64 +-; LA32-NEXT: st.w $ra, $sp, 60 # 4-byte Folded Spill +-; LA32-NEXT: st.w $fp, $sp, 56 # 4-byte Folded Spill ++; LA32-NEXT: addi.w $sp, $sp, -96 ++; LA32-NEXT: .cfi_def_cfa_offset 96 ++; LA32-NEXT: st.w $ra, $sp, 92 # 4-byte Folded Spill ++; LA32-NEXT: st.w $fp, $sp, 88 # 4-byte Folded Spill ++; LA32-NEXT: st.w $s0, $sp, 84 # 4-byte Folded Spill ++; LA32-NEXT: st.w $s1, $sp, 80 # 4-byte Folded Spill ++; LA32-NEXT: st.w $s2, $sp, 76 # 4-byte Folded Spill ++; LA32-NEXT: st.w $s3, $sp, 72 # 4-byte Folded Spill ++; LA32-NEXT: st.w $s4, $sp, 68 # 4-byte Folded Spill ++; LA32-NEXT: st.w $s5, $sp, 64 # 4-byte Folded Spill ++; LA32-NEXT: st.w $s6, $sp, 60 # 4-byte Folded Spill ++; LA32-NEXT: st.w $s7, $sp, 56 # 4-byte Folded Spill ++; LA32-NEXT: st.w $s8, $sp, 52 # 4-byte Folded Spill + ; LA32-NEXT: .cfi_offset 1, -4 + ; LA32-NEXT: .cfi_offset 22, -8 +-; LA32-NEXT: move $fp, $a2 +-; LA32-NEXT: st.w $zero, $sp, 52 +-; LA32-NEXT: ld.w $a2, $a1, 12 +-; LA32-NEXT: st.w $a2, $sp, 12 +-; LA32-NEXT: ld.w $a2, $a1, 8 +-; LA32-NEXT: st.w $a2, $sp, 8 +-; LA32-NEXT: ld.w $a2, $a1, 4 +-; LA32-NEXT: st.w $a2, $sp, 4 +-; LA32-NEXT: ld.w $a1, $a1, 0 +-; LA32-NEXT: st.w $a1, $sp, 0 +-; LA32-NEXT: ld.w $a1, $a0, 12 +-; LA32-NEXT: st.w $a1, $sp, 28 +-; LA32-NEXT: ld.w $a1, $a0, 8 +-; LA32-NEXT: st.w $a1, $sp, 24 +-; LA32-NEXT: ld.w $a1, $a0, 4 +-; LA32-NEXT: st.w $a1, $sp, 20 +-; LA32-NEXT: ld.w $a0, $a0, 0 +-; LA32-NEXT: st.w $a0, $sp, 16 +-; LA32-NEXT: addi.w $a0, $sp, 32 +-; LA32-NEXT: addi.w $a1, $sp, 16 +-; LA32-NEXT: addi.w $a2, $sp, 0 +-; LA32-NEXT: addi.w $a3, $sp, 52 +-; LA32-NEXT: bl %plt(__muloti4) +-; LA32-NEXT: ld.w $a0, $sp, 44 +-; LA32-NEXT: st.w $a0, $fp, 12 +-; LA32-NEXT: ld.w $a0, $sp, 40 +-; LA32-NEXT: st.w $a0, $fp, 8 +-; LA32-NEXT: ld.w $a0, $sp, 36 +-; LA32-NEXT: st.w $a0, $fp, 4 +-; LA32-NEXT: ld.w $a0, $sp, 32 +-; LA32-NEXT: st.w $a0, $fp, 0 +-; LA32-NEXT: ld.w $a0, $sp, 52 ++; LA32-NEXT: .cfi_offset 23, -12 ++; LA32-NEXT: .cfi_offset 24, -16 ++; LA32-NEXT: .cfi_offset 25, -20 ++; LA32-NEXT: .cfi_offset 26, -24 ++; LA32-NEXT: .cfi_offset 27, -28 ++; LA32-NEXT: .cfi_offset 28, -32 ++; LA32-NEXT: .cfi_offset 29, -36 ++; LA32-NEXT: .cfi_offset 30, -40 ++; LA32-NEXT: .cfi_offset 31, -44 ++; LA32-NEXT: st.w $a2, $sp, 12 # 4-byte Folded Spill ++; LA32-NEXT: ld.w $a6, $a1, 0 ++; LA32-NEXT: ld.w $a7, $a0, 0 ++; LA32-NEXT: mulh.wu $a3, $a7, $a6 ++; LA32-NEXT: ld.w $a5, $a0, 4 ++; LA32-NEXT: mul.w $a4, $a5, $a6 ++; LA32-NEXT: add.w $a3, $a4, $a3 ++; LA32-NEXT: sltu $a4, $a3, $a4 ++; LA32-NEXT: mulh.wu $t0, $a5, $a6 ++; LA32-NEXT: add.w $a4, $t0, $a4 ++; LA32-NEXT: ld.w $t0, $a1, 4 ++; LA32-NEXT: mul.w $t1, $a7, $t0 ++; LA32-NEXT: add.w $a3, $t1, $a3 ++; LA32-NEXT: st.w $a3, $sp, 44 # 4-byte Folded Spill ++; LA32-NEXT: 
sltu $t1, $a3, $t1 ++; LA32-NEXT: mulh.wu $t2, $a7, $t0 ++; LA32-NEXT: add.w $t1, $t2, $t1 ++; LA32-NEXT: ld.w $t4, $a0, 12 ++; LA32-NEXT: ld.w $t2, $a0, 8 ++; LA32-NEXT: ld.w $t3, $a1, 8 ++; LA32-NEXT: mulh.wu $a0, $t2, $t3 ++; LA32-NEXT: mul.w $t5, $t4, $t3 ++; LA32-NEXT: add.w $a0, $t5, $a0 ++; LA32-NEXT: sltu $t5, $a0, $t5 ++; LA32-NEXT: mulh.wu $t6, $t4, $t3 ++; LA32-NEXT: add.w $t5, $t6, $t5 ++; LA32-NEXT: ld.w $t7, $a1, 12 ++; LA32-NEXT: mul.w $a1, $t2, $t7 ++; LA32-NEXT: add.w $a0, $a1, $a0 ++; LA32-NEXT: st.w $a0, $sp, 48 # 4-byte Folded Spill ++; LA32-NEXT: sltu $a1, $a0, $a1 ++; LA32-NEXT: mulh.wu $t6, $t2, $t7 ++; LA32-NEXT: add.w $t6, $t6, $a1 ++; LA32-NEXT: srai.w $s7, $t4, 31 ++; LA32-NEXT: mul.w $a1, $s7, $t7 ++; LA32-NEXT: mulh.wu $t8, $s7, $t3 ++; LA32-NEXT: add.w $t8, $t8, $a1 ++; LA32-NEXT: mulh.wu $fp, $a6, $s7 ++; LA32-NEXT: mul.w $s6, $t0, $s7 ++; LA32-NEXT: add.w $s8, $s6, $fp ++; LA32-NEXT: mul.w $a1, $a6, $s7 ++; LA32-NEXT: add.w $ra, $a1, $s8 ++; LA32-NEXT: sltu $s0, $ra, $a1 ++; LA32-NEXT: add.w $a0, $fp, $s0 ++; LA32-NEXT: add.w $a3, $a4, $t1 ++; LA32-NEXT: st.w $a3, $sp, 20 # 4-byte Folded Spill ++; LA32-NEXT: sltu $a4, $a3, $a4 ++; LA32-NEXT: mulh.wu $t1, $a5, $t0 ++; LA32-NEXT: add.w $a3, $t1, $a4 ++; LA32-NEXT: st.w $a3, $sp, 28 # 4-byte Folded Spill ++; LA32-NEXT: srai.w $s4, $t7, 31 ++; LA32-NEXT: mul.w $fp, $a7, $s4 ++; LA32-NEXT: mulh.wu $a4, $a7, $s4 ++; LA32-NEXT: add.w $s1, $a4, $fp ++; LA32-NEXT: sltu $s0, $s1, $fp ++; LA32-NEXT: add.w $s5, $a4, $s0 ++; LA32-NEXT: mul.w $a4, $s7, $t3 ++; LA32-NEXT: add.w $t8, $t8, $a4 ++; LA32-NEXT: add.w $s0, $ra, $t8 ++; LA32-NEXT: add.w $a3, $a1, $a4 ++; LA32-NEXT: st.w $a3, $sp, 32 # 4-byte Folded Spill ++; LA32-NEXT: sltu $a4, $a3, $a1 ++; LA32-NEXT: add.w $a3, $s0, $a4 ++; LA32-NEXT: st.w $a3, $sp, 24 # 4-byte Folded Spill ++; LA32-NEXT: add.w $s3, $t5, $t6 ++; LA32-NEXT: sltu $a4, $s3, $t5 ++; LA32-NEXT: mulh.wu $t5, $t4, $t7 ++; LA32-NEXT: add.w $a3, $t5, $a4 ++; LA32-NEXT: st.w $a3, $sp, 16 # 4-byte Folded Spill ++; LA32-NEXT: mul.w $a4, $a7, $a6 ++; LA32-NEXT: st.w $a4, $a2, 0 ++; LA32-NEXT: sltu $a4, $s8, $s6 ++; LA32-NEXT: mulh.wu $t5, $t0, $s7 ++; LA32-NEXT: add.w $a4, $t5, $a4 ++; LA32-NEXT: add.w $t1, $a4, $a0 ++; LA32-NEXT: sltu $a4, $t1, $a4 ++; LA32-NEXT: add.w $s2, $t5, $a4 ++; LA32-NEXT: mulh.wu $a4, $a7, $t3 ++; LA32-NEXT: mul.w $t5, $a5, $t3 ++; LA32-NEXT: add.w $a4, $t5, $a4 ++; LA32-NEXT: sltu $t5, $a4, $t5 ++; LA32-NEXT: mulh.wu $t6, $a5, $t3 ++; LA32-NEXT: add.w $a3, $t6, $t5 ++; LA32-NEXT: mul.w $t6, $a7, $t7 ++; LA32-NEXT: add.w $t5, $t6, $a4 ++; LA32-NEXT: sltu $a4, $t5, $t6 ++; LA32-NEXT: mulh.wu $t6, $a7, $t7 ++; LA32-NEXT: add.w $a4, $t6, $a4 ++; LA32-NEXT: mulh.wu $t6, $t2, $a6 ++; LA32-NEXT: mul.w $s7, $t4, $a6 ++; LA32-NEXT: add.w $t6, $s7, $t6 ++; LA32-NEXT: sltu $s7, $t6, $s7 ++; LA32-NEXT: mulh.wu $s8, $t4, $a6 ++; LA32-NEXT: add.w $a0, $s8, $s7 ++; LA32-NEXT: mul.w $s7, $t2, $t0 ++; LA32-NEXT: add.w $t6, $s7, $t6 ++; LA32-NEXT: sltu $s7, $t6, $s7 ++; LA32-NEXT: mulh.wu $s8, $t2, $t0 ++; LA32-NEXT: add.w $a2, $s8, $s7 ++; LA32-NEXT: mul.w $s8, $a5, $s4 ++; LA32-NEXT: add.w $s7, $s1, $s8 ++; LA32-NEXT: add.w $s1, $s7, $ra ++; LA32-NEXT: add.w $a1, $fp, $a1 ++; LA32-NEXT: st.w $a1, $sp, 40 # 4-byte Folded Spill ++; LA32-NEXT: sltu $ra, $a1, $fp ++; LA32-NEXT: add.w $a1, $s1, $ra ++; LA32-NEXT: st.w $a1, $sp, 36 # 4-byte Folded Spill ++; LA32-NEXT: xor $s0, $a1, $s7 ++; LA32-NEXT: sltui $s0, $s0, 1 ++; LA32-NEXT: sltu $a1, $a1, $s7 ++; LA32-NEXT: masknez $s1, $a1, $s0 ++; 
LA32-NEXT: maskeqz $s0, $ra, $s0 ++; LA32-NEXT: add.w $t1, $s6, $t1 ++; LA32-NEXT: sltu $s6, $t1, $s6 ++; LA32-NEXT: add.w $s2, $s2, $s6 ++; LA32-NEXT: add.w $a2, $a0, $a2 ++; LA32-NEXT: sltu $a0, $a2, $a0 ++; LA32-NEXT: mulh.wu $s6, $t4, $t0 ++; LA32-NEXT: add.w $t8, $s6, $a0 ++; LA32-NEXT: add.w $a4, $a3, $a4 ++; LA32-NEXT: sltu $a3, $a4, $a3 ++; LA32-NEXT: mulh.wu $s6, $a5, $t7 ++; LA32-NEXT: add.w $a3, $s6, $a3 ++; LA32-NEXT: mul.w $s6, $t4, $t7 ++; LA32-NEXT: mul.w $t7, $a5, $t7 ++; LA32-NEXT: mul.w $ra, $t4, $t0 ++; LA32-NEXT: mul.w $t0, $a5, $t0 ++; LA32-NEXT: mul.w $t4, $t4, $s4 ++; LA32-NEXT: mul.w $a7, $a7, $t3 ++; LA32-NEXT: mul.w $a6, $t2, $a6 ++; LA32-NEXT: mul.w $t3, $t2, $t3 ++; LA32-NEXT: mul.w $a0, $t2, $s4 ++; LA32-NEXT: mulh.wu $t2, $t2, $s4 ++; LA32-NEXT: mulh.wu $a5, $s4, $a5 ++; LA32-NEXT: sltu $s4, $s7, $s8 ++; LA32-NEXT: add.w $s4, $a5, $s4 ++; LA32-NEXT: add.w $s4, $s5, $s4 ++; LA32-NEXT: sltu $s5, $s4, $s5 ++; LA32-NEXT: add.w $s5, $a5, $s5 ++; LA32-NEXT: ld.w $a1, $sp, 20 # 4-byte Folded Reload ++; LA32-NEXT: add.w $a1, $t0, $a1 ++; LA32-NEXT: sltu $a5, $a1, $t0 ++; LA32-NEXT: ld.w $t0, $sp, 28 # 4-byte Folded Reload ++; LA32-NEXT: add.w $t0, $t0, $a5 ++; LA32-NEXT: or $s0, $s0, $s1 ++; LA32-NEXT: add.w $a4, $t7, $a4 ++; LA32-NEXT: sltu $a5, $a4, $t7 ++; LA32-NEXT: add.w $t7, $a3, $a5 ++; LA32-NEXT: add.w $s1, $ra, $a2 ++; LA32-NEXT: sltu $a2, $s1, $ra ++; LA32-NEXT: add.w $t8, $t8, $a2 ++; LA32-NEXT: add.w $a5, $s6, $s3 ++; LA32-NEXT: sltu $a2, $a5, $s6 ++; LA32-NEXT: ld.w $a3, $sp, 16 # 4-byte Folded Reload ++; LA32-NEXT: add.w $a2, $a3, $a2 ++; LA32-NEXT: ld.w $s6, $sp, 12 # 4-byte Folded Reload ++; LA32-NEXT: ld.w $a3, $sp, 44 # 4-byte Folded Reload ++; LA32-NEXT: st.w $a3, $s6, 4 ++; LA32-NEXT: ld.w $a3, $sp, 24 # 4-byte Folded Reload ++; LA32-NEXT: add.w $a3, $s2, $a3 ++; LA32-NEXT: ld.w $s2, $sp, 32 # 4-byte Folded Reload ++; LA32-NEXT: add.w $s2, $t1, $s2 ++; LA32-NEXT: sltu $t1, $s2, $t1 ++; LA32-NEXT: add.w $a3, $a3, $t1 ++; LA32-NEXT: add.w $t1, $s8, $s4 ++; LA32-NEXT: sltu $s3, $t1, $s8 ++; LA32-NEXT: add.w $s3, $s5, $s3 ++; LA32-NEXT: add.w $t2, $t2, $a0 ++; LA32-NEXT: add.w $t2, $t2, $t4 ++; LA32-NEXT: add.w $t2, $t2, $s7 ++; LA32-NEXT: add.w $t4, $a0, $fp ++; LA32-NEXT: sltu $a0, $t4, $a0 ++; LA32-NEXT: add.w $a0, $t2, $a0 ++; LA32-NEXT: add.w $a0, $s3, $a0 ++; LA32-NEXT: add.w $t2, $t1, $t4 ++; LA32-NEXT: sltu $t1, $t2, $t1 ++; LA32-NEXT: add.w $a0, $a0, $t1 ++; LA32-NEXT: add.w $a0, $a0, $a3 ++; LA32-NEXT: add.w $t1, $t2, $s2 ++; LA32-NEXT: sltu $a3, $t1, $t2 ++; LA32-NEXT: add.w $a0, $a0, $a3 ++; LA32-NEXT: add.w $a3, $t6, $t0 ++; LA32-NEXT: add.w $a1, $a6, $a1 ++; LA32-NEXT: sltu $a6, $a1, $a6 ++; LA32-NEXT: add.w $t0, $a3, $a6 ++; LA32-NEXT: add.w $a1, $a7, $a1 ++; LA32-NEXT: sltu $a7, $a1, $a7 ++; LA32-NEXT: add.w $a3, $t5, $t0 ++; LA32-NEXT: add.w $a3, $a3, $a7 ++; LA32-NEXT: sltu $t2, $a3, $t5 ++; LA32-NEXT: xor $t4, $a3, $t5 ++; LA32-NEXT: sltui $t4, $t4, 1 ++; LA32-NEXT: masknez $t2, $t2, $t4 ++; LA32-NEXT: maskeqz $a7, $a7, $t4 ++; LA32-NEXT: st.w $a1, $s6, 8 ++; LA32-NEXT: or $a1, $a7, $t2 ++; LA32-NEXT: sltu $a7, $t0, $t6 ++; LA32-NEXT: xor $t0, $t0, $t6 ++; LA32-NEXT: sltui $t0, $t0, 1 ++; LA32-NEXT: masknez $a7, $a7, $t0 ++; LA32-NEXT: maskeqz $a6, $a6, $t0 ++; LA32-NEXT: or $a6, $a6, $a7 ++; LA32-NEXT: add.w $a6, $s1, $a6 ++; LA32-NEXT: sltu $a7, $a6, $s1 ++; LA32-NEXT: add.w $a7, $t8, $a7 ++; LA32-NEXT: add.w $a1, $a4, $a1 ++; LA32-NEXT: sltu $a4, $a1, $a4 ++; LA32-NEXT: add.w $a4, $t7, $a4 ++; LA32-NEXT: add.w $t0, $t1, $s0 ++; 
LA32-NEXT: sltu $t1, $t0, $t1 ++; LA32-NEXT: add.w $a0, $a0, $t1 ++; LA32-NEXT: st.w $a3, $s6, 12 ++; LA32-NEXT: add.w $a1, $a6, $a1 ++; LA32-NEXT: sltu $a6, $a1, $a6 ++; LA32-NEXT: add.w $a4, $a7, $a4 ++; LA32-NEXT: add.w $a4, $a4, $a6 ++; LA32-NEXT: sltu $t1, $a4, $a7 ++; LA32-NEXT: xor $a7, $a4, $a7 ++; LA32-NEXT: sltui $a7, $a7, 1 ++; LA32-NEXT: masknez $t1, $t1, $a7 ++; LA32-NEXT: maskeqz $a6, $a6, $a7 ++; LA32-NEXT: or $a6, $a6, $t1 ++; LA32-NEXT: add.w $a6, $a5, $a6 ++; LA32-NEXT: sltu $a5, $a6, $a5 ++; LA32-NEXT: add.w $a2, $a2, $a5 ++; LA32-NEXT: ld.w $t1, $sp, 48 # 4-byte Folded Reload ++; LA32-NEXT: add.w $a4, $t1, $a4 ++; LA32-NEXT: add.w $a1, $t3, $a1 ++; LA32-NEXT: sltu $a5, $a1, $t3 ++; LA32-NEXT: add.w $a4, $a4, $a5 ++; LA32-NEXT: sltu $a7, $a4, $t1 ++; LA32-NEXT: xor $t1, $a4, $t1 ++; LA32-NEXT: sltui $t1, $t1, 1 ++; LA32-NEXT: masknez $a7, $a7, $t1 ++; LA32-NEXT: maskeqz $a5, $a5, $t1 ++; LA32-NEXT: or $a5, $a5, $a7 ++; LA32-NEXT: add.w $a5, $a6, $a5 ++; LA32-NEXT: sltu $a6, $a5, $a6 ++; LA32-NEXT: add.w $a2, $a2, $a6 ++; LA32-NEXT: add.w $a0, $a2, $a0 ++; LA32-NEXT: add.w $a2, $a5, $t0 ++; LA32-NEXT: sltu $a5, $a2, $a5 ++; LA32-NEXT: add.w $a0, $a0, $a5 ++; LA32-NEXT: ld.w $a5, $sp, 40 # 4-byte Folded Reload ++; LA32-NEXT: add.w $a5, $a1, $a5 ++; LA32-NEXT: sltu $a1, $a5, $a1 ++; LA32-NEXT: ld.w $a6, $sp, 36 # 4-byte Folded Reload ++; LA32-NEXT: add.w $a6, $a4, $a6 ++; LA32-NEXT: add.w $a6, $a6, $a1 ++; LA32-NEXT: sltu $a7, $a6, $a4 ++; LA32-NEXT: xor $a4, $a6, $a4 ++; LA32-NEXT: sltui $a4, $a4, 1 ++; LA32-NEXT: masknez $a7, $a7, $a4 ++; LA32-NEXT: maskeqz $a1, $a1, $a4 ++; LA32-NEXT: or $a1, $a1, $a7 ++; LA32-NEXT: add.w $a1, $a2, $a1 ++; LA32-NEXT: sltu $a2, $a1, $a2 ++; LA32-NEXT: add.w $a0, $a0, $a2 ++; LA32-NEXT: srai.w $a2, $a3, 31 ++; LA32-NEXT: xor $a3, $a6, $a2 ++; LA32-NEXT: xor $a0, $a0, $a2 ++; LA32-NEXT: or $a0, $a3, $a0 ++; LA32-NEXT: xor $a3, $a5, $a2 ++; LA32-NEXT: xor $a1, $a1, $a2 ++; LA32-NEXT: or $a1, $a3, $a1 ++; LA32-NEXT: or $a0, $a1, $a0 + ; LA32-NEXT: sltu $a0, $zero, $a0 +-; LA32-NEXT: ld.w $fp, $sp, 56 # 4-byte Folded Reload +-; LA32-NEXT: ld.w $ra, $sp, 60 # 4-byte Folded Reload +-; LA32-NEXT: addi.w $sp, $sp, 64 ++; LA32-NEXT: ld.w $s8, $sp, 52 # 4-byte Folded Reload ++; LA32-NEXT: ld.w $s7, $sp, 56 # 4-byte Folded Reload ++; LA32-NEXT: ld.w $s6, $sp, 60 # 4-byte Folded Reload ++; LA32-NEXT: ld.w $s5, $sp, 64 # 4-byte Folded Reload ++; LA32-NEXT: ld.w $s4, $sp, 68 # 4-byte Folded Reload ++; LA32-NEXT: ld.w $s3, $sp, 72 # 4-byte Folded Reload ++; LA32-NEXT: ld.w $s2, $sp, 76 # 4-byte Folded Reload ++; LA32-NEXT: ld.w $s1, $sp, 80 # 4-byte Folded Reload ++; LA32-NEXT: ld.w $s0, $sp, 84 # 4-byte Folded Reload ++; LA32-NEXT: ld.w $fp, $sp, 88 # 4-byte Folded Reload ++; LA32-NEXT: ld.w $ra, $sp, 92 # 4-byte Folded Reload ++; LA32-NEXT: addi.w $sp, $sp, 96 + ; LA32-NEXT: ret + ; + ; LA64-LABEL: smuloi128: + ; LA64: # %bb.0: +-; LA64-NEXT: addi.d $sp, $sp, -32 +-; LA64-NEXT: .cfi_def_cfa_offset 32 +-; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +-; LA64-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill +-; LA64-NEXT: .cfi_offset 1, -8 +-; LA64-NEXT: .cfi_offset 22, -16 +-; LA64-NEXT: move $fp, $a4 +-; LA64-NEXT: st.d $zero, $sp, 8 +-; LA64-NEXT: addi.d $a4, $sp, 8 +-; LA64-NEXT: bl %plt(__muloti4) +-; LA64-NEXT: st.d $a1, $fp, 8 +-; LA64-NEXT: st.d $a0, $fp, 0 +-; LA64-NEXT: ld.d $a0, $sp, 8 ++; LA64-NEXT: srai.d $a5, $a1, 63 ++; LA64-NEXT: mul.d $a6, $a2, $a5 ++; LA64-NEXT: mulh.du $a7, $a2, $a5 ++; LA64-NEXT: add.d $a7, $a7, $a6 ++; LA64-NEXT: 
mul.d $a5, $a3, $a5 ++; LA64-NEXT: add.d $a5, $a7, $a5 ++; LA64-NEXT: srai.d $a7, $a3, 63 ++; LA64-NEXT: mul.d $t0, $a7, $a1 ++; LA64-NEXT: mulh.du $t1, $a7, $a0 ++; LA64-NEXT: add.d $t0, $t1, $t0 ++; LA64-NEXT: mul.d $a7, $a7, $a0 ++; LA64-NEXT: add.d $t0, $t0, $a7 ++; LA64-NEXT: add.d $a5, $t0, $a5 ++; LA64-NEXT: mulh.du $t0, $a0, $a2 ++; LA64-NEXT: mul.d $t1, $a1, $a2 ++; LA64-NEXT: add.d $t0, $t1, $t0 ++; LA64-NEXT: sltu $t1, $t0, $t1 ++; LA64-NEXT: mulh.du $t2, $a1, $a2 ++; LA64-NEXT: add.d $t1, $t2, $t1 ++; LA64-NEXT: mul.d $t2, $a0, $a3 ++; LA64-NEXT: add.d $t0, $t2, $t0 ++; LA64-NEXT: sltu $t2, $t0, $t2 ++; LA64-NEXT: mulh.du $t3, $a0, $a3 ++; LA64-NEXT: add.d $t2, $t3, $t2 ++; LA64-NEXT: add.d $a6, $a7, $a6 ++; LA64-NEXT: sltu $a7, $a6, $a7 ++; LA64-NEXT: add.d $a5, $a5, $a7 ++; LA64-NEXT: mul.d $a0, $a0, $a2 ++; LA64-NEXT: mul.d $a2, $a1, $a3 ++; LA64-NEXT: mulh.du $a1, $a1, $a3 ++; LA64-NEXT: add.d $a3, $t1, $t2 ++; LA64-NEXT: sltu $a7, $a3, $t1 ++; LA64-NEXT: add.d $a1, $a1, $a7 ++; LA64-NEXT: st.d $a0, $a4, 0 ++; LA64-NEXT: add.d $a0, $a2, $a3 ++; LA64-NEXT: sltu $a2, $a0, $a2 ++; LA64-NEXT: add.d $a1, $a1, $a2 ++; LA64-NEXT: st.d $t0, $a4, 8 ++; LA64-NEXT: add.d $a1, $a1, $a5 ++; LA64-NEXT: add.d $a2, $a0, $a6 ++; LA64-NEXT: sltu $a0, $a2, $a0 ++; LA64-NEXT: add.d $a0, $a1, $a0 ++; LA64-NEXT: srai.d $a1, $t0, 63 ++; LA64-NEXT: xor $a0, $a0, $a1 ++; LA64-NEXT: xor $a1, $a2, $a1 ++; LA64-NEXT: or $a0, $a1, $a0 + ; LA64-NEXT: sltu $a0, $zero, $a0 +-; LA64-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload +-; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +-; LA64-NEXT: addi.d $sp, $sp, 32 + ; LA64-NEXT: ret + %t = call {i128, i1} @llvm.smul.with.overflow.i128(i128 %v1, i128 %v2) + %val = extractvalue {i128, i1} %t, 0 +-- +2.20.1 + diff --git a/0057-LoongArch-Fix-pattern-for-FNMSUB_-S-D-instructions-7.patch b/0057-LoongArch-Fix-pattern-for-FNMSUB_-S-D-instructions-7.patch new file mode 100644 index 0000000000000000000000000000000000000000..746d0672c39128e7d374a2c2c41c766f48c16618 --- /dev/null +++ b/0057-LoongArch-Fix-pattern-for-FNMSUB_-S-D-instructions-7.patch @@ -0,0 +1,733 @@ +From 92f1e37509b7ef4829302213743a0f3c464c3e79 Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Wed, 29 Nov 2023 15:21:21 +0800 +Subject: [PATCH 57/66] [LoongArch] Fix pattern for FNMSUB_{S/D} instructions + (#73742) + +``` +when a=c=-0.0, b=0.0: +-(a * b + (-c)) = -0.0 +-a * b + c = 0.0 +(fneg (fma a, b (-c))) != (fma (fneg a), b ,c) +``` + +See https://reviews.llvm.org/D90901 for a similar discussion on X86. 
+ +(cherry picked from commit 5e7e0d603204ede803323a825318e365a87f73e9) +Change-Id: I7aa03f011b4a0304ea5651581bf95d0fc1e502d6 +--- + .../LoongArch/LoongArchFloat32InstrInfo.td | 8 +- + .../LoongArch/LoongArchFloat64InstrInfo.td | 6 +- + llvm/test/CodeGen/LoongArch/double-fma.ll | 259 ++++++++++++++++-- + llvm/test/CodeGen/LoongArch/float-fma.ll | 259 ++++++++++++++++-- + 4 files changed, 483 insertions(+), 49 deletions(-) + +diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td +index 826db54febd3..65120c083f49 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td +@@ -294,8 +294,12 @@ def : Pat<(fneg (fma FPR32:$fj, FPR32:$fk, FPR32:$fa)), + def : Pat<(fma_nsz (fneg FPR32:$fj), FPR32:$fk, (fneg FPR32:$fa)), + (FNMADD_S FPR32:$fj, FPR32:$fk, FPR32:$fa)>; + +-// fnmsub.s: -fj * fk + fa +-def : Pat<(fma (fneg FPR32:$fj), FPR32:$fk, FPR32:$fa), ++// fnmsub.s: -(fj * fk - fa) ++def : Pat<(fneg (fma FPR32:$fj, FPR32:$fk, (fneg FPR32:$fa))), ++ (FNMSUB_S FPR32:$fj, FPR32:$fk, FPR32:$fa)>; ++ ++// fnmsub.s: -fj * fk + fa (the nsz flag on the FMA) ++def : Pat<(fma_nsz (fneg FPR32:$fj), FPR32:$fk, FPR32:$fa), + (FNMSUB_S FPR32:$fj, FPR32:$fk, FPR32:$fa)>; + } // Predicates = [HasBasicF] + +diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td +index 5118474725b6..437c1e4d7be2 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td +@@ -256,7 +256,11 @@ def : Pat<(fma_nsz (fneg FPR64:$fj), FPR64:$fk, (fneg FPR64:$fa)), + (FNMADD_D FPR64:$fj, FPR64:$fk, FPR64:$fa)>; + + // fnmsub.d: -(fj * fk - fa) +-def : Pat<(fma (fneg FPR64:$fj), FPR64:$fk, FPR64:$fa), ++def : Pat<(fneg (fma FPR64:$fj, FPR64:$fk, (fneg FPR64:$fa))), ++ (FNMSUB_D FPR64:$fj, FPR64:$fk, FPR64:$fa)>; ++ ++// fnmsub.d: -fj * fk + fa (the nsz flag on the FMA) ++def : Pat<(fma_nsz (fneg FPR64:$fj), FPR64:$fk, FPR64:$fa), + (FNMSUB_D FPR64:$fj, FPR64:$fk, FPR64:$fa)>; + } // Predicates = [HasBasicD] + +diff --git a/llvm/test/CodeGen/LoongArch/double-fma.ll b/llvm/test/CodeGen/LoongArch/double-fma.ll +index 6dd628479433..58d20c62a668 100644 +--- a/llvm/test/CodeGen/LoongArch/double-fma.ll ++++ b/llvm/test/CodeGen/LoongArch/double-fma.ll +@@ -236,13 +236,15 @@ define double @fnmsub_d(double %a, double %b, double %c) nounwind { + ; LA32-CONTRACT-ON-LABEL: fnmsub_d: + ; LA32-CONTRACT-ON: # %bb.0: + ; LA32-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1 +-; LA32-CONTRACT-ON-NEXT: fsub.d $fa0, $fa2, $fa0 ++; LA32-CONTRACT-ON-NEXT: fsub.d $fa0, $fa0, $fa2 ++; LA32-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0 + ; LA32-CONTRACT-ON-NEXT: ret + ; + ; LA32-CONTRACT-OFF-LABEL: fnmsub_d: + ; LA32-CONTRACT-OFF: # %bb.0: + ; LA32-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1 +-; LA32-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa2, $fa0 ++; LA32-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa0, $fa2 ++; LA32-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0 + ; LA32-CONTRACT-OFF-NEXT: ret + ; + ; LA64-CONTRACT-FAST-LABEL: fnmsub_d: +@@ -253,12 +255,98 @@ define double @fnmsub_d(double %a, double %b, double %c) nounwind { + ; LA64-CONTRACT-ON-LABEL: fnmsub_d: + ; LA64-CONTRACT-ON: # %bb.0: + ; LA64-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1 +-; LA64-CONTRACT-ON-NEXT: fsub.d $fa0, $fa2, $fa0 ++; LA64-CONTRACT-ON-NEXT: fsub.d $fa0, $fa0, $fa2 ++; LA64-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0 + ; LA64-CONTRACT-ON-NEXT: ret + ; + 
; LA64-CONTRACT-OFF-LABEL: fnmsub_d: + ; LA64-CONTRACT-OFF: # %bb.0: + ; LA64-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1 ++; LA64-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa0, $fa2 ++; LA64-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0 ++; LA64-CONTRACT-OFF-NEXT: ret ++ %negc = fneg double %c ++ %mul = fmul double %a, %b ++ %add = fadd double %mul, %negc ++ %neg = fneg double %add ++ ret double %neg ++} ++ ++define double @fnmsub_d_nsz(double %a, double %b, double %c) nounwind { ++; LA32-CONTRACT-FAST-LABEL: fnmsub_d_nsz: ++; LA32-CONTRACT-FAST: # %bb.0: ++; LA32-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-FAST-NEXT: ret ++; ++; LA32-CONTRACT-ON-LABEL: fnmsub_d_nsz: ++; LA32-CONTRACT-ON: # %bb.0: ++; LA32-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1 ++; LA32-CONTRACT-ON-NEXT: fsub.d $fa0, $fa2, $fa0 ++; LA32-CONTRACT-ON-NEXT: ret ++; ++; LA32-CONTRACT-OFF-LABEL: fnmsub_d_nsz: ++; LA32-CONTRACT-OFF: # %bb.0: ++; LA32-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1 ++; LA32-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa2, $fa0 ++; LA32-CONTRACT-OFF-NEXT: ret ++; ++; LA64-CONTRACT-FAST-LABEL: fnmsub_d_nsz: ++; LA64-CONTRACT-FAST: # %bb.0: ++; LA64-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-FAST-NEXT: ret ++; ++; LA64-CONTRACT-ON-LABEL: fnmsub_d_nsz: ++; LA64-CONTRACT-ON: # %bb.0: ++; LA64-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1 ++; LA64-CONTRACT-ON-NEXT: fsub.d $fa0, $fa2, $fa0 ++; LA64-CONTRACT-ON-NEXT: ret ++; ++; LA64-CONTRACT-OFF-LABEL: fnmsub_d_nsz: ++; LA64-CONTRACT-OFF: # %bb.0: ++; LA64-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1 ++; LA64-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa2, $fa0 ++; LA64-CONTRACT-OFF-NEXT: ret ++ %nega = fneg nsz double %a ++ %mul = fmul nsz double %nega, %b ++ %add = fadd nsz double %mul, %c ++ ret double %add ++} ++ ++;; Check that fnmsub.d is not emitted. 
++define double @not_fnmsub_d(double %a, double %b, double %c) nounwind { ++; LA32-CONTRACT-FAST-LABEL: not_fnmsub_d: ++; LA32-CONTRACT-FAST: # %bb.0: ++; LA32-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0 ++; LA32-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-FAST-NEXT: ret ++; ++; LA32-CONTRACT-ON-LABEL: not_fnmsub_d: ++; LA32-CONTRACT-ON: # %bb.0: ++; LA32-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1 ++; LA32-CONTRACT-ON-NEXT: fsub.d $fa0, $fa2, $fa0 ++; LA32-CONTRACT-ON-NEXT: ret ++; ++; LA32-CONTRACT-OFF-LABEL: not_fnmsub_d: ++; LA32-CONTRACT-OFF: # %bb.0: ++; LA32-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1 ++; LA32-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa2, $fa0 ++; LA32-CONTRACT-OFF-NEXT: ret ++; ++; LA64-CONTRACT-FAST-LABEL: not_fnmsub_d: ++; LA64-CONTRACT-FAST: # %bb.0: ++; LA64-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0 ++; LA64-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-FAST-NEXT: ret ++; ++; LA64-CONTRACT-ON-LABEL: not_fnmsub_d: ++; LA64-CONTRACT-ON: # %bb.0: ++; LA64-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1 ++; LA64-CONTRACT-ON-NEXT: fsub.d $fa0, $fa2, $fa0 ++; LA64-CONTRACT-ON-NEXT: ret ++; ++; LA64-CONTRACT-OFF-LABEL: not_fnmsub_d: ++; LA64-CONTRACT-OFF: # %bb.0: ++; LA64-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1 + ; LA64-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa2, $fa0 + ; LA64-CONTRACT-OFF-NEXT: ret + %nega = fneg double %a +@@ -483,6 +571,86 @@ define double @contract_fnmsub_d(double %a, double %b, double %c) nounwind { + ; LA64-CONTRACT-OFF-LABEL: contract_fnmsub_d: + ; LA64-CONTRACT-OFF: # %bb.0: + ; LA64-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-OFF-NEXT: ret ++ %negc = fneg contract double %c ++ %mul = fmul contract double %a, %b ++ %add = fadd contract double %mul, %negc ++ %neg = fneg contract double %add ++ ret double %neg ++} ++ ++define double @contract_fnmsub_d_nsz(double %a, double %b, double %c) nounwind { ++; LA32-CONTRACT-FAST-LABEL: contract_fnmsub_d_nsz: ++; LA32-CONTRACT-FAST: # %bb.0: ++; LA32-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-FAST-NEXT: ret ++; ++; LA32-CONTRACT-ON-LABEL: contract_fnmsub_d_nsz: ++; LA32-CONTRACT-ON: # %bb.0: ++; LA32-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-ON-NEXT: ret ++; ++; LA32-CONTRACT-OFF-LABEL: contract_fnmsub_d_nsz: ++; LA32-CONTRACT-OFF: # %bb.0: ++; LA32-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-OFF-NEXT: ret ++; ++; LA64-CONTRACT-FAST-LABEL: contract_fnmsub_d_nsz: ++; LA64-CONTRACT-FAST: # %bb.0: ++; LA64-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-FAST-NEXT: ret ++; ++; LA64-CONTRACT-ON-LABEL: contract_fnmsub_d_nsz: ++; LA64-CONTRACT-ON: # %bb.0: ++; LA64-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-ON-NEXT: ret ++; ++; LA64-CONTRACT-OFF-LABEL: contract_fnmsub_d_nsz: ++; LA64-CONTRACT-OFF: # %bb.0: ++; LA64-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-OFF-NEXT: ret ++ %nega = fneg contract nsz double %a ++ %mul = fmul contract nsz double %nega, %b ++ %add = fadd contract nsz double %mul, %c ++ ret double %add ++} ++ ++;; Check that fnmsub.d is not emitted. 
++define double @not_contract_fnmsub_d(double %a, double %b, double %c) nounwind { ++; LA32-CONTRACT-FAST-LABEL: not_contract_fnmsub_d: ++; LA32-CONTRACT-FAST: # %bb.0: ++; LA32-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0 ++; LA32-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-FAST-NEXT: ret ++; ++; LA32-CONTRACT-ON-LABEL: not_contract_fnmsub_d: ++; LA32-CONTRACT-ON: # %bb.0: ++; LA32-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0 ++; LA32-CONTRACT-ON-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-ON-NEXT: ret ++; ++; LA32-CONTRACT-OFF-LABEL: not_contract_fnmsub_d: ++; LA32-CONTRACT-OFF: # %bb.0: ++; LA32-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0 ++; LA32-CONTRACT-OFF-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-OFF-NEXT: ret ++; ++; LA64-CONTRACT-FAST-LABEL: not_contract_fnmsub_d: ++; LA64-CONTRACT-FAST: # %bb.0: ++; LA64-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0 ++; LA64-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-FAST-NEXT: ret ++; ++; LA64-CONTRACT-ON-LABEL: not_contract_fnmsub_d: ++; LA64-CONTRACT-ON: # %bb.0: ++; LA64-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0 ++; LA64-CONTRACT-ON-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-ON-NEXT: ret ++; ++; LA64-CONTRACT-OFF-LABEL: not_contract_fnmsub_d: ++; LA64-CONTRACT-OFF: # %bb.0: ++; LA64-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0 ++; LA64-CONTRACT-OFF-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 + ; LA64-CONTRACT-OFF-NEXT: ret + %nega = fneg contract double %a + %mul = fmul contract double %nega, %b +@@ -592,8 +760,8 @@ define double @fnmadd_d_intrinsics(double %a, double %b, double %c) nounwind { + ; LA64-CONTRACT-OFF-NEXT: fnmadd.d $fa0, $fa0, $fa1, $fa2 + ; LA64-CONTRACT-OFF-NEXT: ret + %fma = call double @llvm.fma.f64(double %a, double %b, double %c) +- %neg = fneg double %fma +- ret double %neg ++ %negfma = fneg double %fma ++ ret double %negfma + } + + define double @fnmadd_d_nsz_intrinsics(double %a, double %b, double %c) nounwind { +@@ -704,44 +872,87 @@ define double @fnmsub_d_intrinsics(double %a, double %b, double %c) nounwind { + ; LA64-CONTRACT-OFF-LABEL: fnmsub_d_intrinsics: + ; LA64-CONTRACT-OFF: # %bb.0: + ; LA64-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-OFF-NEXT: ret ++ %negc = fneg double %c ++ %fma = call double @llvm.fma.f64(double %a, double %b, double %negc) ++ %negfma = fneg double %fma ++ ret double %negfma ++} ++ ++define double @fnmsub_d_nsz_intrinsics(double %a, double %b, double %c) nounwind { ++; LA32-CONTRACT-FAST-LABEL: fnmsub_d_nsz_intrinsics: ++; LA32-CONTRACT-FAST: # %bb.0: ++; LA32-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-FAST-NEXT: ret ++; ++; LA32-CONTRACT-ON-LABEL: fnmsub_d_nsz_intrinsics: ++; LA32-CONTRACT-ON: # %bb.0: ++; LA32-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-ON-NEXT: ret ++; ++; LA32-CONTRACT-OFF-LABEL: fnmsub_d_nsz_intrinsics: ++; LA32-CONTRACT-OFF: # %bb.0: ++; LA32-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-OFF-NEXT: ret ++; ++; LA64-CONTRACT-FAST-LABEL: fnmsub_d_nsz_intrinsics: ++; LA64-CONTRACT-FAST: # %bb.0: ++; LA64-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-FAST-NEXT: ret ++; ++; LA64-CONTRACT-ON-LABEL: fnmsub_d_nsz_intrinsics: ++; LA64-CONTRACT-ON: # %bb.0: ++; LA64-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-ON-NEXT: ret ++; ++; LA64-CONTRACT-OFF-LABEL: fnmsub_d_nsz_intrinsics: ++; LA64-CONTRACT-OFF: # %bb.0: ++; LA64-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 + ; LA64-CONTRACT-OFF-NEXT: 
ret + %nega = fneg double %a +- %fma = call double @llvm.fma.f64(double %nega, double %b, double %c) ++ %fma = call nsz double @llvm.fma.f64(double %nega, double %b, double %c) + ret double %fma + } + +-define double @fnmsub_d_swap_intrinsics(double %a, double %b, double %c) nounwind { +-; LA32-CONTRACT-FAST-LABEL: fnmsub_d_swap_intrinsics: ++;; Check that fnmsub.d is not emitted. ++define double @not_fnmsub_d_intrinsics(double %a, double %b, double %c) nounwind { ++; LA32-CONTRACT-FAST-LABEL: not_fnmsub_d_intrinsics: + ; LA32-CONTRACT-FAST: # %bb.0: +-; LA32-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2 ++; LA32-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0 ++; LA32-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 + ; LA32-CONTRACT-FAST-NEXT: ret + ; +-; LA32-CONTRACT-ON-LABEL: fnmsub_d_swap_intrinsics: ++; LA32-CONTRACT-ON-LABEL: not_fnmsub_d_intrinsics: + ; LA32-CONTRACT-ON: # %bb.0: +-; LA32-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2 ++; LA32-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0 ++; LA32-CONTRACT-ON-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 + ; LA32-CONTRACT-ON-NEXT: ret + ; +-; LA32-CONTRACT-OFF-LABEL: fnmsub_d_swap_intrinsics: ++; LA32-CONTRACT-OFF-LABEL: not_fnmsub_d_intrinsics: + ; LA32-CONTRACT-OFF: # %bb.0: +-; LA32-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2 ++; LA32-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0 ++; LA32-CONTRACT-OFF-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 + ; LA32-CONTRACT-OFF-NEXT: ret + ; +-; LA64-CONTRACT-FAST-LABEL: fnmsub_d_swap_intrinsics: ++; LA64-CONTRACT-FAST-LABEL: not_fnmsub_d_intrinsics: + ; LA64-CONTRACT-FAST: # %bb.0: +-; LA64-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2 ++; LA64-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0 ++; LA64-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 + ; LA64-CONTRACT-FAST-NEXT: ret + ; +-; LA64-CONTRACT-ON-LABEL: fnmsub_d_swap_intrinsics: ++; LA64-CONTRACT-ON-LABEL: not_fnmsub_d_intrinsics: + ; LA64-CONTRACT-ON: # %bb.0: +-; LA64-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2 ++; LA64-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0 ++; LA64-CONTRACT-ON-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 + ; LA64-CONTRACT-ON-NEXT: ret + ; +-; LA64-CONTRACT-OFF-LABEL: fnmsub_d_swap_intrinsics: ++; LA64-CONTRACT-OFF-LABEL: not_fnmsub_d_intrinsics: + ; LA64-CONTRACT-OFF: # %bb.0: +-; LA64-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2 ++; LA64-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0 ++; LA64-CONTRACT-OFF-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 + ; LA64-CONTRACT-OFF-NEXT: ret +- %negb = fneg double %b +- %fma = call double @llvm.fma.f64(double %a, double %negb, double %c) ++ %nega = fneg double %a ++ %fma = call double @llvm.fma.f64(double %nega, double %b, double %c) + ret double %fma + } + +@@ -882,6 +1093,8 @@ define double @fnmsub_d_contract(double %a, double %b, double %c) nounwind { + ; LA64-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 + ; LA64-CONTRACT-OFF-NEXT: ret + %mul = fmul contract double %a, %b +- %sub = fsub contract double %c, %mul +- ret double %sub ++ %negc = fneg contract double %c ++ %add = fadd contract double %negc, %mul ++ %negadd = fneg contract double %add ++ ret double %negadd + } +diff --git a/llvm/test/CodeGen/LoongArch/float-fma.ll b/llvm/test/CodeGen/LoongArch/float-fma.ll +index 54dc56784006..c236255d971a 100644 +--- a/llvm/test/CodeGen/LoongArch/float-fma.ll ++++ b/llvm/test/CodeGen/LoongArch/float-fma.ll +@@ -236,13 +236,15 @@ define float @fnmsub_s(float %a, float %b, float %c) nounwind { + ; LA32-CONTRACT-ON-LABEL: fnmsub_s: + ; LA32-CONTRACT-ON: # %bb.0: + ; LA32-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1 
+-; LA32-CONTRACT-ON-NEXT: fsub.s $fa0, $fa2, $fa0 ++; LA32-CONTRACT-ON-NEXT: fsub.s $fa0, $fa0, $fa2 ++; LA32-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0 + ; LA32-CONTRACT-ON-NEXT: ret + ; + ; LA32-CONTRACT-OFF-LABEL: fnmsub_s: + ; LA32-CONTRACT-OFF: # %bb.0: + ; LA32-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1 +-; LA32-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa2, $fa0 ++; LA32-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa0, $fa2 ++; LA32-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0 + ; LA32-CONTRACT-OFF-NEXT: ret + ; + ; LA64-CONTRACT-FAST-LABEL: fnmsub_s: +@@ -253,12 +255,98 @@ define float @fnmsub_s(float %a, float %b, float %c) nounwind { + ; LA64-CONTRACT-ON-LABEL: fnmsub_s: + ; LA64-CONTRACT-ON: # %bb.0: + ; LA64-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1 +-; LA64-CONTRACT-ON-NEXT: fsub.s $fa0, $fa2, $fa0 ++; LA64-CONTRACT-ON-NEXT: fsub.s $fa0, $fa0, $fa2 ++; LA64-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0 + ; LA64-CONTRACT-ON-NEXT: ret + ; + ; LA64-CONTRACT-OFF-LABEL: fnmsub_s: + ; LA64-CONTRACT-OFF: # %bb.0: + ; LA64-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1 ++; LA64-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa0, $fa2 ++; LA64-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0 ++; LA64-CONTRACT-OFF-NEXT: ret ++ %negc = fneg float %c ++ %mul = fmul float %a, %b ++ %add = fadd float %mul, %negc ++ %neg = fneg float %add ++ ret float %neg ++} ++ ++define float @fnmsub_s_nsz(float %a, float %b, float %c) nounwind { ++; LA32-CONTRACT-FAST-LABEL: fnmsub_s_nsz: ++; LA32-CONTRACT-FAST: # %bb.0: ++; LA32-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-FAST-NEXT: ret ++; ++; LA32-CONTRACT-ON-LABEL: fnmsub_s_nsz: ++; LA32-CONTRACT-ON: # %bb.0: ++; LA32-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1 ++; LA32-CONTRACT-ON-NEXT: fsub.s $fa0, $fa2, $fa0 ++; LA32-CONTRACT-ON-NEXT: ret ++; ++; LA32-CONTRACT-OFF-LABEL: fnmsub_s_nsz: ++; LA32-CONTRACT-OFF: # %bb.0: ++; LA32-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1 ++; LA32-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa2, $fa0 ++; LA32-CONTRACT-OFF-NEXT: ret ++; ++; LA64-CONTRACT-FAST-LABEL: fnmsub_s_nsz: ++; LA64-CONTRACT-FAST: # %bb.0: ++; LA64-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-FAST-NEXT: ret ++; ++; LA64-CONTRACT-ON-LABEL: fnmsub_s_nsz: ++; LA64-CONTRACT-ON: # %bb.0: ++; LA64-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1 ++; LA64-CONTRACT-ON-NEXT: fsub.s $fa0, $fa2, $fa0 ++; LA64-CONTRACT-ON-NEXT: ret ++; ++; LA64-CONTRACT-OFF-LABEL: fnmsub_s_nsz: ++; LA64-CONTRACT-OFF: # %bb.0: ++; LA64-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1 ++; LA64-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa2, $fa0 ++; LA64-CONTRACT-OFF-NEXT: ret ++ %nega = fneg nsz float %a ++ %mul = fmul nsz float %nega, %b ++ %add = fadd nsz float %mul, %c ++ ret float %add ++} ++ ++;; Check that fnmsub.s is not emitted. 
++define float @not_fnmsub_s(float %a, float %b, float %c) nounwind { ++; LA32-CONTRACT-FAST-LABEL: not_fnmsub_s: ++; LA32-CONTRACT-FAST: # %bb.0: ++; LA32-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0 ++; LA32-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-FAST-NEXT: ret ++; ++; LA32-CONTRACT-ON-LABEL: not_fnmsub_s: ++; LA32-CONTRACT-ON: # %bb.0: ++; LA32-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1 ++; LA32-CONTRACT-ON-NEXT: fsub.s $fa0, $fa2, $fa0 ++; LA32-CONTRACT-ON-NEXT: ret ++; ++; LA32-CONTRACT-OFF-LABEL: not_fnmsub_s: ++; LA32-CONTRACT-OFF: # %bb.0: ++; LA32-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1 ++; LA32-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa2, $fa0 ++; LA32-CONTRACT-OFF-NEXT: ret ++; ++; LA64-CONTRACT-FAST-LABEL: not_fnmsub_s: ++; LA64-CONTRACT-FAST: # %bb.0: ++; LA64-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0 ++; LA64-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-FAST-NEXT: ret ++; ++; LA64-CONTRACT-ON-LABEL: not_fnmsub_s: ++; LA64-CONTRACT-ON: # %bb.0: ++; LA64-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1 ++; LA64-CONTRACT-ON-NEXT: fsub.s $fa0, $fa2, $fa0 ++; LA64-CONTRACT-ON-NEXT: ret ++; ++; LA64-CONTRACT-OFF-LABEL: not_fnmsub_s: ++; LA64-CONTRACT-OFF: # %bb.0: ++; LA64-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1 + ; LA64-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa2, $fa0 + ; LA64-CONTRACT-OFF-NEXT: ret + %nega = fneg float %a +@@ -483,6 +571,86 @@ define float @contract_fnmsub_s(float %a, float %b, float %c) nounwind { + ; LA64-CONTRACT-OFF-LABEL: contract_fnmsub_s: + ; LA64-CONTRACT-OFF: # %bb.0: + ; LA64-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-OFF-NEXT: ret ++ %negc = fneg contract float %c ++ %mul = fmul contract float %a, %b ++ %add = fadd contract float %mul, %negc ++ %neg = fneg contract float %add ++ ret float %neg ++} ++ ++define float @contract_fnmsub_s_nsz(float %a, float %b, float %c) nounwind { ++; LA32-CONTRACT-FAST-LABEL: contract_fnmsub_s_nsz: ++; LA32-CONTRACT-FAST: # %bb.0: ++; LA32-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-FAST-NEXT: ret ++; ++; LA32-CONTRACT-ON-LABEL: contract_fnmsub_s_nsz: ++; LA32-CONTRACT-ON: # %bb.0: ++; LA32-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-ON-NEXT: ret ++; ++; LA32-CONTRACT-OFF-LABEL: contract_fnmsub_s_nsz: ++; LA32-CONTRACT-OFF: # %bb.0: ++; LA32-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-OFF-NEXT: ret ++; ++; LA64-CONTRACT-FAST-LABEL: contract_fnmsub_s_nsz: ++; LA64-CONTRACT-FAST: # %bb.0: ++; LA64-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-FAST-NEXT: ret ++; ++; LA64-CONTRACT-ON-LABEL: contract_fnmsub_s_nsz: ++; LA64-CONTRACT-ON: # %bb.0: ++; LA64-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-ON-NEXT: ret ++; ++; LA64-CONTRACT-OFF-LABEL: contract_fnmsub_s_nsz: ++; LA64-CONTRACT-OFF: # %bb.0: ++; LA64-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-OFF-NEXT: ret ++ %nega = fneg contract nsz float %a ++ %mul = fmul contract nsz float %nega, %b ++ %add = fadd contract nsz float %mul, %c ++ ret float %add ++} ++ ++;; Check that fnmsub.s is not emitted. 
++define float @not_contract_fnmsub_s(float %a, float %b, float %c) nounwind { ++; LA32-CONTRACT-FAST-LABEL: not_contract_fnmsub_s: ++; LA32-CONTRACT-FAST: # %bb.0: ++; LA32-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0 ++; LA32-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-FAST-NEXT: ret ++; ++; LA32-CONTRACT-ON-LABEL: not_contract_fnmsub_s: ++; LA32-CONTRACT-ON: # %bb.0: ++; LA32-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0 ++; LA32-CONTRACT-ON-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-ON-NEXT: ret ++; ++; LA32-CONTRACT-OFF-LABEL: not_contract_fnmsub_s: ++; LA32-CONTRACT-OFF: # %bb.0: ++; LA32-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0 ++; LA32-CONTRACT-OFF-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-OFF-NEXT: ret ++; ++; LA64-CONTRACT-FAST-LABEL: not_contract_fnmsub_s: ++; LA64-CONTRACT-FAST: # %bb.0: ++; LA64-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0 ++; LA64-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-FAST-NEXT: ret ++; ++; LA64-CONTRACT-ON-LABEL: not_contract_fnmsub_s: ++; LA64-CONTRACT-ON: # %bb.0: ++; LA64-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0 ++; LA64-CONTRACT-ON-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-ON-NEXT: ret ++; ++; LA64-CONTRACT-OFF-LABEL: not_contract_fnmsub_s: ++; LA64-CONTRACT-OFF: # %bb.0: ++; LA64-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0 ++; LA64-CONTRACT-OFF-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 + ; LA64-CONTRACT-OFF-NEXT: ret + %nega = fneg contract float %a + %mul = fmul contract float %nega, %b +@@ -592,8 +760,8 @@ define float @fnmadd_s_intrinsics(float %a, float %b, float %c) nounwind { + ; LA64-CONTRACT-OFF-NEXT: fnmadd.s $fa0, $fa0, $fa1, $fa2 + ; LA64-CONTRACT-OFF-NEXT: ret + %fma = call float @llvm.fma.f64(float %a, float %b, float %c) +- %neg = fneg float %fma +- ret float %neg ++ %negfma = fneg float %fma ++ ret float %negfma + } + + define float @fnmadd_s_nsz_intrinsics(float %a, float %b, float %c) nounwind { +@@ -704,44 +872,87 @@ define float @fnmsub_s_intrinsics(float %a, float %b, float %c) nounwind { + ; LA64-CONTRACT-OFF-LABEL: fnmsub_s_intrinsics: + ; LA64-CONTRACT-OFF: # %bb.0: + ; LA64-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-OFF-NEXT: ret ++ %negc = fneg float %c ++ %fma = call float @llvm.fma.f64(float %a, float %b, float %negc) ++ %negfma = fneg float %fma ++ ret float %negfma ++} ++ ++define float @fnmsub_s_nsz_intrinsics(float %a, float %b, float %c) nounwind { ++; LA32-CONTRACT-FAST-LABEL: fnmsub_s_nsz_intrinsics: ++; LA32-CONTRACT-FAST: # %bb.0: ++; LA32-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-FAST-NEXT: ret ++; ++; LA32-CONTRACT-ON-LABEL: fnmsub_s_nsz_intrinsics: ++; LA32-CONTRACT-ON: # %bb.0: ++; LA32-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-ON-NEXT: ret ++; ++; LA32-CONTRACT-OFF-LABEL: fnmsub_s_nsz_intrinsics: ++; LA32-CONTRACT-OFF: # %bb.0: ++; LA32-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 ++; LA32-CONTRACT-OFF-NEXT: ret ++; ++; LA64-CONTRACT-FAST-LABEL: fnmsub_s_nsz_intrinsics: ++; LA64-CONTRACT-FAST: # %bb.0: ++; LA64-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-FAST-NEXT: ret ++; ++; LA64-CONTRACT-ON-LABEL: fnmsub_s_nsz_intrinsics: ++; LA64-CONTRACT-ON: # %bb.0: ++; LA64-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 ++; LA64-CONTRACT-ON-NEXT: ret ++; ++; LA64-CONTRACT-OFF-LABEL: fnmsub_s_nsz_intrinsics: ++; LA64-CONTRACT-OFF: # %bb.0: ++; LA64-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 + ; LA64-CONTRACT-OFF-NEXT: ret + %nega = fneg float %a +- %fma = 
call float @llvm.fma.f64(float %nega, float %b, float %c) ++ %fma = call nsz float @llvm.fma.f64(float %nega, float %b, float %c) + ret float %fma + } + +-define float @fnmsub_s_swap_intrinsics(float %a, float %b, float %c) nounwind { +-; LA32-CONTRACT-FAST-LABEL: fnmsub_s_swap_intrinsics: ++;; Check that fnmsub.s is not emitted. ++define float @not_fnmsub_s_intrinsics(float %a, float %b, float %c) nounwind { ++; LA32-CONTRACT-FAST-LABEL: not_fnmsub_s_intrinsics: + ; LA32-CONTRACT-FAST: # %bb.0: +-; LA32-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2 ++; LA32-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0 ++; LA32-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 + ; LA32-CONTRACT-FAST-NEXT: ret + ; +-; LA32-CONTRACT-ON-LABEL: fnmsub_s_swap_intrinsics: ++; LA32-CONTRACT-ON-LABEL: not_fnmsub_s_intrinsics: + ; LA32-CONTRACT-ON: # %bb.0: +-; LA32-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2 ++; LA32-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0 ++; LA32-CONTRACT-ON-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 + ; LA32-CONTRACT-ON-NEXT: ret + ; +-; LA32-CONTRACT-OFF-LABEL: fnmsub_s_swap_intrinsics: ++; LA32-CONTRACT-OFF-LABEL: not_fnmsub_s_intrinsics: + ; LA32-CONTRACT-OFF: # %bb.0: +-; LA32-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2 ++; LA32-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0 ++; LA32-CONTRACT-OFF-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 + ; LA32-CONTRACT-OFF-NEXT: ret + ; +-; LA64-CONTRACT-FAST-LABEL: fnmsub_s_swap_intrinsics: ++; LA64-CONTRACT-FAST-LABEL: not_fnmsub_s_intrinsics: + ; LA64-CONTRACT-FAST: # %bb.0: +-; LA64-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2 ++; LA64-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0 ++; LA64-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 + ; LA64-CONTRACT-FAST-NEXT: ret + ; +-; LA64-CONTRACT-ON-LABEL: fnmsub_s_swap_intrinsics: ++; LA64-CONTRACT-ON-LABEL: not_fnmsub_s_intrinsics: + ; LA64-CONTRACT-ON: # %bb.0: +-; LA64-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2 ++; LA64-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0 ++; LA64-CONTRACT-ON-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 + ; LA64-CONTRACT-ON-NEXT: ret + ; +-; LA64-CONTRACT-OFF-LABEL: fnmsub_s_swap_intrinsics: ++; LA64-CONTRACT-OFF-LABEL: not_fnmsub_s_intrinsics: + ; LA64-CONTRACT-OFF: # %bb.0: +-; LA64-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2 ++; LA64-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0 ++; LA64-CONTRACT-OFF-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 + ; LA64-CONTRACT-OFF-NEXT: ret +- %negb = fneg float %b +- %fma = call float @llvm.fma.f64(float %a, float %negb, float %c) ++ %nega = fneg float %a ++ %fma = call float @llvm.fma.f64(float %nega, float %b, float %c) + ret float %fma + } + +@@ -882,6 +1093,8 @@ define float @fnmsub_s_contract(float %a, float %b, float %c) nounwind { + ; LA64-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 + ; LA64-CONTRACT-OFF-NEXT: ret + %mul = fmul contract float %a, %b +- %sub = fsub contract float %c, %mul +- ret float %sub ++ %negc = fneg contract float %c ++ %add = fadd contract float %negc, %mul ++ %negadd = fneg contract float %add ++ ret float %negadd + } +-- +2.20.1 + diff --git a/0058-LoongArch-Fix-the-procossor-series-mask.patch b/0058-LoongArch-Fix-the-procossor-series-mask.patch new file mode 100644 index 0000000000000000000000000000000000000000..88c83d45874aff735c2f1b729146f343d5a1b5a2 --- /dev/null +++ b/0058-LoongArch-Fix-the-procossor-series-mask.patch @@ -0,0 +1,31 @@ +From 43d78731b1d4c7419d82b6fab2ae479652cd2b58 Mon Sep 17 00:00:00 2001 +From: Weining Lu +Date: Wed, 3 Jan 2024 13:59:12 +0800 +Subject: [PATCH 58/66] [LoongArch] Fix the procossor series 
mask + +Refer PRID_SERIES_MASK definition in linux kernel: +arch/loongarch/include/asm/cpu.h. + +(cherry picked from commit 7e186d366d6c7def0543acc255931f617e76dff0) +Change-Id: I32a641e85329b0dec7d538af6471f4c40775e0a5 +--- + llvm/lib/TargetParser/Host.cpp | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp +index 81309280a44b..d11dc605e188 100644 +--- a/llvm/lib/TargetParser/Host.cpp ++++ b/llvm/lib/TargetParser/Host.cpp +@@ -1462,7 +1462,8 @@ StringRef sys::getHostCPUName() { + // Use processor id to detect cpu name. + uint32_t processor_id; + __asm__("cpucfg %[prid], $zero\n\t" : [prid] "=r"(processor_id)); +- switch (processor_id & 0xff00) { ++ // Refer PRID_SERIES_MASK in linux kernel: arch/loongarch/include/asm/cpu.h. ++ switch (processor_id & 0xf000) { + case 0xc000: // Loongson 64bit, 4-issue + return "la464"; + // TODO: Others. +-- +2.20.1 + diff --git a/0059-LoongArch-Make-sure-that-the-LoongArchISD-BSTRINS-no.patch b/0059-LoongArch-Make-sure-that-the-LoongArchISD-BSTRINS-no.patch new file mode 100644 index 0000000000000000000000000000000000000000..df74cfe8d35f0af886da74e46920ae23620bad57 --- /dev/null +++ b/0059-LoongArch-Make-sure-that-the-LoongArchISD-BSTRINS-no.patch @@ -0,0 +1,60 @@ +From 758014af0c9b2a54a57a8dc9dfa206be6be4161e Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Mon, 11 Mar 2024 08:59:17 +0800 +Subject: [PATCH 59/66] [LoongArch] Make sure that the LoongArchISD::BSTRINS + node uses the correct `MSB` value (#84454) + +The `MSB` must not be greater than `GRLen`. Without this patch, newly +added test cases will crash with LoongArch32, resulting in a 'cannot +select' error. + +(cherry picked from commit edd4c6c6dca4c556de22b2ab73d5bfc02d28e59b) +(cherry picked from commit d77c5c3830d925b3795e2f1535a6568399fe6626) +Change-Id: Idef25dd69d27324c2a90e3ed4ed0b39073e812b0 +--- + llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp | 4 +++- + llvm/test/CodeGen/LoongArch/bstrins_w.ll | 13 +++++++++++++ + 2 files changed, 16 insertions(+), 1 deletion(-) + +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +index ed106cb766bc..5affaf37ad5a 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +@@ -2310,7 +2310,9 @@ Retry: + return DAG.getNode( + LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0), + DAG.getConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy), +- DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT), ++ DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1) ++ : (MaskIdx0 + MaskLen0 - 1), ++ DL, GRLenVT), + DAG.getConstant(MaskIdx0, DL, GRLenVT)); + } + +diff --git a/llvm/test/CodeGen/LoongArch/bstrins_w.ll b/llvm/test/CodeGen/LoongArch/bstrins_w.ll +index dfbe000841cd..e008caacad2a 100644 +--- a/llvm/test/CodeGen/LoongArch/bstrins_w.ll ++++ b/llvm/test/CodeGen/LoongArch/bstrins_w.ll +@@ -145,6 +145,19 @@ define i32 @pat5(i32 %a) nounwind { + ret i32 %or + } + ++;; The high bits of `const` are zero. 
++define i32 @pat5_high_zeros(i32 %a) nounwind { ++; CHECK-LABEL: pat5_high_zeros: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $a1, 1 ++; CHECK-NEXT: ori $a1, $a1, 564 ++; CHECK-NEXT: bstrins.w $a0, $a1, 31, 16 ++; CHECK-NEXT: ret ++ %and = and i32 %a, 65535 ; 0x0000ffff ++ %or = or i32 %and, 305397760 ; 0x12340000 ++ ret i32 %or ++} ++ + ;; Pattern 6: a = b | ((c & mask) << shamt) + ;; In this testcase b is 0x10000002, but in fact we do not require b being a + ;; constant. As long as all positions in b to be overwritten by the incoming +-- +2.20.1 + diff --git a/0060-LoongArch-Improve-codegen-for-atomic-ops-67391.patch b/0060-LoongArch-Improve-codegen-for-atomic-ops-67391.patch new file mode 100644 index 0000000000000000000000000000000000000000..f168b13fd755a4d08a540062593a37f4eaa73de9 --- /dev/null +++ b/0060-LoongArch-Improve-codegen-for-atomic-ops-67391.patch @@ -0,0 +1,1450 @@ +From 0a14b9c4408974055f7ab42501d45073808077eb Mon Sep 17 00:00:00 2001 +From: hev +Date: Wed, 11 Oct 2023 10:24:18 +0800 +Subject: [PATCH 60/66] [LoongArch] Improve codegen for atomic ops (#67391) + +This PR improves memory barriers generated by atomic operations. + +Memory barrier semantics of LL/SC: +``` +LL: + +SC: + +``` + +Changes: +* Remove unnecessary memory barriers before LL and between LL/SC. +* Fix acquire semantics. (If the SC instruction is not executed, then +the guarantee of acquiring semantics cannot be ensured. Therefore, an +acquire barrier needs to be generated when memory ordering includes an +acquire operation.) + +(cherry picked from commit 203ba238e33c570dba6cbcf247f1668bb2a13c26) + +Change-Id: I4ef87f94e7e01ae9bd93e1e57338534131e93590 +--- + .../LoongArchExpandAtomicPseudoInsts.cpp | 50 +-- + .../Target/LoongArch/LoongArchInstrInfo.td | 24 +- + .../LoongArch/atomicrmw-uinc-udec-wrap.ll | 24 +- + .../ir-instruction/atomic-cmpxchg.ll | 376 ++++++++++++++++-- + .../LoongArch/ir-instruction/atomicrmw-fp.ll | 24 +- + .../ir-instruction/atomicrmw-minmax.ll | 24 -- + .../LoongArch/ir-instruction/atomicrmw.ll | 31 -- + llvm/unittests/Target/LoongArch/InstSizes.cpp | 2 +- + 8 files changed, 407 insertions(+), 148 deletions(-) + +diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp +index 51df0463e235..eb78ef065b21 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp +@@ -153,18 +153,12 @@ static void doAtomicBinOpExpansion(const LoongArchInstrInfo *TII, + Register ScratchReg = MI.getOperand(1).getReg(); + Register AddrReg = MI.getOperand(2).getReg(); + Register IncrReg = MI.getOperand(3).getReg(); +- AtomicOrdering Ordering = +- static_cast(MI.getOperand(4).getImm()); + + // .loop: +- // if(Ordering != AtomicOrdering::Monotonic) +- // dbar 0 + // ll.[w|d] dest, (addr) + // binop scratch, dest, val + // sc.[w|d] scratch, scratch, (addr) + // beqz scratch, loop +- if (Ordering != AtomicOrdering::Monotonic) +- BuildMI(LoopMBB, DL, TII->get(LoongArch::DBAR)).addImm(0); + BuildMI(LoopMBB, DL, + TII->get(Width == 32 ? 
LoongArch::LL_W : LoongArch::LL_D), DestReg) + .addReg(AddrReg) +@@ -251,12 +245,8 @@ static void doMaskedAtomicBinOpExpansion( + Register AddrReg = MI.getOperand(2).getReg(); + Register IncrReg = MI.getOperand(3).getReg(); + Register MaskReg = MI.getOperand(4).getReg(); +- AtomicOrdering Ordering = +- static_cast(MI.getOperand(5).getImm()); + + // .loop: +- // if(Ordering != AtomicOrdering::Monotonic) +- // dbar 0 + // ll.w destreg, (alignedaddr) + // binop scratch, destreg, incr + // xor scratch, destreg, scratch +@@ -264,8 +254,6 @@ static void doMaskedAtomicBinOpExpansion( + // xor scratch, destreg, scratch + // sc.w scratch, scratch, (alignedaddr) + // beqz scratch, loop +- if (Ordering != AtomicOrdering::Monotonic) +- BuildMI(LoopMBB, DL, TII->get(LoongArch::DBAR)).addImm(0); + BuildMI(LoopMBB, DL, TII->get(LoongArch::LL_W), DestReg) + .addReg(AddrReg) + .addImm(0); +@@ -372,23 +360,20 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp( + auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); + auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); + auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); +- auto TailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); + auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); + + // Insert new MBBs. + MF->insert(++MBB.getIterator(), LoopHeadMBB); + MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB); + MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB); +- MF->insert(++LoopTailMBB->getIterator(), TailMBB); +- MF->insert(++TailMBB->getIterator(), DoneMBB); ++ MF->insert(++LoopTailMBB->getIterator(), DoneMBB); + + // Set up successors and transfer remaining instructions to DoneMBB. + LoopHeadMBB->addSuccessor(LoopIfBodyMBB); + LoopHeadMBB->addSuccessor(LoopTailMBB); + LoopIfBodyMBB->addSuccessor(LoopTailMBB); + LoopTailMBB->addSuccessor(LoopHeadMBB); +- LoopTailMBB->addSuccessor(TailMBB); +- TailMBB->addSuccessor(DoneMBB); ++ LoopTailMBB->addSuccessor(DoneMBB); + DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end()); + DoneMBB->transferSuccessors(&MBB); + MBB.addSuccessor(LoopHeadMBB); +@@ -402,11 +387,9 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp( + + // + // .loophead: +- // dbar 0 + // ll.w destreg, (alignedaddr) + // and scratch2, destreg, mask + // move scratch1, destreg +- BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::DBAR)).addImm(0); + BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::LL_W), DestReg) + .addReg(AddrReg) + .addImm(0); +@@ -463,7 +446,6 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp( + // .looptail: + // sc.w scratch1, scratch1, (addr) + // beqz scratch1, loop +- // dbar 0x700 + BuildMI(LoopTailMBB, DL, TII->get(LoongArch::SC_W), Scratch1Reg) + .addReg(Scratch1Reg) + .addReg(AddrReg) +@@ -472,10 +454,6 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp( + .addReg(Scratch1Reg) + .addMBB(LoopHeadMBB); + +- // .tail: +- // dbar 0x700 +- BuildMI(TailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0x700); +- + NextMBBI = MBB.end(); + MI.eraseFromParent(); + +@@ -483,7 +461,6 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp( + computeAndAddLiveIns(LiveRegs, *LoopHeadMBB); + computeAndAddLiveIns(LiveRegs, *LoopIfBodyMBB); + computeAndAddLiveIns(LiveRegs, *LoopTailMBB); +- computeAndAddLiveIns(LiveRegs, *TailMBB); + computeAndAddLiveIns(LiveRegs, *DoneMBB); + + return true; +@@ -535,12 +512,10 @@ bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg( + .addReg(CmpValReg) + .addMBB(TailMBB); + // .looptail: +- // dbar 0 + // 
move scratch, newval + // sc.[w|d] scratch, scratch, (addr) + // beqz scratch, loophead + // b done +- BuildMI(LoopTailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0); + BuildMI(LoopTailMBB, DL, TII->get(LoongArch::OR), ScratchReg) + .addReg(NewValReg) + .addReg(LoongArch::R0); +@@ -573,13 +548,11 @@ bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg( + .addMBB(TailMBB); + + // .looptail: +- // dbar 0 + // andn scratch, dest, mask + // or scratch, scratch, newval + // sc.[w|d] scratch, scratch, (addr) + // beqz scratch, loophead + // b done +- BuildMI(LoopTailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0); + BuildMI(LoopTailMBB, DL, TII->get(LoongArch::ANDN), ScratchReg) + .addReg(DestReg) + .addReg(MaskReg); +@@ -598,9 +571,24 @@ bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg( + BuildMI(LoopTailMBB, DL, TII->get(LoongArch::B)).addMBB(DoneMBB); + } + ++ AtomicOrdering Ordering = ++ static_cast(MI.getOperand(IsMasked ? 6 : 5).getImm()); ++ int hint; ++ ++ switch (Ordering) { ++ case AtomicOrdering::Acquire: ++ case AtomicOrdering::AcquireRelease: ++ case AtomicOrdering::SequentiallyConsistent: ++ // TODO: acquire ++ hint = 0; ++ break; ++ default: ++ hint = 0x700; ++ } ++ + // .tail: +- // dbar 0x700 +- BuildMI(TailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0x700); ++ // dbar 0x700 | acquire ++ BuildMI(TailMBB, DL, TII->get(LoongArch::DBAR)).addImm(hint); + + NextMBBI = MBB.end(); + MI.eraseFromParent(); +diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +index 05ae36a9781d..a9b0db30c2f6 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +@@ -1731,7 +1731,7 @@ def PseudoMaskedAtomicLoadMin32 : PseudoMaskedAMMinMax; + + class PseudoCmpXchg + : Pseudo<(outs GPR:$res, GPR:$scratch), +- (ins GPR:$addr, GPR:$cmpval, GPR:$newval)> { ++ (ins GPR:$addr, GPR:$cmpval, GPR:$newval, grlenimm:$ordering)> { + let Constraints = "@earlyclobber $res,@earlyclobber $scratch"; + let mayLoad = 1; + let mayStore = 1; +@@ -1821,14 +1821,28 @@ def : AtomicPat; + +-def : Pat<(atomic_cmp_swap_64 GPR:$addr, GPR:$cmp, GPR:$new), +- (PseudoCmpXchg64 GPR:$addr, GPR:$cmp, GPR:$new)>; ++// Ordering constants must be kept in sync with the AtomicOrdering enum in ++// AtomicOrdering.h. 
++multiclass PseudoCmpXchgPat<string Op, Pseudo CmpXchgInst,
++                            ValueType vt = GRLenVT> {
++ def : Pat<(vt (!cast<PatFrag>(Op#"_monotonic") GPR:$addr, GPR:$cmp, GPR:$new)),
++ (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 2)>;
++ def : Pat<(vt (!cast<PatFrag>(Op#"_acquire") GPR:$addr, GPR:$cmp, GPR:$new)),
++ (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 4)>;
++ def : Pat<(vt (!cast<PatFrag>(Op#"_release") GPR:$addr, GPR:$cmp, GPR:$new)),
++ (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 5)>;
++ def : Pat<(vt (!cast<PatFrag>(Op#"_acq_rel") GPR:$addr, GPR:$cmp, GPR:$new)),
++ (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 6)>;
++ def : Pat<(vt (!cast<PatFrag>(Op#"_seq_cst") GPR:$addr, GPR:$cmp, GPR:$new)),
++ (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 7)>;
++}
++
++defm : PseudoCmpXchgPat<"atomic_cmp_swap_32", PseudoCmpXchg32>;
++defm : PseudoCmpXchgPat<"atomic_cmp_swap_64", PseudoCmpXchg64, i64>;
+ def : Pat<(int_loongarch_masked_cmpxchg_i64
+ GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering),
+ (PseudoMaskedCmpXchg32
+ GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering)>;
+-def : Pat<(atomic_cmp_swap_32 GPR:$addr, GPR:$cmp, GPR:$new),
+- (PseudoCmpXchg32 GPR:$addr, GPR:$cmp, GPR:$new)>;
+
+ def : PseudoMaskedAMMinMaxPat;
+diff --git a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll
+index f11af8fe6528..32106886c783 100644
+--- a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll
++++ b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll
+@@ -34,14 +34,13 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) {
+ ; LA64-NEXT: bne $a5, $a3, .LBB0_5
+ ; LA64-NEXT: # %bb.4: # %atomicrmw.start
+ ; LA64-NEXT: # in Loop: Header=BB0_3 Depth=2
+-; LA64-NEXT: dbar 0
+ ; LA64-NEXT: move $a7, $a6
+ ; LA64-NEXT: sc.w $a7, $a2, 0
+ ; LA64-NEXT: beqz $a7, .LBB0_3
+ ; LA64-NEXT: b .LBB0_6
+ ; LA64-NEXT: .LBB0_5: # %atomicrmw.start
+ ; LA64-NEXT: # in Loop: Header=BB0_1 Depth=1
+-; LA64-NEXT: dbar 1792
++; LA64-NEXT: dbar 0
+ ; LA64-NEXT: .LBB0_6: # %atomicrmw.start
+ ; LA64-NEXT: # in Loop: Header=BB0_1 Depth=1
+ ; LA64-NEXT: addi.w $a6, $a3, 0
+@@ -88,14 +87,13 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) {
+ ; LA64-NEXT: bne $a5, $a3, .LBB1_5
+ ; LA64-NEXT: # %bb.4: # %atomicrmw.start
+ ; LA64-NEXT: # in Loop: Header=BB1_3 Depth=2
+-; LA64-NEXT: dbar 0
+ ; LA64-NEXT: move $a7, $a6
+ ; LA64-NEXT: sc.w $a7, $a2, 0
+ ; LA64-NEXT: beqz $a7, .LBB1_3
+ ; LA64-NEXT: b .LBB1_6
+ ; LA64-NEXT: .LBB1_5: # %atomicrmw.start
+ ; LA64-NEXT: # in Loop: Header=BB1_1 Depth=1
+-; LA64-NEXT: dbar 1792
++; LA64-NEXT: dbar 0
+ ; LA64-NEXT: .LBB1_6: # %atomicrmw.start
+ ; LA64-NEXT: # in Loop: Header=BB1_1 Depth=1
+ ; LA64-NEXT: addi.w $a6, $a3, 0
+@@ -129,14 +127,13 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) {
+ ; LA64-NEXT: bne $a1, $a3, .LBB2_5
+ ; LA64-NEXT: # %bb.4: # %atomicrmw.start
+ ; LA64-NEXT: # in Loop: Header=BB2_3 Depth=2
+-; LA64-NEXT: dbar 0
+ ; LA64-NEXT: move $a6, $a5
+ ; LA64-NEXT: sc.w $a6, $a0, 0
+ ; LA64-NEXT: beqz $a6, .LBB2_3
+ ; LA64-NEXT: b .LBB2_6
+ ; LA64-NEXT: .LBB2_5: # %atomicrmw.start
+ ; LA64-NEXT: # in Loop: Header=BB2_1 Depth=1
+-; LA64-NEXT: dbar 1792
++; LA64-NEXT: dbar 0
+ ; LA64-NEXT: .LBB2_6: # %atomicrmw.start
+ ; LA64-NEXT: # in Loop: Header=BB2_1 Depth=1
+ ; LA64-NEXT: move $a3, $a1
+@@ -168,14 +165,13 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) {
+ ; LA64-NEXT: bne $a2, $a3, .LBB3_5
+ ; LA64-NEXT: # %bb.4: # %atomicrmw.start
+ ; LA64-NEXT: # in Loop: Header=BB3_3 Depth=2
+-; LA64-NEXT: dbar 0
+ ; LA64-NEXT: move $a5, $a4
+ ; LA64-NEXT: sc.d $a5,
$a0, 0 + ; LA64-NEXT: beqz $a5, .LBB3_3 + ; LA64-NEXT: b .LBB3_6 + ; LA64-NEXT: .LBB3_5: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB3_1 Depth=1 +-; LA64-NEXT: dbar 1792 ++; LA64-NEXT: dbar 0 + ; LA64-NEXT: .LBB3_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB3_1 Depth=1 + ; LA64-NEXT: bne $a2, $a3, .LBB3_1 +@@ -224,14 +220,13 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) { + ; LA64-NEXT: bne $a6, $a3, .LBB4_5 + ; LA64-NEXT: # %bb.4: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB4_3 Depth=2 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: move $t0, $a7 + ; LA64-NEXT: sc.w $t0, $a2, 0 + ; LA64-NEXT: beqz $t0, .LBB4_3 + ; LA64-NEXT: b .LBB4_6 + ; LA64-NEXT: .LBB4_5: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB4_1 Depth=1 +-; LA64-NEXT: dbar 1792 ++; LA64-NEXT: dbar 0 + ; LA64-NEXT: .LBB4_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB4_1 Depth=1 + ; LA64-NEXT: addi.w $a7, $a3, 0 +@@ -283,14 +278,13 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) { + ; LA64-NEXT: bne $a6, $a3, .LBB5_5 + ; LA64-NEXT: # %bb.4: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB5_3 Depth=2 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: move $t0, $a7 + ; LA64-NEXT: sc.w $t0, $a2, 0 + ; LA64-NEXT: beqz $t0, .LBB5_3 + ; LA64-NEXT: b .LBB5_6 + ; LA64-NEXT: .LBB5_5: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB5_1 Depth=1 +-; LA64-NEXT: dbar 1792 ++; LA64-NEXT: dbar 0 + ; LA64-NEXT: .LBB5_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB5_1 Depth=1 + ; LA64-NEXT: addi.w $a7, $a3, 0 +@@ -329,14 +323,13 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) { + ; LA64-NEXT: bne $a2, $a4, .LBB6_5 + ; LA64-NEXT: # %bb.4: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB6_3 Depth=2 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: move $a7, $a6 + ; LA64-NEXT: sc.w $a7, $a0, 0 + ; LA64-NEXT: beqz $a7, .LBB6_3 + ; LA64-NEXT: b .LBB6_6 + ; LA64-NEXT: .LBB6_5: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB6_1 Depth=1 +-; LA64-NEXT: dbar 1792 ++; LA64-NEXT: dbar 0 + ; LA64-NEXT: .LBB6_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB6_1 Depth=1 + ; LA64-NEXT: move $a4, $a2 +@@ -373,14 +366,13 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) { + ; LA64-NEXT: bne $a2, $a3, .LBB7_5 + ; LA64-NEXT: # %bb.4: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB7_3 Depth=2 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: move $a5, $a4 + ; LA64-NEXT: sc.d $a5, $a0, 0 + ; LA64-NEXT: beqz $a5, .LBB7_3 + ; LA64-NEXT: b .LBB7_6 + ; LA64-NEXT: .LBB7_5: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB7_1 Depth=1 +-; LA64-NEXT: dbar 1792 ++; LA64-NEXT: dbar 0 + ; LA64-NEXT: .LBB7_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB7_1 Depth=1 + ; LA64-NEXT: bne $a2, $a3, .LBB7_1 +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll +index 76e51fe7d3e8..1ac20d10e587 100644 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll ++++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll +@@ -21,14 +21,13 @@ define void @cmpxchg_i8_acquire_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { + ; LA64-NEXT: and $a5, $a4, $a0 + ; LA64-NEXT: bne $a5, $a1, .LBB0_3 + ; LA64-NEXT: # %bb.2: # in Loop: Header=BB0_1 Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: andn $a5, $a4, $a0 + ; LA64-NEXT: or $a5, $a5, $a2 + ; LA64-NEXT: sc.w $a5, $a3, 0 + ; LA64-NEXT: beqz $a5, .LBB0_1 + ; LA64-NEXT: b .LBB0_4 + ; LA64-NEXT: .LBB0_3: +-; LA64-NEXT: dbar 1792 ++; LA64-NEXT: dbar 0 
+ ; LA64-NEXT: .LBB0_4: + ; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire acquire +@@ -56,14 +55,13 @@ define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind + ; LA64-NEXT: and $a5, $a4, $a0 + ; LA64-NEXT: bne $a5, $a1, .LBB1_3 + ; LA64-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: andn $a5, $a4, $a0 + ; LA64-NEXT: or $a5, $a5, $a2 + ; LA64-NEXT: sc.w $a5, $a3, 0 + ; LA64-NEXT: beqz $a5, .LBB1_1 + ; LA64-NEXT: b .LBB1_4 + ; LA64-NEXT: .LBB1_3: +-; LA64-NEXT: dbar 1792 ++; LA64-NEXT: dbar 0 + ; LA64-NEXT: .LBB1_4: + ; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire acquire +@@ -77,13 +75,12 @@ define void @cmpxchg_i32_acquire_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind + ; LA64-NEXT: ll.w $a3, $a0, 0 + ; LA64-NEXT: bne $a3, $a1, .LBB2_3 + ; LA64-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: move $a4, $a2 + ; LA64-NEXT: sc.w $a4, $a0, 0 + ; LA64-NEXT: beqz $a4, .LBB2_1 + ; LA64-NEXT: b .LBB2_4 + ; LA64-NEXT: .LBB2_3: +-; LA64-NEXT: dbar 1792 ++; LA64-NEXT: dbar 0 + ; LA64-NEXT: .LBB2_4: + ; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire +@@ -97,13 +94,12 @@ define void @cmpxchg_i64_acquire_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind + ; LA64-NEXT: ll.d $a3, $a0, 0 + ; LA64-NEXT: bne $a3, $a1, .LBB3_3 + ; LA64-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: move $a4, $a2 + ; LA64-NEXT: sc.d $a4, $a0, 0 + ; LA64-NEXT: beqz $a4, .LBB3_1 + ; LA64-NEXT: b .LBB3_4 + ; LA64-NEXT: .LBB3_3: +-; LA64-NEXT: dbar 1792 ++; LA64-NEXT: dbar 0 + ; LA64-NEXT: .LBB3_4: + ; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire acquire +@@ -130,14 +126,13 @@ define i8 @cmpxchg_i8_acquire_acquire_reti8(ptr %ptr, i8 %cmp, i8 %val) nounwind + ; LA64-NEXT: and $a6, $a5, $a4 + ; LA64-NEXT: bne $a6, $a1, .LBB4_3 + ; LA64-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: andn $a6, $a5, $a4 + ; LA64-NEXT: or $a6, $a6, $a2 + ; LA64-NEXT: sc.w $a6, $a3, 0 + ; LA64-NEXT: beqz $a6, .LBB4_1 + ; LA64-NEXT: b .LBB4_4 + ; LA64-NEXT: .LBB4_3: +-; LA64-NEXT: dbar 1792 ++; LA64-NEXT: dbar 0 + ; LA64-NEXT: .LBB4_4: + ; LA64-NEXT: srl.w $a0, $a5, $a0 + ; LA64-NEXT: ret +@@ -167,14 +162,13 @@ define i16 @cmpxchg_i16_acquire_acquire_reti16(ptr %ptr, i16 %cmp, i16 %val) nou + ; LA64-NEXT: and $a6, $a5, $a4 + ; LA64-NEXT: bne $a6, $a1, .LBB5_3 + ; LA64-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: andn $a6, $a5, $a4 + ; LA64-NEXT: or $a6, $a6, $a2 + ; LA64-NEXT: sc.w $a6, $a3, 0 + ; LA64-NEXT: beqz $a6, .LBB5_1 + ; LA64-NEXT: b .LBB5_4 + ; LA64-NEXT: .LBB5_3: +-; LA64-NEXT: dbar 1792 ++; LA64-NEXT: dbar 0 + ; LA64-NEXT: .LBB5_4: + ; LA64-NEXT: srl.w $a0, $a5, $a0 + ; LA64-NEXT: ret +@@ -190,13 +184,12 @@ define i32 @cmpxchg_i32_acquire_acquire_reti32(ptr %ptr, i32 %cmp, i32 %val) nou + ; LA64-NEXT: ll.w $a3, $a0, 0 + ; LA64-NEXT: bne $a3, $a1, .LBB6_3 + ; LA64-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: move $a4, $a2 + ; LA64-NEXT: sc.w $a4, $a0, 0 + ; LA64-NEXT: beqz $a4, .LBB6_1 + ; LA64-NEXT: b .LBB6_4 + ; LA64-NEXT: .LBB6_3: +-; LA64-NEXT: dbar 1792 ++; LA64-NEXT: dbar 0 + ; LA64-NEXT: .LBB6_4: + ; LA64-NEXT: move $a0, $a3 + ; LA64-NEXT: ret +@@ -212,13 +205,12 @@ define i64 @cmpxchg_i64_acquire_acquire_reti64(ptr %ptr, i64 %cmp, i64 %val) nou + ; LA64-NEXT: ll.d $a3, $a0, 0 + ; LA64-NEXT: bne 
$a3, $a1, .LBB7_3 + ; LA64-NEXT: # %bb.2: # in Loop: Header=BB7_1 Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: move $a4, $a2 + ; LA64-NEXT: sc.d $a4, $a0, 0 + ; LA64-NEXT: beqz $a4, .LBB7_1 + ; LA64-NEXT: b .LBB7_4 + ; LA64-NEXT: .LBB7_3: +-; LA64-NEXT: dbar 1792 ++; LA64-NEXT: dbar 0 + ; LA64-NEXT: .LBB7_4: + ; LA64-NEXT: move $a0, $a3 + ; LA64-NEXT: ret +@@ -247,14 +239,13 @@ define i1 @cmpxchg_i8_acquire_acquire_reti1(ptr %ptr, i8 %cmp, i8 %val) nounwind + ; LA64-NEXT: and $a6, $a5, $a2 + ; LA64-NEXT: bne $a6, $a1, .LBB8_3 + ; LA64-NEXT: # %bb.2: # in Loop: Header=BB8_1 Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: andn $a6, $a5, $a2 + ; LA64-NEXT: or $a6, $a6, $a0 + ; LA64-NEXT: sc.w $a6, $a3, 0 + ; LA64-NEXT: beqz $a6, .LBB8_1 + ; LA64-NEXT: b .LBB8_4 + ; LA64-NEXT: .LBB8_3: +-; LA64-NEXT: dbar 1792 ++; LA64-NEXT: dbar 0 + ; LA64-NEXT: .LBB8_4: + ; LA64-NEXT: and $a0, $a5, $a4 + ; LA64-NEXT: addi.w $a0, $a0, 0 +@@ -287,14 +278,13 @@ define i1 @cmpxchg_i16_acquire_acquire_reti1(ptr %ptr, i16 %cmp, i16 %val) nounw + ; LA64-NEXT: and $a6, $a5, $a2 + ; LA64-NEXT: bne $a6, $a1, .LBB9_3 + ; LA64-NEXT: # %bb.2: # in Loop: Header=BB9_1 Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: andn $a6, $a5, $a2 + ; LA64-NEXT: or $a6, $a6, $a0 + ; LA64-NEXT: sc.w $a6, $a3, 0 + ; LA64-NEXT: beqz $a6, .LBB9_1 + ; LA64-NEXT: b .LBB9_4 + ; LA64-NEXT: .LBB9_3: +-; LA64-NEXT: dbar 1792 ++; LA64-NEXT: dbar 0 + ; LA64-NEXT: .LBB9_4: + ; LA64-NEXT: and $a0, $a5, $a4 + ; LA64-NEXT: addi.w $a0, $a0, 0 +@@ -313,13 +303,12 @@ define i1 @cmpxchg_i32_acquire_acquire_reti1(ptr %ptr, i32 %cmp, i32 %val) nounw + ; LA64-NEXT: ll.w $a3, $a0, 0 + ; LA64-NEXT: bne $a3, $a1, .LBB10_3 + ; LA64-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: move $a4, $a2 + ; LA64-NEXT: sc.w $a4, $a0, 0 + ; LA64-NEXT: beqz $a4, .LBB10_1 + ; LA64-NEXT: b .LBB10_4 + ; LA64-NEXT: .LBB10_3: +-; LA64-NEXT: dbar 1792 ++; LA64-NEXT: dbar 0 + ; LA64-NEXT: .LBB10_4: + ; LA64-NEXT: addi.w $a0, $a1, 0 + ; LA64-NEXT: xor $a0, $a3, $a0 +@@ -337,13 +326,12 @@ define i1 @cmpxchg_i64_acquire_acquire_reti1(ptr %ptr, i64 %cmp, i64 %val) nounw + ; LA64-NEXT: ll.d $a3, $a0, 0 + ; LA64-NEXT: bne $a3, $a1, .LBB11_3 + ; LA64-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: move $a4, $a2 + ; LA64-NEXT: sc.d $a4, $a0, 0 + ; LA64-NEXT: beqz $a4, .LBB11_1 + ; LA64-NEXT: b .LBB11_4 + ; LA64-NEXT: .LBB11_3: +-; LA64-NEXT: dbar 1792 ++; LA64-NEXT: dbar 0 + ; LA64-NEXT: .LBB11_4: + ; LA64-NEXT: xor $a0, $a3, $a1 + ; LA64-NEXT: sltui $a0, $a0, 1 +@@ -352,3 +340,343 @@ define i1 @cmpxchg_i64_acquire_acquire_reti1(ptr %ptr, i64 %cmp, i64 %val) nounw + %res = extractvalue { i64, i1 } %tmp, 1 + ret i1 %res + } ++ ++define void @cmpxchg_i8_monotonic_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ++; LA64-LABEL: cmpxchg_i8_monotonic_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a3, $zero, -4 ++; LA64-NEXT: and $a3, $a0, $a3 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: andi $a1, $a1, 255 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: andi $a2, $a2, 255 ++; LA64-NEXT: sll.w $a2, $a2, $a0 ++; LA64-NEXT: ori $a4, $zero, 255 ++; LA64-NEXT: sll.w $a0, $a4, $a0 ++; LA64-NEXT: addi.w $a0, $a0, 0 ++; LA64-NEXT: addi.w $a2, $a2, 0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a3, 0 ++; LA64-NEXT: and $a5, $a4, $a0 ++; LA64-NEXT: bne $a5, $a1, .LBB12_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 ++; LA64-NEXT: andn $a5, 
$a4, $a0 ++; LA64-NEXT: or $a5, $a5, $a2 ++; LA64-NEXT: sc.w $a5, $a3, 0 ++; LA64-NEXT: beqz $a5, .LBB12_1 ++; LA64-NEXT: b .LBB12_4 ++; LA64-NEXT: .LBB12_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: .LBB12_4: ++; LA64-NEXT: ret ++ %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val monotonic monotonic ++ ret void ++} ++ ++define void @cmpxchg_i16_monotonic_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwind { ++; LA64-LABEL: cmpxchg_i16_monotonic_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a3, $zero, -4 ++; LA64-NEXT: and $a3, $a0, $a3 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: bstrpick.d $a2, $a2, 15, 0 ++; LA64-NEXT: sll.w $a2, $a2, $a0 ++; LA64-NEXT: lu12i.w $a4, 15 ++; LA64-NEXT: ori $a4, $a4, 4095 ++; LA64-NEXT: sll.w $a0, $a4, $a0 ++; LA64-NEXT: addi.w $a0, $a0, 0 ++; LA64-NEXT: addi.w $a2, $a2, 0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a3, 0 ++; LA64-NEXT: and $a5, $a4, $a0 ++; LA64-NEXT: bne $a5, $a1, .LBB13_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 ++; LA64-NEXT: andn $a5, $a4, $a0 ++; LA64-NEXT: or $a5, $a5, $a2 ++; LA64-NEXT: sc.w $a5, $a3, 0 ++; LA64-NEXT: beqz $a5, .LBB13_1 ++; LA64-NEXT: b .LBB13_4 ++; LA64-NEXT: .LBB13_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: .LBB13_4: ++; LA64-NEXT: ret ++ %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val monotonic monotonic ++ ret void ++} ++ ++define void @cmpxchg_i32_monotonic_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwind { ++; LA64-LABEL: cmpxchg_i32_monotonic_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a3, $a0, 0 ++; LA64-NEXT: bne $a3, $a1, .LBB14_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 ++; LA64-NEXT: move $a4, $a2 ++; LA64-NEXT: sc.w $a4, $a0, 0 ++; LA64-NEXT: beqz $a4, .LBB14_1 ++; LA64-NEXT: b .LBB14_4 ++; LA64-NEXT: .LBB14_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: .LBB14_4: ++; LA64-NEXT: ret ++ %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic ++ ret void ++} ++ ++define void @cmpxchg_i64_monotonic_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwind { ++; LA64-LABEL: cmpxchg_i64_monotonic_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.d $a3, $a0, 0 ++; LA64-NEXT: bne $a3, $a1, .LBB15_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 ++; LA64-NEXT: move $a4, $a2 ++; LA64-NEXT: sc.d $a4, $a0, 0 ++; LA64-NEXT: beqz $a4, .LBB15_1 ++; LA64-NEXT: b .LBB15_4 ++; LA64-NEXT: .LBB15_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: .LBB15_4: ++; LA64-NEXT: ret ++ %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val monotonic monotonic ++ ret void ++} ++ ++define i8 @cmpxchg_i8_monotonic_monotonic_reti8(ptr %ptr, i8 %cmp, i8 %val) nounwind { ++; LA64-LABEL: cmpxchg_i8_monotonic_monotonic_reti8: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a3, $zero, -4 ++; LA64-NEXT: and $a3, $a0, $a3 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: ori $a4, $zero, 255 ++; LA64-NEXT: sll.w $a4, $a4, $a0 ++; LA64-NEXT: addi.w $a4, $a4, 0 ++; LA64-NEXT: andi $a2, $a2, 255 ++; LA64-NEXT: sll.w $a2, $a2, $a0 ++; LA64-NEXT: addi.w $a2, $a2, 0 ++; LA64-NEXT: andi $a1, $a1, 255 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a5, $a3, 0 ++; LA64-NEXT: and $a6, $a5, $a4 ++; LA64-NEXT: bne $a6, $a1, .LBB16_3 ++; LA64-NEXT: # %bb.2: # in Loop: 
Header=BB16_1 Depth=1 ++; LA64-NEXT: andn $a6, $a5, $a4 ++; LA64-NEXT: or $a6, $a6, $a2 ++; LA64-NEXT: sc.w $a6, $a3, 0 ++; LA64-NEXT: beqz $a6, .LBB16_1 ++; LA64-NEXT: b .LBB16_4 ++; LA64-NEXT: .LBB16_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: .LBB16_4: ++; LA64-NEXT: srl.w $a0, $a5, $a0 ++; LA64-NEXT: ret ++ %tmp = cmpxchg ptr %ptr, i8 %cmp, i8 %val monotonic monotonic ++ %res = extractvalue { i8, i1 } %tmp, 0 ++ ret i8 %res ++} ++ ++define i16 @cmpxchg_i16_monotonic_monotonic_reti16(ptr %ptr, i16 %cmp, i16 %val) nounwind { ++; LA64-LABEL: cmpxchg_i16_monotonic_monotonic_reti16: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a3, $zero, -4 ++; LA64-NEXT: and $a3, $a0, $a3 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: lu12i.w $a4, 15 ++; LA64-NEXT: ori $a4, $a4, 4095 ++; LA64-NEXT: sll.w $a4, $a4, $a0 ++; LA64-NEXT: addi.w $a4, $a4, 0 ++; LA64-NEXT: bstrpick.d $a2, $a2, 15, 0 ++; LA64-NEXT: sll.w $a2, $a2, $a0 ++; LA64-NEXT: addi.w $a2, $a2, 0 ++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a5, $a3, 0 ++; LA64-NEXT: and $a6, $a5, $a4 ++; LA64-NEXT: bne $a6, $a1, .LBB17_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB17_1 Depth=1 ++; LA64-NEXT: andn $a6, $a5, $a4 ++; LA64-NEXT: or $a6, $a6, $a2 ++; LA64-NEXT: sc.w $a6, $a3, 0 ++; LA64-NEXT: beqz $a6, .LBB17_1 ++; LA64-NEXT: b .LBB17_4 ++; LA64-NEXT: .LBB17_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: .LBB17_4: ++; LA64-NEXT: srl.w $a0, $a5, $a0 ++; LA64-NEXT: ret ++ %tmp = cmpxchg ptr %ptr, i16 %cmp, i16 %val monotonic monotonic ++ %res = extractvalue { i16, i1 } %tmp, 0 ++ ret i16 %res ++} ++ ++define i32 @cmpxchg_i32_monotonic_monotonic_reti32(ptr %ptr, i32 %cmp, i32 %val) nounwind { ++; LA64-LABEL: cmpxchg_i32_monotonic_monotonic_reti32: ++; LA64: # %bb.0: ++; LA64-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a3, $a0, 0 ++; LA64-NEXT: bne $a3, $a1, .LBB18_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB18_1 Depth=1 ++; LA64-NEXT: move $a4, $a2 ++; LA64-NEXT: sc.w $a4, $a0, 0 ++; LA64-NEXT: beqz $a4, .LBB18_1 ++; LA64-NEXT: b .LBB18_4 ++; LA64-NEXT: .LBB18_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: .LBB18_4: ++; LA64-NEXT: move $a0, $a3 ++; LA64-NEXT: ret ++ %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic ++ %res = extractvalue { i32, i1 } %tmp, 0 ++ ret i32 %res ++} ++ ++define i64 @cmpxchg_i64_monotonic_monotonic_reti64(ptr %ptr, i64 %cmp, i64 %val) nounwind { ++; LA64-LABEL: cmpxchg_i64_monotonic_monotonic_reti64: ++; LA64: # %bb.0: ++; LA64-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.d $a3, $a0, 0 ++; LA64-NEXT: bne $a3, $a1, .LBB19_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB19_1 Depth=1 ++; LA64-NEXT: move $a4, $a2 ++; LA64-NEXT: sc.d $a4, $a0, 0 ++; LA64-NEXT: beqz $a4, .LBB19_1 ++; LA64-NEXT: b .LBB19_4 ++; LA64-NEXT: .LBB19_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: .LBB19_4: ++; LA64-NEXT: move $a0, $a3 ++; LA64-NEXT: ret ++ %tmp = cmpxchg ptr %ptr, i64 %cmp, i64 %val monotonic monotonic ++ %res = extractvalue { i64, i1 } %tmp, 0 ++ ret i64 %res ++} ++ ++define i1 @cmpxchg_i8_monotonic_monotonic_reti1(ptr %ptr, i8 %cmp, i8 %val) nounwind { ++; LA64-LABEL: cmpxchg_i8_monotonic_monotonic_reti1: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a3, $zero, -4 ++; LA64-NEXT: and $a3, $a0, $a3 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: ori $a4, $zero, 255 ++; LA64-NEXT: sll.w $a4, $a4, $a0 ++; LA64-NEXT: andi $a1, $a1, 255 ++; 
LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: andi $a2, $a2, 255 ++; LA64-NEXT: sll.w $a0, $a2, $a0 ++; LA64-NEXT: addi.w $a0, $a0, 0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: addi.w $a2, $a4, 0 ++; LA64-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a5, $a3, 0 ++; LA64-NEXT: and $a6, $a5, $a2 ++; LA64-NEXT: bne $a6, $a1, .LBB20_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB20_1 Depth=1 ++; LA64-NEXT: andn $a6, $a5, $a2 ++; LA64-NEXT: or $a6, $a6, $a0 ++; LA64-NEXT: sc.w $a6, $a3, 0 ++; LA64-NEXT: beqz $a6, .LBB20_1 ++; LA64-NEXT: b .LBB20_4 ++; LA64-NEXT: .LBB20_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: .LBB20_4: ++; LA64-NEXT: and $a0, $a5, $a4 ++; LA64-NEXT: addi.w $a0, $a0, 0 ++; LA64-NEXT: xor $a0, $a1, $a0 ++; LA64-NEXT: sltui $a0, $a0, 1 ++; LA64-NEXT: ret ++ %tmp = cmpxchg ptr %ptr, i8 %cmp, i8 %val monotonic monotonic ++ %res = extractvalue { i8, i1 } %tmp, 1 ++ ret i1 %res ++} ++ ++define i1 @cmpxchg_i16_monotonic_monotonic_reti1(ptr %ptr, i16 %cmp, i16 %val) nounwind { ++; LA64-LABEL: cmpxchg_i16_monotonic_monotonic_reti1: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a3, $zero, -4 ++; LA64-NEXT: and $a3, $a0, $a3 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: lu12i.w $a4, 15 ++; LA64-NEXT: ori $a4, $a4, 4095 ++; LA64-NEXT: sll.w $a4, $a4, $a0 ++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: bstrpick.d $a2, $a2, 15, 0 ++; LA64-NEXT: sll.w $a0, $a2, $a0 ++; LA64-NEXT: addi.w $a0, $a0, 0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: addi.w $a2, $a4, 0 ++; LA64-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a5, $a3, 0 ++; LA64-NEXT: and $a6, $a5, $a2 ++; LA64-NEXT: bne $a6, $a1, .LBB21_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB21_1 Depth=1 ++; LA64-NEXT: andn $a6, $a5, $a2 ++; LA64-NEXT: or $a6, $a6, $a0 ++; LA64-NEXT: sc.w $a6, $a3, 0 ++; LA64-NEXT: beqz $a6, .LBB21_1 ++; LA64-NEXT: b .LBB21_4 ++; LA64-NEXT: .LBB21_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: .LBB21_4: ++; LA64-NEXT: and $a0, $a5, $a4 ++; LA64-NEXT: addi.w $a0, $a0, 0 ++; LA64-NEXT: xor $a0, $a1, $a0 ++; LA64-NEXT: sltui $a0, $a0, 1 ++; LA64-NEXT: ret ++ %tmp = cmpxchg ptr %ptr, i16 %cmp, i16 %val monotonic monotonic ++ %res = extractvalue { i16, i1 } %tmp, 1 ++ ret i1 %res ++} ++ ++define i1 @cmpxchg_i32_monotonic_monotonic_reti1(ptr %ptr, i32 %cmp, i32 %val) nounwind { ++; LA64-LABEL: cmpxchg_i32_monotonic_monotonic_reti1: ++; LA64: # %bb.0: ++; LA64-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a3, $a0, 0 ++; LA64-NEXT: bne $a3, $a1, .LBB22_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1 ++; LA64-NEXT: move $a4, $a2 ++; LA64-NEXT: sc.w $a4, $a0, 0 ++; LA64-NEXT: beqz $a4, .LBB22_1 ++; LA64-NEXT: b .LBB22_4 ++; LA64-NEXT: .LBB22_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: .LBB22_4: ++; LA64-NEXT: addi.w $a0, $a1, 0 ++; LA64-NEXT: xor $a0, $a3, $a0 ++; LA64-NEXT: sltui $a0, $a0, 1 ++; LA64-NEXT: ret ++ %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic ++ %res = extractvalue { i32, i1 } %tmp, 1 ++ ret i1 %res ++} ++ ++define i1 @cmpxchg_i64_monotonic_monotonic_reti1(ptr %ptr, i64 %cmp, i64 %val) nounwind { ++; LA64-LABEL: cmpxchg_i64_monotonic_monotonic_reti1: ++; LA64: # %bb.0: ++; LA64-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.d $a3, $a0, 0 ++; LA64-NEXT: bne $a3, $a1, .LBB23_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB23_1 Depth=1 ++; LA64-NEXT: move $a4, $a2 ++; LA64-NEXT: sc.d $a4, $a0, 0 ++; LA64-NEXT: beqz $a4, .LBB23_1 ++; 
LA64-NEXT: b .LBB23_4 ++; LA64-NEXT: .LBB23_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: .LBB23_4: ++; LA64-NEXT: xor $a0, $a3, $a1 ++; LA64-NEXT: sltui $a0, $a0, 1 ++; LA64-NEXT: ret ++ %tmp = cmpxchg ptr %ptr, i64 %cmp, i64 %val monotonic monotonic ++ %res = extractvalue { i64, i1 } %tmp, 1 ++ ret i1 %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll +index 9767717395b6..9a29d67e9982 100644 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll ++++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll +@@ -25,14 +25,13 @@ define float @float_fadd_acquire(ptr %p) nounwind { + ; LA64F-NEXT: bne $a3, $a2, .LBB0_5 + ; LA64F-NEXT: # %bb.4: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB0_3 Depth=2 +-; LA64F-NEXT: dbar 0 + ; LA64F-NEXT: move $a4, $a1 + ; LA64F-NEXT: sc.w $a4, $a0, 0 + ; LA64F-NEXT: beqz $a4, .LBB0_3 + ; LA64F-NEXT: b .LBB0_6 + ; LA64F-NEXT: .LBB0_5: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB0_1 Depth=1 +-; LA64F-NEXT: dbar 1792 ++; LA64F-NEXT: dbar 0 + ; LA64F-NEXT: .LBB0_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB0_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +@@ -61,14 +60,13 @@ define float @float_fadd_acquire(ptr %p) nounwind { + ; LA64D-NEXT: bne $a3, $a2, .LBB0_5 + ; LA64D-NEXT: # %bb.4: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB0_3 Depth=2 +-; LA64D-NEXT: dbar 0 + ; LA64D-NEXT: move $a4, $a1 + ; LA64D-NEXT: sc.w $a4, $a0, 0 + ; LA64D-NEXT: beqz $a4, .LBB0_3 + ; LA64D-NEXT: b .LBB0_6 + ; LA64D-NEXT: .LBB0_5: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB0_1 Depth=1 +-; LA64D-NEXT: dbar 1792 ++; LA64D-NEXT: dbar 0 + ; LA64D-NEXT: .LBB0_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB0_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +@@ -101,14 +99,13 @@ define float @float_fsub_acquire(ptr %p) nounwind { + ; LA64F-NEXT: bne $a3, $a2, .LBB1_5 + ; LA64F-NEXT: # %bb.4: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB1_3 Depth=2 +-; LA64F-NEXT: dbar 0 + ; LA64F-NEXT: move $a4, $a1 + ; LA64F-NEXT: sc.w $a4, $a0, 0 + ; LA64F-NEXT: beqz $a4, .LBB1_3 + ; LA64F-NEXT: b .LBB1_6 + ; LA64F-NEXT: .LBB1_5: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB1_1 Depth=1 +-; LA64F-NEXT: dbar 1792 ++; LA64F-NEXT: dbar 0 + ; LA64F-NEXT: .LBB1_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB1_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +@@ -137,14 +134,13 @@ define float @float_fsub_acquire(ptr %p) nounwind { + ; LA64D-NEXT: bne $a3, $a2, .LBB1_5 + ; LA64D-NEXT: # %bb.4: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB1_3 Depth=2 +-; LA64D-NEXT: dbar 0 + ; LA64D-NEXT: move $a4, $a1 + ; LA64D-NEXT: sc.w $a4, $a0, 0 + ; LA64D-NEXT: beqz $a4, .LBB1_3 + ; LA64D-NEXT: b .LBB1_6 + ; LA64D-NEXT: .LBB1_5: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB1_1 Depth=1 +-; LA64D-NEXT: dbar 1792 ++; LA64D-NEXT: dbar 0 + ; LA64D-NEXT: .LBB1_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB1_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +@@ -178,14 +174,13 @@ define float @float_fmin_acquire(ptr %p) nounwind { + ; LA64F-NEXT: bne $a3, $a2, .LBB2_5 + ; LA64F-NEXT: # %bb.4: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB2_3 Depth=2 +-; LA64F-NEXT: dbar 0 + ; LA64F-NEXT: move $a4, $a1 + ; LA64F-NEXT: sc.w $a4, $a0, 0 + ; LA64F-NEXT: beqz $a4, .LBB2_3 + ; LA64F-NEXT: b .LBB2_6 + ; LA64F-NEXT: .LBB2_5: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB2_1 Depth=1 +-; 
LA64F-NEXT: dbar 1792 ++; LA64F-NEXT: dbar 0 + ; LA64F-NEXT: .LBB2_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB2_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +@@ -215,14 +210,13 @@ define float @float_fmin_acquire(ptr %p) nounwind { + ; LA64D-NEXT: bne $a3, $a2, .LBB2_5 + ; LA64D-NEXT: # %bb.4: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB2_3 Depth=2 +-; LA64D-NEXT: dbar 0 + ; LA64D-NEXT: move $a4, $a1 + ; LA64D-NEXT: sc.w $a4, $a0, 0 + ; LA64D-NEXT: beqz $a4, .LBB2_3 + ; LA64D-NEXT: b .LBB2_6 + ; LA64D-NEXT: .LBB2_5: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB2_1 Depth=1 +-; LA64D-NEXT: dbar 1792 ++; LA64D-NEXT: dbar 0 + ; LA64D-NEXT: .LBB2_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB2_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +@@ -256,14 +250,13 @@ define float @float_fmax_acquire(ptr %p) nounwind { + ; LA64F-NEXT: bne $a3, $a2, .LBB3_5 + ; LA64F-NEXT: # %bb.4: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB3_3 Depth=2 +-; LA64F-NEXT: dbar 0 + ; LA64F-NEXT: move $a4, $a1 + ; LA64F-NEXT: sc.w $a4, $a0, 0 + ; LA64F-NEXT: beqz $a4, .LBB3_3 + ; LA64F-NEXT: b .LBB3_6 + ; LA64F-NEXT: .LBB3_5: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB3_1 Depth=1 +-; LA64F-NEXT: dbar 1792 ++; LA64F-NEXT: dbar 0 + ; LA64F-NEXT: .LBB3_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB3_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +@@ -293,14 +286,13 @@ define float @float_fmax_acquire(ptr %p) nounwind { + ; LA64D-NEXT: bne $a3, $a2, .LBB3_5 + ; LA64D-NEXT: # %bb.4: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB3_3 Depth=2 +-; LA64D-NEXT: dbar 0 + ; LA64D-NEXT: move $a4, $a1 + ; LA64D-NEXT: sc.w $a4, $a0, 0 + ; LA64D-NEXT: beqz $a4, .LBB3_3 + ; LA64D-NEXT: b .LBB3_6 + ; LA64D-NEXT: .LBB3_5: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB3_1 Depth=1 +-; LA64D-NEXT: dbar 1792 ++; LA64D-NEXT: dbar 0 + ; LA64D-NEXT: .LBB3_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB3_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll +index cd4a9e7fa9c4..26ba77e8d4fd 100644 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll ++++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll +@@ -17,7 +17,6 @@ define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind { + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: ll.w $a4, $a2, 0 + ; LA64-NEXT: and $a6, $a4, $a3 + ; LA64-NEXT: move $a5, $a4 +@@ -30,8 +29,6 @@ define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind { + ; LA64-NEXT: sc.w $a5, $a2, 0 + ; LA64-NEXT: beqz $a5, .LBB0_1 + ; LA64-NEXT: # %bb.4: +-; LA64-NEXT: dbar 1792 +-; LA64-NEXT: # %bb.5: + ; LA64-NEXT: srl.w $a0, $a4, $a0 + ; LA64-NEXT: ret + %1 = atomicrmw umax ptr %a, i8 %b acquire +@@ -52,7 +49,6 @@ define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind { + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: ll.w $a4, $a2, 0 + ; LA64-NEXT: and $a6, $a4, $a3 + ; LA64-NEXT: move $a5, $a4 +@@ -65,8 +61,6 @@ define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind { + ; LA64-NEXT: sc.w $a5, $a2, 0 + ; LA64-NEXT: beqz $a5, .LBB1_1 + ; LA64-NEXT: # %bb.4: +-; LA64-NEXT: dbar 1792 +-; LA64-NEXT: # 
%bb.5: + ; LA64-NEXT: srl.w $a0, $a4, $a0 + ; LA64-NEXT: ret + %1 = atomicrmw umax ptr %a, i16 %b acquire +@@ -106,7 +100,6 @@ define i8 @atomicrmw_umin_i8_acquire(ptr %a, i8 %b) nounwind { + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: ll.w $a4, $a2, 0 + ; LA64-NEXT: and $a6, $a4, $a3 + ; LA64-NEXT: move $a5, $a4 +@@ -119,8 +112,6 @@ define i8 @atomicrmw_umin_i8_acquire(ptr %a, i8 %b) nounwind { + ; LA64-NEXT: sc.w $a5, $a2, 0 + ; LA64-NEXT: beqz $a5, .LBB4_1 + ; LA64-NEXT: # %bb.4: +-; LA64-NEXT: dbar 1792 +-; LA64-NEXT: # %bb.5: + ; LA64-NEXT: srl.w $a0, $a4, $a0 + ; LA64-NEXT: ret + %1 = atomicrmw umin ptr %a, i8 %b acquire +@@ -141,7 +132,6 @@ define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind { + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: ll.w $a4, $a2, 0 + ; LA64-NEXT: and $a6, $a4, $a3 + ; LA64-NEXT: move $a5, $a4 +@@ -154,8 +144,6 @@ define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind { + ; LA64-NEXT: sc.w $a5, $a2, 0 + ; LA64-NEXT: beqz $a5, .LBB5_1 + ; LA64-NEXT: # %bb.4: +-; LA64-NEXT: dbar 1792 +-; LA64-NEXT: # %bb.5: + ; LA64-NEXT: srl.w $a0, $a4, $a0 + ; LA64-NEXT: ret + %1 = atomicrmw umin ptr %a, i16 %b acquire +@@ -197,7 +185,6 @@ define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind { + ; LA64-NEXT: andi $a4, $a0, 24 + ; LA64-NEXT: xori $a4, $a4, 56 + ; LA64-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: ll.w $a5, $a2, 0 + ; LA64-NEXT: and $a7, $a5, $a3 + ; LA64-NEXT: move $a6, $a5 +@@ -212,8 +199,6 @@ define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind { + ; LA64-NEXT: sc.w $a6, $a2, 0 + ; LA64-NEXT: beqz $a6, .LBB8_1 + ; LA64-NEXT: # %bb.4: +-; LA64-NEXT: dbar 1792 +-; LA64-NEXT: # %bb.5: + ; LA64-NEXT: srl.w $a0, $a5, $a0 + ; LA64-NEXT: ret + %1 = atomicrmw max ptr %a, i8 %b acquire +@@ -237,7 +222,6 @@ define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind { + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: ll.w $a5, $a2, 0 + ; LA64-NEXT: and $a7, $a5, $a4 + ; LA64-NEXT: move $a6, $a5 +@@ -252,8 +236,6 @@ define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind { + ; LA64-NEXT: sc.w $a6, $a2, 0 + ; LA64-NEXT: beqz $a6, .LBB9_1 + ; LA64-NEXT: # %bb.4: +-; LA64-NEXT: dbar 1792 +-; LA64-NEXT: # %bb.5: + ; LA64-NEXT: srl.w $a0, $a5, $a0 + ; LA64-NEXT: ret + %1 = atomicrmw max ptr %a, i16 %b acquire +@@ -295,7 +277,6 @@ define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind { + ; LA64-NEXT: andi $a4, $a0, 24 + ; LA64-NEXT: xori $a4, $a4, 56 + ; LA64-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: ll.w $a5, $a2, 0 + ; LA64-NEXT: and $a7, $a5, $a3 + ; LA64-NEXT: move $a6, $a5 +@@ -310,8 +291,6 @@ define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind { + ; LA64-NEXT: sc.w $a6, $a2, 0 + ; LA64-NEXT: beqz $a6, .LBB12_1 + ; LA64-NEXT: # %bb.4: +-; LA64-NEXT: dbar 1792 +-; LA64-NEXT: # %bb.5: + ; LA64-NEXT: srl.w $a0, $a5, $a0 + ; LA64-NEXT: ret + %1 = atomicrmw min ptr %a, i8 %b acquire +@@ -335,7 +314,6 @@ define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind { + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .LBB13_1: # =>This Inner Loop 
Header: Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: ll.w $a5, $a2, 0 + ; LA64-NEXT: and $a7, $a5, $a4 + ; LA64-NEXT: move $a6, $a5 +@@ -350,8 +328,6 @@ define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind { + ; LA64-NEXT: sc.w $a6, $a2, 0 + ; LA64-NEXT: beqz $a6, .LBB13_1 + ; LA64-NEXT: # %bb.4: +-; LA64-NEXT: dbar 1792 +-; LA64-NEXT: # %bb.5: + ; LA64-NEXT: srl.w $a0, $a5, $a0 + ; LA64-NEXT: ret + %1 = atomicrmw min ptr %a, i16 %b acquire +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll +index c077d14f728f..626276ba05f7 100644 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll ++++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll +@@ -13,7 +13,6 @@ define i8 @atomicrmw_xchg_i8_acquire(ptr %a, i8 %b) nounwind { + ; LA32-NEXT: andi $a1, $a1, 255 + ; LA32-NEXT: sll.w $a1, $a1, $a0 + ; LA32-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a4, $a2, 0 + ; LA32-NEXT: addi.w $a5, $a1, 0 + ; LA32-NEXT: xor $a5, $a4, $a5 +@@ -37,7 +36,6 @@ define i8 @atomicrmw_xchg_i8_acquire(ptr %a, i8 %b) nounwind { + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: ll.w $a4, $a2, 0 + ; LA64-NEXT: addi.w $a5, $a1, 0 + ; LA64-NEXT: xor $a5, $a4, $a5 +@@ -64,7 +62,6 @@ define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind { + ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 + ; LA32-NEXT: sll.w $a1, $a1, $a0 + ; LA32-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a4, $a2, 0 + ; LA32-NEXT: addi.w $a5, $a1, 0 + ; LA32-NEXT: xor $a5, $a4, $a5 +@@ -89,7 +86,6 @@ define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind { + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: ll.w $a4, $a2, 0 + ; LA64-NEXT: addi.w $a5, $a1, 0 + ; LA64-NEXT: xor $a5, $a4, $a5 +@@ -108,7 +104,6 @@ define i32 @atomicrmw_xchg_i32_acquire(ptr %a, i32 %b) nounwind { + ; LA32-LABEL: atomicrmw_xchg_i32_acquire: + ; LA32: # %bb.0: + ; LA32-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a2, $a0, 0 + ; LA32-NEXT: move $a3, $a1 + ; LA32-NEXT: sc.w $a3, $a0, 0 +@@ -157,7 +152,6 @@ define i8 @atomicrmw_add_i8_acquire(ptr %a, i8 %b) nounwind { + ; LA32-NEXT: andi $a1, $a1, 255 + ; LA32-NEXT: sll.w $a1, $a1, $a0 + ; LA32-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a4, $a2, 0 + ; LA32-NEXT: add.w $a5, $a4, $a1 + ; LA32-NEXT: xor $a5, $a4, $a5 +@@ -181,7 +175,6 @@ define i8 @atomicrmw_add_i8_acquire(ptr %a, i8 %b) nounwind { + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: ll.w $a4, $a2, 0 + ; LA64-NEXT: add.w $a5, $a4, $a1 + ; LA64-NEXT: xor $a5, $a4, $a5 +@@ -208,7 +201,6 @@ define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind { + ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 + ; LA32-NEXT: sll.w $a1, $a1, $a0 + ; LA32-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a4, $a2, 0 + ; LA32-NEXT: add.w $a5, $a4, $a1 + ; LA32-NEXT: xor $a5, $a4, $a5 +@@ -233,7 +225,6 @@ define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind { + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; 
LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: ll.w $a4, $a2, 0 + ; LA64-NEXT: add.w $a5, $a4, $a1 + ; LA64-NEXT: xor $a5, $a4, $a5 +@@ -252,7 +243,6 @@ define i32 @atomicrmw_add_i32_acquire(ptr %a, i32 %b) nounwind { + ; LA32-LABEL: atomicrmw_add_i32_acquire: + ; LA32: # %bb.0: + ; LA32-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a2, $a0, 0 + ; LA32-NEXT: add.w $a3, $a2, $a1 + ; LA32-NEXT: sc.w $a3, $a0, 0 +@@ -301,7 +291,6 @@ define i8 @atomicrmw_sub_i8_acquire(ptr %a, i8 %b) nounwind { + ; LA32-NEXT: andi $a1, $a1, 255 + ; LA32-NEXT: sll.w $a1, $a1, $a0 + ; LA32-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a4, $a2, 0 + ; LA32-NEXT: sub.w $a5, $a4, $a1 + ; LA32-NEXT: xor $a5, $a4, $a5 +@@ -325,7 +314,6 @@ define i8 @atomicrmw_sub_i8_acquire(ptr %a, i8 %b) nounwind { + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: ll.w $a4, $a2, 0 + ; LA64-NEXT: sub.w $a5, $a4, $a1 + ; LA64-NEXT: xor $a5, $a4, $a5 +@@ -352,7 +340,6 @@ define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind { + ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 + ; LA32-NEXT: sll.w $a1, $a1, $a0 + ; LA32-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a4, $a2, 0 + ; LA32-NEXT: sub.w $a5, $a4, $a1 + ; LA32-NEXT: xor $a5, $a4, $a5 +@@ -377,7 +364,6 @@ define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind { + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: ll.w $a4, $a2, 0 + ; LA64-NEXT: sub.w $a5, $a4, $a1 + ; LA64-NEXT: xor $a5, $a4, $a5 +@@ -396,7 +382,6 @@ define i32 @atomicrmw_sub_i32_acquire(ptr %a, i32 %b) nounwind { + ; LA32-LABEL: atomicrmw_sub_i32_acquire: + ; LA32: # %bb.0: + ; LA32-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a2, $a0, 0 + ; LA32-NEXT: sub.w $a3, $a2, $a1 + ; LA32-NEXT: sc.w $a3, $a0, 0 +@@ -447,7 +432,6 @@ define i8 @atomicrmw_nand_i8_acquire(ptr %a, i8 %b) nounwind { + ; LA32-NEXT: andi $a1, $a1, 255 + ; LA32-NEXT: sll.w $a1, $a1, $a0 + ; LA32-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a4, $a2, 0 + ; LA32-NEXT: and $a5, $a4, $a1 + ; LA32-NEXT: nor $a5, $a5, $zero +@@ -472,7 +456,6 @@ define i8 @atomicrmw_nand_i8_acquire(ptr %a, i8 %b) nounwind { + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: ll.w $a4, $a2, 0 + ; LA64-NEXT: and $a5, $a4, $a1 + ; LA64-NEXT: nor $a5, $a5, $zero +@@ -500,7 +483,6 @@ define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind { + ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 + ; LA32-NEXT: sll.w $a1, $a1, $a0 + ; LA32-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a4, $a2, 0 + ; LA32-NEXT: and $a5, $a4, $a1 + ; LA32-NEXT: nor $a5, $a5, $zero +@@ -526,7 +508,6 @@ define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind { + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: ll.w $a4, $a2, 0 + ; LA64-NEXT: and $a5, $a4, $a1 + ; 
LA64-NEXT: nor $a5, $a5, $zero +@@ -546,7 +527,6 @@ define i32 @atomicrmw_nand_i32_acquire(ptr %a, i32 %b) nounwind { + ; LA32-LABEL: atomicrmw_nand_i32_acquire: + ; LA32: # %bb.0: + ; LA32-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a2, $a0, 0 + ; LA32-NEXT: and $a3, $a2, $a1 + ; LA32-NEXT: nor $a3, $a3, $zero +@@ -559,7 +539,6 @@ define i32 @atomicrmw_nand_i32_acquire(ptr %a, i32 %b) nounwind { + ; LA64-LABEL: atomicrmw_nand_i32_acquire: + ; LA64: # %bb.0: + ; LA64-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: ll.w $a2, $a0, 0 + ; LA64-NEXT: and $a3, $a2, $a1 + ; LA64-NEXT: nor $a3, $a3, $zero +@@ -586,7 +565,6 @@ define i64 @atomicrmw_nand_i64_acquire(ptr %a, i64 %b) nounwind { + ; LA64-LABEL: atomicrmw_nand_i64_acquire: + ; LA64: # %bb.0: + ; LA64-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 +-; LA64-NEXT: dbar 0 + ; LA64-NEXT: ll.d $a2, $a0, 0 + ; LA64-NEXT: and $a3, $a2, $a1 + ; LA64-NEXT: nor $a3, $a3, $zero +@@ -611,7 +589,6 @@ define i8 @atomicrmw_and_i8_acquire(ptr %a, i8 %b) nounwind { + ; LA32-NEXT: addi.w $a3, $zero, -4 + ; LA32-NEXT: and $a0, $a0, $a3 + ; LA32-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a3, $a0, 0 + ; LA32-NEXT: and $a4, $a3, $a1 + ; LA32-NEXT: sc.w $a4, $a0, 0 +@@ -650,7 +627,6 @@ define i16 @atomicrmw_and_i16_acquire(ptr %a, i16 %b) nounwind { + ; LA32-NEXT: addi.w $a2, $zero, -4 + ; LA32-NEXT: and $a0, $a0, $a2 + ; LA32-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a2, $a0, 0 + ; LA32-NEXT: and $a4, $a2, $a1 + ; LA32-NEXT: sc.w $a4, $a0, 0 +@@ -681,7 +657,6 @@ define i32 @atomicrmw_and_i32_acquire(ptr %a, i32 %b) nounwind { + ; LA32-LABEL: atomicrmw_and_i32_acquire: + ; LA32: # %bb.0: + ; LA32-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a2, $a0, 0 + ; LA32-NEXT: and $a3, $a2, $a1 + ; LA32-NEXT: sc.w $a3, $a0, 0 +@@ -728,7 +703,6 @@ define i8 @atomicrmw_or_i8_acquire(ptr %a, i8 %b) nounwind { + ; LA32-NEXT: andi $a1, $a1, 255 + ; LA32-NEXT: sll.w $a1, $a1, $a0 + ; LA32-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a3, $a2, 0 + ; LA32-NEXT: or $a4, $a3, $a1 + ; LA32-NEXT: sc.w $a4, $a2, 0 +@@ -760,7 +734,6 @@ define i16 @atomicrmw_or_i16_acquire(ptr %a, i16 %b) nounwind { + ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 + ; LA32-NEXT: sll.w $a1, $a1, $a0 + ; LA32-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a3, $a2, 0 + ; LA32-NEXT: or $a4, $a3, $a1 + ; LA32-NEXT: sc.w $a4, $a2, 0 +@@ -787,7 +760,6 @@ define i32 @atomicrmw_or_i32_acquire(ptr %a, i32 %b) nounwind { + ; LA32-LABEL: atomicrmw_or_i32_acquire: + ; LA32: # %bb.0: + ; LA32-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a2, $a0, 0 + ; LA32-NEXT: or $a3, $a2, $a1 + ; LA32-NEXT: sc.w $a3, $a0, 0 +@@ -834,7 +806,6 @@ define i8 @atomicrmw_xor_i8_acquire(ptr %a, i8 %b) nounwind { + ; LA32-NEXT: andi $a1, $a1, 255 + ; LA32-NEXT: sll.w $a1, $a1, $a0 + ; LA32-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a3, $a2, 0 + ; LA32-NEXT: xor $a4, $a3, $a1 + ; LA32-NEXT: sc.w $a4, $a2, 0 +@@ -866,7 +837,6 @@ define i16 @atomicrmw_xor_i16_acquire(ptr %a, i16 %b) nounwind { + ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 + ; LA32-NEXT: sll.w $a1, $a1, $a0 + ; LA32-NEXT: .LBB25_1: # =>This 
Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a3, $a2, 0 + ; LA32-NEXT: xor $a4, $a3, $a1 + ; LA32-NEXT: sc.w $a4, $a2, 0 +@@ -893,7 +863,6 @@ define i32 @atomicrmw_xor_i32_acquire(ptr %a, i32 %b) nounwind { + ; LA32-LABEL: atomicrmw_xor_i32_acquire: + ; LA32: # %bb.0: + ; LA32-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1 +-; LA32-NEXT: dbar 0 + ; LA32-NEXT: ll.w $a2, $a0, 0 + ; LA32-NEXT: xor $a3, $a2, $a1 + ; LA32-NEXT: sc.w $a3, $a0, 0 +diff --git a/llvm/unittests/Target/LoongArch/InstSizes.cpp b/llvm/unittests/Target/LoongArch/InstSizes.cpp +index 1a5d4369c48b..3180c7237a79 100644 +--- a/llvm/unittests/Target/LoongArch/InstSizes.cpp ++++ b/llvm/unittests/Target/LoongArch/InstSizes.cpp +@@ -121,7 +121,7 @@ TEST(InstSizes, AtomicPseudo) { + " dead early-clobber renamable $r10, dead early-clobber renamable $r11 = PseudoAtomicLoadAdd32 renamable $r7, renamable $r6, renamable $r8\n" + " dead early-clobber renamable $r5, dead early-clobber renamable $r9, dead early-clobber renamable $r10 = PseudoMaskedAtomicLoadUMax32 renamable $r7, renamable $r6, renamable $r8, 4\n" + " early-clobber renamable $r9, dead early-clobber renamable $r10, dead early-clobber renamable $r11 = PseudoMaskedAtomicLoadMax32 killed renamable $r6, killed renamable $r5, killed renamable $r7, killed renamable $r8, 4\n" +- " dead early-clobber renamable $r5, dead early-clobber renamable $r9 = PseudoCmpXchg32 renamable $r7, renamable $r4, renamable $r6\n" ++ " dead early-clobber renamable $r5, dead early-clobber renamable $r9 = PseudoCmpXchg32 renamable $r7, renamable $r4, renamable $r6, 4\n" + " dead early-clobber renamable $r5, dead early-clobber renamable $r9 = PseudoMaskedCmpXchg32 killed renamable $r7, killed renamable $r4, killed renamable $r6, killed renamable $r8, 4\n", + // clang-format on + [](LoongArchInstrInfo &II, MachineFunction &MF) { +-- +2.20.1 + diff --git a/0061-LoongArch-Add-some-atomic-tests-68766.patch b/0061-LoongArch-Add-some-atomic-tests-68766.patch new file mode 100644 index 0000000000000000000000000000000000000000..b97e5637dacdf10542fe04b87d9a88a8fa03984b --- /dev/null +++ b/0061-LoongArch-Add-some-atomic-tests-68766.patch @@ -0,0 +1,8108 @@ +From 3d3161280313c38aad695bc96a27bdba5a804e53 Mon Sep 17 00:00:00 2001 +From: hev +Date: Wed, 11 Oct 2023 18:28:04 +0800 +Subject: [PATCH 61/66] [LoongArch] Add some atomic tests (#68766) + +(cherry picked from commit 37b93f07cd7ba2b1e6e81116cd49d34396b7b70a) + +Change-Id: I63a1fee7f5f715f1b1c97dec294370eb2d7fd877 +--- + .../LoongArch/ir-instruction/atomicrmw-fp.ll | 2714 ++++++++++++- + .../ir-instruction/atomicrmw-minmax.ll | 1400 +++++++ + .../LoongArch/ir-instruction/atomicrmw.ll | 3346 ++++++++++++++++- + .../ir-instruction/fence-singlethread.ll | 17 + + .../ir-instruction/load-store-atomic.ll | 196 + + 5 files changed, 7609 insertions(+), 64 deletions(-) + create mode 100644 llvm/test/CodeGen/LoongArch/ir-instruction/fence-singlethread.ll + +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll +index 9a29d67e9982..02d481cb3865 100644 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll ++++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll +@@ -2,8 +2,6 @@ + ; RUN: llc --mtriple=loongarch64 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA64F + ; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64D + +-;; Add more test cases after supporting different AtomicOrdering. 
+- + define float @float_fadd_acquire(ptr %p) nounwind { + ; LA64F-LABEL: float_fadd_acquire: + ; LA64F: # %bb.0: +@@ -681,3 +679,2715 @@ define double @double_fmax_acquire(ptr %p) nounwind { + %v = atomicrmw fmax ptr %p, double 1.0 acquire, align 4 + ret double %v + } ++ ++define float @float_fadd_release(ptr %p) nounwind { ++; LA64F-LABEL: float_fadd_release: ++; LA64F: # %bb.0: ++; LA64F-NEXT: fld.s $fa0, $a0, 0 ++; LA64F-NEXT: addi.w $a1, $zero, 1 ++; LA64F-NEXT: movgr2fr.w $fa1, $a1 ++; LA64F-NEXT: ffint.s.w $fa1, $fa1 ++; LA64F-NEXT: .p2align 4, , 16 ++; LA64F-NEXT: .LBB8_1: # %atomicrmw.start ++; LA64F-NEXT: # =>This Loop Header: Depth=1 ++; LA64F-NEXT: # Child Loop BB8_3 Depth 2 ++; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 ++; LA64F-NEXT: movfr2gr.s $a1, $fa2 ++; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: .LBB8_3: # %atomicrmw.start ++; LA64F-NEXT: # Parent Loop BB8_1 Depth=1 ++; LA64F-NEXT: # => This Inner Loop Header: Depth=2 ++; LA64F-NEXT: ll.w $a3, $a0, 0 ++; LA64F-NEXT: bne $a3, $a2, .LBB8_5 ++; LA64F-NEXT: # %bb.4: # %atomicrmw.start ++; LA64F-NEXT: # in Loop: Header=BB8_3 Depth=2 ++; LA64F-NEXT: move $a4, $a1 ++; LA64F-NEXT: sc.w $a4, $a0, 0 ++; LA64F-NEXT: beqz $a4, .LBB8_3 ++; LA64F-NEXT: b .LBB8_6 ++; LA64F-NEXT: .LBB8_5: # %atomicrmw.start ++; LA64F-NEXT: # in Loop: Header=BB8_1 Depth=1 ++; LA64F-NEXT: dbar 1792 ++; LA64F-NEXT: .LBB8_6: # %atomicrmw.start ++; LA64F-NEXT: # in Loop: Header=BB8_1 Depth=1 ++; LA64F-NEXT: movgr2fr.w $fa0, $a3 ++; LA64F-NEXT: addi.w $a1, $a2, 0 ++; LA64F-NEXT: bne $a3, $a1, .LBB8_1 ++; LA64F-NEXT: # %bb.2: # %atomicrmw.end ++; LA64F-NEXT: ret ++; ++; LA64D-LABEL: float_fadd_release: ++; LA64D: # %bb.0: ++; LA64D-NEXT: fld.s $fa0, $a0, 0 ++; LA64D-NEXT: addi.w $a1, $zero, 1 ++; LA64D-NEXT: movgr2fr.w $fa1, $a1 ++; LA64D-NEXT: ffint.s.w $fa1, $fa1 ++; LA64D-NEXT: .p2align 4, , 16 ++; LA64D-NEXT: .LBB8_1: # %atomicrmw.start ++; LA64D-NEXT: # =>This Loop Header: Depth=1 ++; LA64D-NEXT: # Child Loop BB8_3 Depth 2 ++; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 ++; LA64D-NEXT: movfr2gr.s $a1, $fa2 ++; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: .LBB8_3: # %atomicrmw.start ++; LA64D-NEXT: # Parent Loop BB8_1 Depth=1 ++; LA64D-NEXT: # => This Inner Loop Header: Depth=2 ++; LA64D-NEXT: ll.w $a3, $a0, 0 ++; LA64D-NEXT: bne $a3, $a2, .LBB8_5 ++; LA64D-NEXT: # %bb.4: # %atomicrmw.start ++; LA64D-NEXT: # in Loop: Header=BB8_3 Depth=2 ++; LA64D-NEXT: move $a4, $a1 ++; LA64D-NEXT: sc.w $a4, $a0, 0 ++; LA64D-NEXT: beqz $a4, .LBB8_3 ++; LA64D-NEXT: b .LBB8_6 ++; LA64D-NEXT: .LBB8_5: # %atomicrmw.start ++; LA64D-NEXT: # in Loop: Header=BB8_1 Depth=1 ++; LA64D-NEXT: dbar 1792 ++; LA64D-NEXT: .LBB8_6: # %atomicrmw.start ++; LA64D-NEXT: # in Loop: Header=BB8_1 Depth=1 ++; LA64D-NEXT: movgr2fr.w $fa0, $a3 ++; LA64D-NEXT: addi.w $a1, $a2, 0 ++; LA64D-NEXT: bne $a3, $a1, .LBB8_1 ++; LA64D-NEXT: # %bb.2: # %atomicrmw.end ++; LA64D-NEXT: ret ++ %v = atomicrmw fadd ptr %p, float 1.0 release, align 4 ++ ret float %v ++} ++ ++define float @float_fsub_release(ptr %p) nounwind { ++; LA64F-LABEL: float_fsub_release: ++; LA64F: # %bb.0: ++; LA64F-NEXT: fld.s $fa0, $a0, 0 ++; LA64F-NEXT: pcalau12i $a1, %pc_hi20(.LCPI9_0) ++; LA64F-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI9_0) ++; LA64F-NEXT: fld.s $fa1, $a1, 0 ++; LA64F-NEXT: .p2align 4, , 16 ++; LA64F-NEXT: .LBB9_1: # %atomicrmw.start ++; LA64F-NEXT: # =>This Loop Header: Depth=1 ++; LA64F-NEXT: # Child Loop BB9_3 Depth 2 ++; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 ++; LA64F-NEXT: movfr2gr.s $a1, $fa2 ++; LA64F-NEXT: movfr2gr.s $a2, 
$fa0 ++; LA64F-NEXT: .LBB9_3: # %atomicrmw.start ++; LA64F-NEXT: # Parent Loop BB9_1 Depth=1 ++; LA64F-NEXT: # => This Inner Loop Header: Depth=2 ++; LA64F-NEXT: ll.w $a3, $a0, 0 ++; LA64F-NEXT: bne $a3, $a2, .LBB9_5 ++; LA64F-NEXT: # %bb.4: # %atomicrmw.start ++; LA64F-NEXT: # in Loop: Header=BB9_3 Depth=2 ++; LA64F-NEXT: move $a4, $a1 ++; LA64F-NEXT: sc.w $a4, $a0, 0 ++; LA64F-NEXT: beqz $a4, .LBB9_3 ++; LA64F-NEXT: b .LBB9_6 ++; LA64F-NEXT: .LBB9_5: # %atomicrmw.start ++; LA64F-NEXT: # in Loop: Header=BB9_1 Depth=1 ++; LA64F-NEXT: dbar 1792 ++; LA64F-NEXT: .LBB9_6: # %atomicrmw.start ++; LA64F-NEXT: # in Loop: Header=BB9_1 Depth=1 ++; LA64F-NEXT: movgr2fr.w $fa0, $a3 ++; LA64F-NEXT: addi.w $a1, $a2, 0 ++; LA64F-NEXT: bne $a3, $a1, .LBB9_1 ++; LA64F-NEXT: # %bb.2: # %atomicrmw.end ++; LA64F-NEXT: ret ++; ++; LA64D-LABEL: float_fsub_release: ++; LA64D: # %bb.0: ++; LA64D-NEXT: fld.s $fa0, $a0, 0 ++; LA64D-NEXT: pcalau12i $a1, %pc_hi20(.LCPI9_0) ++; LA64D-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI9_0) ++; LA64D-NEXT: fld.s $fa1, $a1, 0 ++; LA64D-NEXT: .p2align 4, , 16 ++; LA64D-NEXT: .LBB9_1: # %atomicrmw.start ++; LA64D-NEXT: # =>This Loop Header: Depth=1 ++; LA64D-NEXT: # Child Loop BB9_3 Depth 2 ++; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 ++; LA64D-NEXT: movfr2gr.s $a1, $fa2 ++; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: .LBB9_3: # %atomicrmw.start ++; LA64D-NEXT: # Parent Loop BB9_1 Depth=1 ++; LA64D-NEXT: # => This Inner Loop Header: Depth=2 ++; LA64D-NEXT: ll.w $a3, $a0, 0 ++; LA64D-NEXT: bne $a3, $a2, .LBB9_5 ++; LA64D-NEXT: # %bb.4: # %atomicrmw.start ++; LA64D-NEXT: # in Loop: Header=BB9_3 Depth=2 ++; LA64D-NEXT: move $a4, $a1 ++; LA64D-NEXT: sc.w $a4, $a0, 0 ++; LA64D-NEXT: beqz $a4, .LBB9_3 ++; LA64D-NEXT: b .LBB9_6 ++; LA64D-NEXT: .LBB9_5: # %atomicrmw.start ++; LA64D-NEXT: # in Loop: Header=BB9_1 Depth=1 ++; LA64D-NEXT: dbar 1792 ++; LA64D-NEXT: .LBB9_6: # %atomicrmw.start ++; LA64D-NEXT: # in Loop: Header=BB9_1 Depth=1 ++; LA64D-NEXT: movgr2fr.w $fa0, $a3 ++; LA64D-NEXT: addi.w $a1, $a2, 0 ++; LA64D-NEXT: bne $a3, $a1, .LBB9_1 ++; LA64D-NEXT: # %bb.2: # %atomicrmw.end ++; LA64D-NEXT: ret ++ %v = atomicrmw fsub ptr %p, float 1.0 release, align 4 ++ ret float %v ++} ++ ++define float @float_fmin_release(ptr %p) nounwind { ++; LA64F-LABEL: float_fmin_release: ++; LA64F: # %bb.0: ++; LA64F-NEXT: fld.s $fa0, $a0, 0 ++; LA64F-NEXT: addi.w $a1, $zero, 1 ++; LA64F-NEXT: movgr2fr.w $fa1, $a1 ++; LA64F-NEXT: ffint.s.w $fa1, $fa1 ++; LA64F-NEXT: .p2align 4, , 16 ++; LA64F-NEXT: .LBB10_1: # %atomicrmw.start ++; LA64F-NEXT: # =>This Loop Header: Depth=1 ++; LA64F-NEXT: # Child Loop BB10_3 Depth 2 ++; LA64F-NEXT: fmax.s $fa2, $fa0, $fa0 ++; LA64F-NEXT: fmin.s $fa2, $fa2, $fa1 ++; LA64F-NEXT: movfr2gr.s $a1, $fa2 ++; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: .LBB10_3: # %atomicrmw.start ++; LA64F-NEXT: # Parent Loop BB10_1 Depth=1 ++; LA64F-NEXT: # => This Inner Loop Header: Depth=2 ++; LA64F-NEXT: ll.w $a3, $a0, 0 ++; LA64F-NEXT: bne $a3, $a2, .LBB10_5 ++; LA64F-NEXT: # %bb.4: # %atomicrmw.start ++; LA64F-NEXT: # in Loop: Header=BB10_3 Depth=2 ++; LA64F-NEXT: move $a4, $a1 ++; LA64F-NEXT: sc.w $a4, $a0, 0 ++; LA64F-NEXT: beqz $a4, .LBB10_3 ++; LA64F-NEXT: b .LBB10_6 ++; LA64F-NEXT: .LBB10_5: # %atomicrmw.start ++; LA64F-NEXT: # in Loop: Header=BB10_1 Depth=1 ++; LA64F-NEXT: dbar 1792 ++; LA64F-NEXT: .LBB10_6: # %atomicrmw.start ++; LA64F-NEXT: # in Loop: Header=BB10_1 Depth=1 ++; LA64F-NEXT: movgr2fr.w $fa0, $a3 ++; LA64F-NEXT: addi.w $a1, $a2, 0 ++; LA64F-NEXT: bne $a3, $a1, .LBB10_1 
++; LA64F-NEXT: # %bb.2: # %atomicrmw.end ++; LA64F-NEXT: ret ++; ++; LA64D-LABEL: float_fmin_release: ++; LA64D: # %bb.0: ++; LA64D-NEXT: fld.s $fa0, $a0, 0 ++; LA64D-NEXT: addi.w $a1, $zero, 1 ++; LA64D-NEXT: movgr2fr.w $fa1, $a1 ++; LA64D-NEXT: ffint.s.w $fa1, $fa1 ++; LA64D-NEXT: .p2align 4, , 16 ++; LA64D-NEXT: .LBB10_1: # %atomicrmw.start ++; LA64D-NEXT: # =>This Loop Header: Depth=1 ++; LA64D-NEXT: # Child Loop BB10_3 Depth 2 ++; LA64D-NEXT: fmax.s $fa2, $fa0, $fa0 ++; LA64D-NEXT: fmin.s $fa2, $fa2, $fa1 ++; LA64D-NEXT: movfr2gr.s $a1, $fa2 ++; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: .LBB10_3: # %atomicrmw.start ++; LA64D-NEXT: # Parent Loop BB10_1 Depth=1 ++; LA64D-NEXT: # => This Inner Loop Header: Depth=2 ++; LA64D-NEXT: ll.w $a3, $a0, 0 ++; LA64D-NEXT: bne $a3, $a2, .LBB10_5 ++; LA64D-NEXT: # %bb.4: # %atomicrmw.start ++; LA64D-NEXT: # in Loop: Header=BB10_3 Depth=2 ++; LA64D-NEXT: move $a4, $a1 ++; LA64D-NEXT: sc.w $a4, $a0, 0 ++; LA64D-NEXT: beqz $a4, .LBB10_3 ++; LA64D-NEXT: b .LBB10_6 ++; LA64D-NEXT: .LBB10_5: # %atomicrmw.start ++; LA64D-NEXT: # in Loop: Header=BB10_1 Depth=1 ++; LA64D-NEXT: dbar 1792 ++; LA64D-NEXT: .LBB10_6: # %atomicrmw.start ++; LA64D-NEXT: # in Loop: Header=BB10_1 Depth=1 ++; LA64D-NEXT: movgr2fr.w $fa0, $a3 ++; LA64D-NEXT: addi.w $a1, $a2, 0 ++; LA64D-NEXT: bne $a3, $a1, .LBB10_1 ++; LA64D-NEXT: # %bb.2: # %atomicrmw.end ++; LA64D-NEXT: ret ++ %v = atomicrmw fmin ptr %p, float 1.0 release, align 4 ++ ret float %v ++} ++ ++define float @float_fmax_release(ptr %p) nounwind { ++; LA64F-LABEL: float_fmax_release: ++; LA64F: # %bb.0: ++; LA64F-NEXT: fld.s $fa0, $a0, 0 ++; LA64F-NEXT: addi.w $a1, $zero, 1 ++; LA64F-NEXT: movgr2fr.w $fa1, $a1 ++; LA64F-NEXT: ffint.s.w $fa1, $fa1 ++; LA64F-NEXT: .p2align 4, , 16 ++; LA64F-NEXT: .LBB11_1: # %atomicrmw.start ++; LA64F-NEXT: # =>This Loop Header: Depth=1 ++; LA64F-NEXT: # Child Loop BB11_3 Depth 2 ++; LA64F-NEXT: fmax.s $fa2, $fa0, $fa0 ++; LA64F-NEXT: fmax.s $fa2, $fa2, $fa1 ++; LA64F-NEXT: movfr2gr.s $a1, $fa2 ++; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: .LBB11_3: # %atomicrmw.start ++; LA64F-NEXT: # Parent Loop BB11_1 Depth=1 ++; LA64F-NEXT: # => This Inner Loop Header: Depth=2 ++; LA64F-NEXT: ll.w $a3, $a0, 0 ++; LA64F-NEXT: bne $a3, $a2, .LBB11_5 ++; LA64F-NEXT: # %bb.4: # %atomicrmw.start ++; LA64F-NEXT: # in Loop: Header=BB11_3 Depth=2 ++; LA64F-NEXT: move $a4, $a1 ++; LA64F-NEXT: sc.w $a4, $a0, 0 ++; LA64F-NEXT: beqz $a4, .LBB11_3 ++; LA64F-NEXT: b .LBB11_6 ++; LA64F-NEXT: .LBB11_5: # %atomicrmw.start ++; LA64F-NEXT: # in Loop: Header=BB11_1 Depth=1 ++; LA64F-NEXT: dbar 1792 ++; LA64F-NEXT: .LBB11_6: # %atomicrmw.start ++; LA64F-NEXT: # in Loop: Header=BB11_1 Depth=1 ++; LA64F-NEXT: movgr2fr.w $fa0, $a3 ++; LA64F-NEXT: addi.w $a1, $a2, 0 ++; LA64F-NEXT: bne $a3, $a1, .LBB11_1 ++; LA64F-NEXT: # %bb.2: # %atomicrmw.end ++; LA64F-NEXT: ret ++; ++; LA64D-LABEL: float_fmax_release: ++; LA64D: # %bb.0: ++; LA64D-NEXT: fld.s $fa0, $a0, 0 ++; LA64D-NEXT: addi.w $a1, $zero, 1 ++; LA64D-NEXT: movgr2fr.w $fa1, $a1 ++; LA64D-NEXT: ffint.s.w $fa1, $fa1 ++; LA64D-NEXT: .p2align 4, , 16 ++; LA64D-NEXT: .LBB11_1: # %atomicrmw.start ++; LA64D-NEXT: # =>This Loop Header: Depth=1 ++; LA64D-NEXT: # Child Loop BB11_3 Depth 2 ++; LA64D-NEXT: fmax.s $fa2, $fa0, $fa0 ++; LA64D-NEXT: fmax.s $fa2, $fa2, $fa1 ++; LA64D-NEXT: movfr2gr.s $a1, $fa2 ++; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: .LBB11_3: # %atomicrmw.start ++; LA64D-NEXT: # Parent Loop BB11_1 Depth=1 ++; LA64D-NEXT: # => This Inner Loop Header: 
Depth=2 ++; LA64D-NEXT: ll.w $a3, $a0, 0 ++; LA64D-NEXT: bne $a3, $a2, .LBB11_5 ++; LA64D-NEXT: # %bb.4: # %atomicrmw.start ++; LA64D-NEXT: # in Loop: Header=BB11_3 Depth=2 ++; LA64D-NEXT: move $a4, $a1 ++; LA64D-NEXT: sc.w $a4, $a0, 0 ++; LA64D-NEXT: beqz $a4, .LBB11_3 ++; LA64D-NEXT: b .LBB11_6 ++; LA64D-NEXT: .LBB11_5: # %atomicrmw.start ++; LA64D-NEXT: # in Loop: Header=BB11_1 Depth=1 ++; LA64D-NEXT: dbar 1792 ++; LA64D-NEXT: .LBB11_6: # %atomicrmw.start ++; LA64D-NEXT: # in Loop: Header=BB11_1 Depth=1 ++; LA64D-NEXT: movgr2fr.w $fa0, $a3 ++; LA64D-NEXT: addi.w $a1, $a2, 0 ++; LA64D-NEXT: bne $a3, $a1, .LBB11_1 ++; LA64D-NEXT: # %bb.2: # %atomicrmw.end ++; LA64D-NEXT: ret ++ %v = atomicrmw fmax ptr %p, float 1.0 release, align 4 ++ ret float %v ++} ++ ++define double @double_fadd_release(ptr %p) nounwind { ++; LA64F-LABEL: double_fadd_release: ++; LA64F: # %bb.0: ++; LA64F-NEXT: addi.d $sp, $sp, -80 ++; LA64F-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill ++; LA64F-NEXT: move $fp, $a0 ++; LA64F-NEXT: ld.d $a0, $a0, 0 ++; LA64F-NEXT: lu52i.d $s0, $zero, 1023 ++; LA64F-NEXT: ori $s1, $zero, 8 ++; LA64F-NEXT: addi.d $s2, $sp, 16 ++; LA64F-NEXT: addi.d $s3, $sp, 8 ++; LA64F-NEXT: ori $s4, $zero, 3 ++; LA64F-NEXT: .p2align 4, , 16 ++; LA64F-NEXT: .LBB12_1: # %atomicrmw.start ++; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 ++; LA64F-NEXT: st.d $a0, $sp, 16 ++; LA64F-NEXT: move $a1, $s0 ++; LA64F-NEXT: bl %plt(__adddf3) ++; LA64F-NEXT: st.d $a0, $sp, 8 ++; LA64F-NEXT: move $a0, $s1 ++; LA64F-NEXT: move $a1, $fp ++; LA64F-NEXT: move $a2, $s2 ++; LA64F-NEXT: move $a3, $s3 ++; LA64F-NEXT: move $a4, $s4 ++; LA64F-NEXT: move $a5, $zero ++; LA64F-NEXT: bl %plt(__atomic_compare_exchange) ++; LA64F-NEXT: move $a1, $a0 ++; LA64F-NEXT: ld.d $a0, $sp, 16 ++; LA64F-NEXT: beqz $a1, .LBB12_1 ++; LA64F-NEXT: # %bb.2: # %atomicrmw.end ++; LA64F-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload ++; LA64F-NEXT: addi.d $sp, $sp, 80 ++; LA64F-NEXT: ret ++; ++; LA64D-LABEL: double_fadd_release: ++; LA64D: # %bb.0: ++; LA64D-NEXT: addi.d $sp, $sp, -80 ++; LA64D-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill ++; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill ++; LA64D-NEXT: move $fp, $a0 ++; LA64D-NEXT: fld.d $fa0, $a0, 0 ++; LA64D-NEXT: addi.d $a0, $zero, 1 ++; LA64D-NEXT: movgr2fr.d $fa1, $a0 ++; LA64D-NEXT: ffint.d.l $fs0, $fa1 ++; LA64D-NEXT: ori $s0, $zero, 8 ++; LA64D-NEXT: addi.d $s1, $sp, 16 ++; LA64D-NEXT: addi.d $s2, $sp, 8 ++; LA64D-NEXT: ori $s3, $zero, 3 ++; LA64D-NEXT: .p2align 4, , 16 ++; LA64D-NEXT: .LBB12_1: # %atomicrmw.start ++; LA64D-NEXT: # =>This 
Inner Loop Header: Depth=1 ++; LA64D-NEXT: fst.d $fa0, $sp, 16 ++; LA64D-NEXT: fadd.d $fa0, $fa0, $fs0 ++; LA64D-NEXT: fst.d $fa0, $sp, 8 ++; LA64D-NEXT: move $a0, $s0 ++; LA64D-NEXT: move $a1, $fp ++; LA64D-NEXT: move $a2, $s1 ++; LA64D-NEXT: move $a3, $s2 ++; LA64D-NEXT: move $a4, $s3 ++; LA64D-NEXT: move $a5, $zero ++; LA64D-NEXT: bl %plt(__atomic_compare_exchange) ++; LA64D-NEXT: fld.d $fa0, $sp, 16 ++; LA64D-NEXT: beqz $a0, .LBB12_1 ++; LA64D-NEXT: # %bb.2: # %atomicrmw.end ++; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload ++; LA64D-NEXT: addi.d $sp, $sp, 80 ++; LA64D-NEXT: ret ++ %v = atomicrmw fadd ptr %p, double 1.0 release, align 4 ++ ret double %v ++} ++ ++define double @double_fsub_release(ptr %p) nounwind { ++; LA64F-LABEL: double_fsub_release: ++; LA64F: # %bb.0: ++; LA64F-NEXT: addi.d $sp, $sp, -80 ++; LA64F-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill ++; LA64F-NEXT: move $fp, $a0 ++; LA64F-NEXT: ld.d $a0, $a0, 0 ++; LA64F-NEXT: lu52i.d $s0, $zero, -1025 ++; LA64F-NEXT: ori $s1, $zero, 8 ++; LA64F-NEXT: addi.d $s2, $sp, 16 ++; LA64F-NEXT: addi.d $s3, $sp, 8 ++; LA64F-NEXT: ori $s4, $zero, 3 ++; LA64F-NEXT: .p2align 4, , 16 ++; LA64F-NEXT: .LBB13_1: # %atomicrmw.start ++; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 ++; LA64F-NEXT: st.d $a0, $sp, 16 ++; LA64F-NEXT: move $a1, $s0 ++; LA64F-NEXT: bl %plt(__adddf3) ++; LA64F-NEXT: st.d $a0, $sp, 8 ++; LA64F-NEXT: move $a0, $s1 ++; LA64F-NEXT: move $a1, $fp ++; LA64F-NEXT: move $a2, $s2 ++; LA64F-NEXT: move $a3, $s3 ++; LA64F-NEXT: move $a4, $s4 ++; LA64F-NEXT: move $a5, $zero ++; LA64F-NEXT: bl %plt(__atomic_compare_exchange) ++; LA64F-NEXT: move $a1, $a0 ++; LA64F-NEXT: ld.d $a0, $sp, 16 ++; LA64F-NEXT: beqz $a1, .LBB13_1 ++; LA64F-NEXT: # %bb.2: # %atomicrmw.end ++; LA64F-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload ++; LA64F-NEXT: addi.d $sp, $sp, 80 ++; LA64F-NEXT: ret ++; ++; LA64D-LABEL: double_fsub_release: ++; LA64D: # %bb.0: ++; LA64D-NEXT: addi.d $sp, $sp, -80 ++; LA64D-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill ++; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill ++; LA64D-NEXT: move $fp, $a0 ++; LA64D-NEXT: fld.d $fa0, $a0, 0 ++; LA64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI13_0) ++; LA64D-NEXT: addi.d $a0, $a0, 
%pc_lo12(.LCPI13_0) ++; LA64D-NEXT: fld.d $fs0, $a0, 0 ++; LA64D-NEXT: ori $s0, $zero, 8 ++; LA64D-NEXT: addi.d $s1, $sp, 16 ++; LA64D-NEXT: addi.d $s2, $sp, 8 ++; LA64D-NEXT: ori $s3, $zero, 3 ++; LA64D-NEXT: .p2align 4, , 16 ++; LA64D-NEXT: .LBB13_1: # %atomicrmw.start ++; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 ++; LA64D-NEXT: fst.d $fa0, $sp, 16 ++; LA64D-NEXT: fadd.d $fa0, $fa0, $fs0 ++; LA64D-NEXT: fst.d $fa0, $sp, 8 ++; LA64D-NEXT: move $a0, $s0 ++; LA64D-NEXT: move $a1, $fp ++; LA64D-NEXT: move $a2, $s1 ++; LA64D-NEXT: move $a3, $s2 ++; LA64D-NEXT: move $a4, $s3 ++; LA64D-NEXT: move $a5, $zero ++; LA64D-NEXT: bl %plt(__atomic_compare_exchange) ++; LA64D-NEXT: fld.d $fa0, $sp, 16 ++; LA64D-NEXT: beqz $a0, .LBB13_1 ++; LA64D-NEXT: # %bb.2: # %atomicrmw.end ++; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload ++; LA64D-NEXT: addi.d $sp, $sp, 80 ++; LA64D-NEXT: ret ++ %v = atomicrmw fsub ptr %p, double 1.0 release, align 4 ++ ret double %v ++} ++ ++define double @double_fmin_release(ptr %p) nounwind { ++; LA64F-LABEL: double_fmin_release: ++; LA64F: # %bb.0: ++; LA64F-NEXT: addi.d $sp, $sp, -80 ++; LA64F-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill ++; LA64F-NEXT: move $fp, $a0 ++; LA64F-NEXT: ld.d $a0, $a0, 0 ++; LA64F-NEXT: lu52i.d $s0, $zero, 1023 ++; LA64F-NEXT: ori $s1, $zero, 8 ++; LA64F-NEXT: addi.d $s2, $sp, 16 ++; LA64F-NEXT: addi.d $s3, $sp, 8 ++; LA64F-NEXT: ori $s4, $zero, 3 ++; LA64F-NEXT: .p2align 4, , 16 ++; LA64F-NEXT: .LBB14_1: # %atomicrmw.start ++; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 ++; LA64F-NEXT: st.d $a0, $sp, 16 ++; LA64F-NEXT: move $a1, $s0 ++; LA64F-NEXT: bl %plt(fmin) ++; LA64F-NEXT: st.d $a0, $sp, 8 ++; LA64F-NEXT: move $a0, $s1 ++; LA64F-NEXT: move $a1, $fp ++; LA64F-NEXT: move $a2, $s2 ++; LA64F-NEXT: move $a3, $s3 ++; LA64F-NEXT: move $a4, $s4 ++; LA64F-NEXT: move $a5, $zero ++; LA64F-NEXT: bl %plt(__atomic_compare_exchange) ++; LA64F-NEXT: move $a1, $a0 ++; LA64F-NEXT: ld.d $a0, $sp, 16 ++; LA64F-NEXT: beqz $a1, .LBB14_1 ++; LA64F-NEXT: # %bb.2: # %atomicrmw.end ++; LA64F-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload ++; LA64F-NEXT: addi.d $sp, $sp, 80 ++; LA64F-NEXT: ret ++; ++; LA64D-LABEL: double_fmin_release: ++; LA64D: # %bb.0: ++; LA64D-NEXT: addi.d $sp, $sp, -80 ++; LA64D-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s2, $sp, 40 # 
8-byte Folded Spill ++; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill ++; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill ++; LA64D-NEXT: move $fp, $a0 ++; LA64D-NEXT: fld.d $fa0, $a0, 0 ++; LA64D-NEXT: addi.d $a0, $zero, 1 ++; LA64D-NEXT: movgr2fr.d $fa1, $a0 ++; LA64D-NEXT: ffint.d.l $fs0, $fa1 ++; LA64D-NEXT: ori $s0, $zero, 8 ++; LA64D-NEXT: addi.d $s1, $sp, 16 ++; LA64D-NEXT: addi.d $s2, $sp, 8 ++; LA64D-NEXT: ori $s3, $zero, 3 ++; LA64D-NEXT: .p2align 4, , 16 ++; LA64D-NEXT: .LBB14_1: # %atomicrmw.start ++; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 ++; LA64D-NEXT: fst.d $fa0, $sp, 16 ++; LA64D-NEXT: fmax.d $fa0, $fa0, $fa0 ++; LA64D-NEXT: fmin.d $fa0, $fa0, $fs0 ++; LA64D-NEXT: fst.d $fa0, $sp, 8 ++; LA64D-NEXT: move $a0, $s0 ++; LA64D-NEXT: move $a1, $fp ++; LA64D-NEXT: move $a2, $s1 ++; LA64D-NEXT: move $a3, $s2 ++; LA64D-NEXT: move $a4, $s3 ++; LA64D-NEXT: move $a5, $zero ++; LA64D-NEXT: bl %plt(__atomic_compare_exchange) ++; LA64D-NEXT: fld.d $fa0, $sp, 16 ++; LA64D-NEXT: beqz $a0, .LBB14_1 ++; LA64D-NEXT: # %bb.2: # %atomicrmw.end ++; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload ++; LA64D-NEXT: addi.d $sp, $sp, 80 ++; LA64D-NEXT: ret ++ %v = atomicrmw fmin ptr %p, double 1.0 release, align 4 ++ ret double %v ++} ++ ++define double @double_fmax_release(ptr %p) nounwind { ++; LA64F-LABEL: double_fmax_release: ++; LA64F: # %bb.0: ++; LA64F-NEXT: addi.d $sp, $sp, -80 ++; LA64F-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill ++; LA64F-NEXT: move $fp, $a0 ++; LA64F-NEXT: ld.d $a0, $a0, 0 ++; LA64F-NEXT: lu52i.d $s0, $zero, 1023 ++; LA64F-NEXT: ori $s1, $zero, 8 ++; LA64F-NEXT: addi.d $s2, $sp, 16 ++; LA64F-NEXT: addi.d $s3, $sp, 8 ++; LA64F-NEXT: ori $s4, $zero, 3 ++; LA64F-NEXT: .p2align 4, , 16 ++; LA64F-NEXT: .LBB15_1: # %atomicrmw.start ++; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 ++; LA64F-NEXT: st.d $a0, $sp, 16 ++; LA64F-NEXT: move $a1, $s0 ++; LA64F-NEXT: bl %plt(fmax) ++; LA64F-NEXT: st.d $a0, $sp, 8 ++; LA64F-NEXT: move $a0, $s1 ++; LA64F-NEXT: move $a1, $fp ++; LA64F-NEXT: move $a2, $s2 ++; LA64F-NEXT: move $a3, $s3 ++; LA64F-NEXT: move $a4, $s4 ++; LA64F-NEXT: move $a5, $zero ++; LA64F-NEXT: bl %plt(__atomic_compare_exchange) ++; LA64F-NEXT: move $a1, $a0 ++; LA64F-NEXT: ld.d $a0, $sp, 16 ++; LA64F-NEXT: beqz $a1, .LBB15_1 ++; LA64F-NEXT: # %bb.2: # %atomicrmw.end ++; LA64F-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload ++; LA64F-NEXT: addi.d $sp, $sp, 80 ++; LA64F-NEXT: ret ++; ++; LA64D-LABEL: double_fmax_release: ++; LA64D: # %bb.0: ++; 
LA64D-NEXT: addi.d $sp, $sp, -80 ++; LA64D-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill ++; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill ++; LA64D-NEXT: move $fp, $a0 ++; LA64D-NEXT: fld.d $fa0, $a0, 0 ++; LA64D-NEXT: addi.d $a0, $zero, 1 ++; LA64D-NEXT: movgr2fr.d $fa1, $a0 ++; LA64D-NEXT: ffint.d.l $fs0, $fa1 ++; LA64D-NEXT: ori $s0, $zero, 8 ++; LA64D-NEXT: addi.d $s1, $sp, 16 ++; LA64D-NEXT: addi.d $s2, $sp, 8 ++; LA64D-NEXT: ori $s3, $zero, 3 ++; LA64D-NEXT: .p2align 4, , 16 ++; LA64D-NEXT: .LBB15_1: # %atomicrmw.start ++; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 ++; LA64D-NEXT: fst.d $fa0, $sp, 16 ++; LA64D-NEXT: fmax.d $fa0, $fa0, $fa0 ++; LA64D-NEXT: fmax.d $fa0, $fa0, $fs0 ++; LA64D-NEXT: fst.d $fa0, $sp, 8 ++; LA64D-NEXT: move $a0, $s0 ++; LA64D-NEXT: move $a1, $fp ++; LA64D-NEXT: move $a2, $s1 ++; LA64D-NEXT: move $a3, $s2 ++; LA64D-NEXT: move $a4, $s3 ++; LA64D-NEXT: move $a5, $zero ++; LA64D-NEXT: bl %plt(__atomic_compare_exchange) ++; LA64D-NEXT: fld.d $fa0, $sp, 16 ++; LA64D-NEXT: beqz $a0, .LBB15_1 ++; LA64D-NEXT: # %bb.2: # %atomicrmw.end ++; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload ++; LA64D-NEXT: addi.d $sp, $sp, 80 ++; LA64D-NEXT: ret ++ %v = atomicrmw fmax ptr %p, double 1.0 release, align 4 ++ ret double %v ++} ++ ++define float @float_fadd_acq_rel(ptr %p) nounwind { ++; LA64F-LABEL: float_fadd_acq_rel: ++; LA64F: # %bb.0: ++; LA64F-NEXT: fld.s $fa0, $a0, 0 ++; LA64F-NEXT: addi.w $a1, $zero, 1 ++; LA64F-NEXT: movgr2fr.w $fa1, $a1 ++; LA64F-NEXT: ffint.s.w $fa1, $fa1 ++; LA64F-NEXT: .p2align 4, , 16 ++; LA64F-NEXT: .LBB16_1: # %atomicrmw.start ++; LA64F-NEXT: # =>This Loop Header: Depth=1 ++; LA64F-NEXT: # Child Loop BB16_3 Depth 2 ++; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 ++; LA64F-NEXT: movfr2gr.s $a1, $fa2 ++; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: .LBB16_3: # %atomicrmw.start ++; LA64F-NEXT: # Parent Loop BB16_1 Depth=1 ++; LA64F-NEXT: # => This Inner Loop Header: Depth=2 ++; LA64F-NEXT: ll.w $a3, $a0, 0 ++; LA64F-NEXT: bne $a3, $a2, .LBB16_5 ++; LA64F-NEXT: # %bb.4: # %atomicrmw.start ++; LA64F-NEXT: # in Loop: Header=BB16_3 Depth=2 ++; LA64F-NEXT: move $a4, $a1 ++; LA64F-NEXT: sc.w $a4, $a0, 0 ++; LA64F-NEXT: beqz $a4, .LBB16_3 ++; LA64F-NEXT: b .LBB16_6 ++; LA64F-NEXT: .LBB16_5: # %atomicrmw.start ++; LA64F-NEXT: # in Loop: Header=BB16_1 Depth=1 ++; LA64F-NEXT: dbar 0 ++; LA64F-NEXT: .LBB16_6: # %atomicrmw.start ++; LA64F-NEXT: # in Loop: Header=BB16_1 Depth=1 ++; LA64F-NEXT: movgr2fr.w $fa0, $a3 ++; LA64F-NEXT: addi.w $a1, $a2, 0 ++; LA64F-NEXT: bne $a3, $a1, .LBB16_1 ++; LA64F-NEXT: # %bb.2: # %atomicrmw.end ++; LA64F-NEXT: ret ++; ++; LA64D-LABEL: float_fadd_acq_rel: ++; LA64D: # %bb.0: ++; LA64D-NEXT: fld.s $fa0, $a0, 0 ++; LA64D-NEXT: addi.w $a1, $zero, 1 ++; LA64D-NEXT: movgr2fr.w $fa1, $a1 ++; LA64D-NEXT: ffint.s.w $fa1, $fa1 ++; LA64D-NEXT: .p2align 4, , 16 ++; LA64D-NEXT: .LBB16_1: # %atomicrmw.start ++; 
LA64D-NEXT: # =>This Loop Header: Depth=1 ++; LA64D-NEXT: # Child Loop BB16_3 Depth 2 ++; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 ++; LA64D-NEXT: movfr2gr.s $a1, $fa2 ++; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: .LBB16_3: # %atomicrmw.start ++; LA64D-NEXT: # Parent Loop BB16_1 Depth=1 ++; LA64D-NEXT: # => This Inner Loop Header: Depth=2 ++; LA64D-NEXT: ll.w $a3, $a0, 0 ++; LA64D-NEXT: bne $a3, $a2, .LBB16_5 ++; LA64D-NEXT: # %bb.4: # %atomicrmw.start ++; LA64D-NEXT: # in Loop: Header=BB16_3 Depth=2 ++; LA64D-NEXT: move $a4, $a1 ++; LA64D-NEXT: sc.w $a4, $a0, 0 ++; LA64D-NEXT: beqz $a4, .LBB16_3 ++; LA64D-NEXT: b .LBB16_6 ++; LA64D-NEXT: .LBB16_5: # %atomicrmw.start ++; LA64D-NEXT: # in Loop: Header=BB16_1 Depth=1 ++; LA64D-NEXT: dbar 0 ++; LA64D-NEXT: .LBB16_6: # %atomicrmw.start ++; LA64D-NEXT: # in Loop: Header=BB16_1 Depth=1 ++; LA64D-NEXT: movgr2fr.w $fa0, $a3 ++; LA64D-NEXT: addi.w $a1, $a2, 0 ++; LA64D-NEXT: bne $a3, $a1, .LBB16_1 ++; LA64D-NEXT: # %bb.2: # %atomicrmw.end ++; LA64D-NEXT: ret ++ %v = atomicrmw fadd ptr %p, float 1.0 acq_rel, align 4 ++ ret float %v ++} ++ ++define float @float_fsub_acq_rel(ptr %p) nounwind { ++; LA64F-LABEL: float_fsub_acq_rel: ++; LA64F: # %bb.0: ++; LA64F-NEXT: fld.s $fa0, $a0, 0 ++; LA64F-NEXT: pcalau12i $a1, %pc_hi20(.LCPI17_0) ++; LA64F-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI17_0) ++; LA64F-NEXT: fld.s $fa1, $a1, 0 ++; LA64F-NEXT: .p2align 4, , 16 ++; LA64F-NEXT: .LBB17_1: # %atomicrmw.start ++; LA64F-NEXT: # =>This Loop Header: Depth=1 ++; LA64F-NEXT: # Child Loop BB17_3 Depth 2 ++; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 ++; LA64F-NEXT: movfr2gr.s $a1, $fa2 ++; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: .LBB17_3: # %atomicrmw.start ++; LA64F-NEXT: # Parent Loop BB17_1 Depth=1 ++; LA64F-NEXT: # => This Inner Loop Header: Depth=2 ++; LA64F-NEXT: ll.w $a3, $a0, 0 ++; LA64F-NEXT: bne $a3, $a2, .LBB17_5 ++; LA64F-NEXT: # %bb.4: # %atomicrmw.start ++; LA64F-NEXT: # in Loop: Header=BB17_3 Depth=2 ++; LA64F-NEXT: move $a4, $a1 ++; LA64F-NEXT: sc.w $a4, $a0, 0 ++; LA64F-NEXT: beqz $a4, .LBB17_3 ++; LA64F-NEXT: b .LBB17_6 ++; LA64F-NEXT: .LBB17_5: # %atomicrmw.start ++; LA64F-NEXT: # in Loop: Header=BB17_1 Depth=1 ++; LA64F-NEXT: dbar 0 ++; LA64F-NEXT: .LBB17_6: # %atomicrmw.start ++; LA64F-NEXT: # in Loop: Header=BB17_1 Depth=1 ++; LA64F-NEXT: movgr2fr.w $fa0, $a3 ++; LA64F-NEXT: addi.w $a1, $a2, 0 ++; LA64F-NEXT: bne $a3, $a1, .LBB17_1 ++; LA64F-NEXT: # %bb.2: # %atomicrmw.end ++; LA64F-NEXT: ret ++; ++; LA64D-LABEL: float_fsub_acq_rel: ++; LA64D: # %bb.0: ++; LA64D-NEXT: fld.s $fa0, $a0, 0 ++; LA64D-NEXT: pcalau12i $a1, %pc_hi20(.LCPI17_0) ++; LA64D-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI17_0) ++; LA64D-NEXT: fld.s $fa1, $a1, 0 ++; LA64D-NEXT: .p2align 4, , 16 ++; LA64D-NEXT: .LBB17_1: # %atomicrmw.start ++; LA64D-NEXT: # =>This Loop Header: Depth=1 ++; LA64D-NEXT: # Child Loop BB17_3 Depth 2 ++; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 ++; LA64D-NEXT: movfr2gr.s $a1, $fa2 ++; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: .LBB17_3: # %atomicrmw.start ++; LA64D-NEXT: # Parent Loop BB17_1 Depth=1 ++; LA64D-NEXT: # => This Inner Loop Header: Depth=2 ++; LA64D-NEXT: ll.w $a3, $a0, 0 ++; LA64D-NEXT: bne $a3, $a2, .LBB17_5 ++; LA64D-NEXT: # %bb.4: # %atomicrmw.start ++; LA64D-NEXT: # in Loop: Header=BB17_3 Depth=2 ++; LA64D-NEXT: move $a4, $a1 ++; LA64D-NEXT: sc.w $a4, $a0, 0 ++; LA64D-NEXT: beqz $a4, .LBB17_3 ++; LA64D-NEXT: b .LBB17_6 ++; LA64D-NEXT: .LBB17_5: # %atomicrmw.start ++; LA64D-NEXT: # in Loop: Header=BB17_1 Depth=1 ++; LA64D-NEXT: dbar 0 ++; 
LA64D-NEXT: .LBB17_6: # %atomicrmw.start ++; LA64D-NEXT: # in Loop: Header=BB17_1 Depth=1 ++; LA64D-NEXT: movgr2fr.w $fa0, $a3 ++; LA64D-NEXT: addi.w $a1, $a2, 0 ++; LA64D-NEXT: bne $a3, $a1, .LBB17_1 ++; LA64D-NEXT: # %bb.2: # %atomicrmw.end ++; LA64D-NEXT: ret ++ %v = atomicrmw fsub ptr %p, float 1.0 acq_rel, align 4 ++ ret float %v ++} ++ ++define float @float_fmin_acq_rel(ptr %p) nounwind { ++; LA64F-LABEL: float_fmin_acq_rel: ++; LA64F: # %bb.0: ++; LA64F-NEXT: fld.s $fa0, $a0, 0 ++; LA64F-NEXT: addi.w $a1, $zero, 1 ++; LA64F-NEXT: movgr2fr.w $fa1, $a1 ++; LA64F-NEXT: ffint.s.w $fa1, $fa1 ++; LA64F-NEXT: .p2align 4, , 16 ++; LA64F-NEXT: .LBB18_1: # %atomicrmw.start ++; LA64F-NEXT: # =>This Loop Header: Depth=1 ++; LA64F-NEXT: # Child Loop BB18_3 Depth 2 ++; LA64F-NEXT: fmax.s $fa2, $fa0, $fa0 ++; LA64F-NEXT: fmin.s $fa2, $fa2, $fa1 ++; LA64F-NEXT: movfr2gr.s $a1, $fa2 ++; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: .LBB18_3: # %atomicrmw.start ++; LA64F-NEXT: # Parent Loop BB18_1 Depth=1 ++; LA64F-NEXT: # => This Inner Loop Header: Depth=2 ++; LA64F-NEXT: ll.w $a3, $a0, 0 ++; LA64F-NEXT: bne $a3, $a2, .LBB18_5 ++; LA64F-NEXT: # %bb.4: # %atomicrmw.start ++; LA64F-NEXT: # in Loop: Header=BB18_3 Depth=2 ++; LA64F-NEXT: move $a4, $a1 ++; LA64F-NEXT: sc.w $a4, $a0, 0 ++; LA64F-NEXT: beqz $a4, .LBB18_3 ++; LA64F-NEXT: b .LBB18_6 ++; LA64F-NEXT: .LBB18_5: # %atomicrmw.start ++; LA64F-NEXT: # in Loop: Header=BB18_1 Depth=1 ++; LA64F-NEXT: dbar 0 ++; LA64F-NEXT: .LBB18_6: # %atomicrmw.start ++; LA64F-NEXT: # in Loop: Header=BB18_1 Depth=1 ++; LA64F-NEXT: movgr2fr.w $fa0, $a3 ++; LA64F-NEXT: addi.w $a1, $a2, 0 ++; LA64F-NEXT: bne $a3, $a1, .LBB18_1 ++; LA64F-NEXT: # %bb.2: # %atomicrmw.end ++; LA64F-NEXT: ret ++; ++; LA64D-LABEL: float_fmin_acq_rel: ++; LA64D: # %bb.0: ++; LA64D-NEXT: fld.s $fa0, $a0, 0 ++; LA64D-NEXT: addi.w $a1, $zero, 1 ++; LA64D-NEXT: movgr2fr.w $fa1, $a1 ++; LA64D-NEXT: ffint.s.w $fa1, $fa1 ++; LA64D-NEXT: .p2align 4, , 16 ++; LA64D-NEXT: .LBB18_1: # %atomicrmw.start ++; LA64D-NEXT: # =>This Loop Header: Depth=1 ++; LA64D-NEXT: # Child Loop BB18_3 Depth 2 ++; LA64D-NEXT: fmax.s $fa2, $fa0, $fa0 ++; LA64D-NEXT: fmin.s $fa2, $fa2, $fa1 ++; LA64D-NEXT: movfr2gr.s $a1, $fa2 ++; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: .LBB18_3: # %atomicrmw.start ++; LA64D-NEXT: # Parent Loop BB18_1 Depth=1 ++; LA64D-NEXT: # => This Inner Loop Header: Depth=2 ++; LA64D-NEXT: ll.w $a3, $a0, 0 ++; LA64D-NEXT: bne $a3, $a2, .LBB18_5 ++; LA64D-NEXT: # %bb.4: # %atomicrmw.start ++; LA64D-NEXT: # in Loop: Header=BB18_3 Depth=2 ++; LA64D-NEXT: move $a4, $a1 ++; LA64D-NEXT: sc.w $a4, $a0, 0 ++; LA64D-NEXT: beqz $a4, .LBB18_3 ++; LA64D-NEXT: b .LBB18_6 ++; LA64D-NEXT: .LBB18_5: # %atomicrmw.start ++; LA64D-NEXT: # in Loop: Header=BB18_1 Depth=1 ++; LA64D-NEXT: dbar 0 ++; LA64D-NEXT: .LBB18_6: # %atomicrmw.start ++; LA64D-NEXT: # in Loop: Header=BB18_1 Depth=1 ++; LA64D-NEXT: movgr2fr.w $fa0, $a3 ++; LA64D-NEXT: addi.w $a1, $a2, 0 ++; LA64D-NEXT: bne $a3, $a1, .LBB18_1 ++; LA64D-NEXT: # %bb.2: # %atomicrmw.end ++; LA64D-NEXT: ret ++ %v = atomicrmw fmin ptr %p, float 1.0 acq_rel, align 4 ++ ret float %v ++} ++ ++define float @float_fmax_acq_rel(ptr %p) nounwind { ++; LA64F-LABEL: float_fmax_acq_rel: ++; LA64F: # %bb.0: ++; LA64F-NEXT: fld.s $fa0, $a0, 0 ++; LA64F-NEXT: addi.w $a1, $zero, 1 ++; LA64F-NEXT: movgr2fr.w $fa1, $a1 ++; LA64F-NEXT: ffint.s.w $fa1, $fa1 ++; LA64F-NEXT: .p2align 4, , 16 ++; LA64F-NEXT: .LBB19_1: # %atomicrmw.start ++; LA64F-NEXT: # =>This Loop Header: Depth=1 ++; 
LA64F-NEXT: # Child Loop BB19_3 Depth 2 ++; LA64F-NEXT: fmax.s $fa2, $fa0, $fa0 ++; LA64F-NEXT: fmax.s $fa2, $fa2, $fa1 ++; LA64F-NEXT: movfr2gr.s $a1, $fa2 ++; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: .LBB19_3: # %atomicrmw.start ++; LA64F-NEXT: # Parent Loop BB19_1 Depth=1 ++; LA64F-NEXT: # => This Inner Loop Header: Depth=2 ++; LA64F-NEXT: ll.w $a3, $a0, 0 ++; LA64F-NEXT: bne $a3, $a2, .LBB19_5 ++; LA64F-NEXT: # %bb.4: # %atomicrmw.start ++; LA64F-NEXT: # in Loop: Header=BB19_3 Depth=2 ++; LA64F-NEXT: move $a4, $a1 ++; LA64F-NEXT: sc.w $a4, $a0, 0 ++; LA64F-NEXT: beqz $a4, .LBB19_3 ++; LA64F-NEXT: b .LBB19_6 ++; LA64F-NEXT: .LBB19_5: # %atomicrmw.start ++; LA64F-NEXT: # in Loop: Header=BB19_1 Depth=1 ++; LA64F-NEXT: dbar 0 ++; LA64F-NEXT: .LBB19_6: # %atomicrmw.start ++; LA64F-NEXT: # in Loop: Header=BB19_1 Depth=1 ++; LA64F-NEXT: movgr2fr.w $fa0, $a3 ++; LA64F-NEXT: addi.w $a1, $a2, 0 ++; LA64F-NEXT: bne $a3, $a1, .LBB19_1 ++; LA64F-NEXT: # %bb.2: # %atomicrmw.end ++; LA64F-NEXT: ret ++; ++; LA64D-LABEL: float_fmax_acq_rel: ++; LA64D: # %bb.0: ++; LA64D-NEXT: fld.s $fa0, $a0, 0 ++; LA64D-NEXT: addi.w $a1, $zero, 1 ++; LA64D-NEXT: movgr2fr.w $fa1, $a1 ++; LA64D-NEXT: ffint.s.w $fa1, $fa1 ++; LA64D-NEXT: .p2align 4, , 16 ++; LA64D-NEXT: .LBB19_1: # %atomicrmw.start ++; LA64D-NEXT: # =>This Loop Header: Depth=1 ++; LA64D-NEXT: # Child Loop BB19_3 Depth 2 ++; LA64D-NEXT: fmax.s $fa2, $fa0, $fa0 ++; LA64D-NEXT: fmax.s $fa2, $fa2, $fa1 ++; LA64D-NEXT: movfr2gr.s $a1, $fa2 ++; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: .LBB19_3: # %atomicrmw.start ++; LA64D-NEXT: # Parent Loop BB19_1 Depth=1 ++; LA64D-NEXT: # => This Inner Loop Header: Depth=2 ++; LA64D-NEXT: ll.w $a3, $a0, 0 ++; LA64D-NEXT: bne $a3, $a2, .LBB19_5 ++; LA64D-NEXT: # %bb.4: # %atomicrmw.start ++; LA64D-NEXT: # in Loop: Header=BB19_3 Depth=2 ++; LA64D-NEXT: move $a4, $a1 ++; LA64D-NEXT: sc.w $a4, $a0, 0 ++; LA64D-NEXT: beqz $a4, .LBB19_3 ++; LA64D-NEXT: b .LBB19_6 ++; LA64D-NEXT: .LBB19_5: # %atomicrmw.start ++; LA64D-NEXT: # in Loop: Header=BB19_1 Depth=1 ++; LA64D-NEXT: dbar 0 ++; LA64D-NEXT: .LBB19_6: # %atomicrmw.start ++; LA64D-NEXT: # in Loop: Header=BB19_1 Depth=1 ++; LA64D-NEXT: movgr2fr.w $fa0, $a3 ++; LA64D-NEXT: addi.w $a1, $a2, 0 ++; LA64D-NEXT: bne $a3, $a1, .LBB19_1 ++; LA64D-NEXT: # %bb.2: # %atomicrmw.end ++; LA64D-NEXT: ret ++ %v = atomicrmw fmax ptr %p, float 1.0 acq_rel, align 4 ++ ret float %v ++} ++ ++define double @double_fadd_acq_rel(ptr %p) nounwind { ++; LA64F-LABEL: double_fadd_acq_rel: ++; LA64F: # %bb.0: ++; LA64F-NEXT: addi.d $sp, $sp, -80 ++; LA64F-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s5, $sp, 16 # 8-byte Folded Spill ++; LA64F-NEXT: move $fp, $a0 ++; LA64F-NEXT: ld.d $a0, $a0, 0 ++; LA64F-NEXT: lu52i.d $s0, $zero, 1023 ++; LA64F-NEXT: ori $s1, $zero, 8 ++; LA64F-NEXT: addi.d $s2, $sp, 8 ++; LA64F-NEXT: addi.d $s3, $sp, 0 ++; LA64F-NEXT: ori $s4, $zero, 4 ++; LA64F-NEXT: ori $s5, $zero, 2 ++; LA64F-NEXT: .p2align 4, , 16 ++; LA64F-NEXT: .LBB20_1: # %atomicrmw.start ++; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 ++; LA64F-NEXT: st.d $a0, $sp, 8 ++; LA64F-NEXT: move $a1, $s0 ++; LA64F-NEXT: bl %plt(__adddf3) ++; 
LA64F-NEXT: st.d $a0, $sp, 0 ++; LA64F-NEXT: move $a0, $s1 ++; LA64F-NEXT: move $a1, $fp ++; LA64F-NEXT: move $a2, $s2 ++; LA64F-NEXT: move $a3, $s3 ++; LA64F-NEXT: move $a4, $s4 ++; LA64F-NEXT: move $a5, $s5 ++; LA64F-NEXT: bl %plt(__atomic_compare_exchange) ++; LA64F-NEXT: move $a1, $a0 ++; LA64F-NEXT: ld.d $a0, $sp, 8 ++; LA64F-NEXT: beqz $a1, .LBB20_1 ++; LA64F-NEXT: # %bb.2: # %atomicrmw.end ++; LA64F-NEXT: ld.d $s5, $sp, 16 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload ++; LA64F-NEXT: addi.d $sp, $sp, 80 ++; LA64F-NEXT: ret ++; ++; LA64D-LABEL: double_fadd_acq_rel: ++; LA64D: # %bb.0: ++; LA64D-NEXT: addi.d $sp, $sp, -80 ++; LA64D-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill ++; LA64D-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill ++; LA64D-NEXT: move $fp, $a0 ++; LA64D-NEXT: fld.d $fa0, $a0, 0 ++; LA64D-NEXT: addi.d $a0, $zero, 1 ++; LA64D-NEXT: movgr2fr.d $fa1, $a0 ++; LA64D-NEXT: ffint.d.l $fs0, $fa1 ++; LA64D-NEXT: ori $s0, $zero, 8 ++; LA64D-NEXT: addi.d $s1, $sp, 8 ++; LA64D-NEXT: addi.d $s2, $sp, 0 ++; LA64D-NEXT: ori $s3, $zero, 4 ++; LA64D-NEXT: ori $s4, $zero, 2 ++; LA64D-NEXT: .p2align 4, , 16 ++; LA64D-NEXT: .LBB20_1: # %atomicrmw.start ++; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 ++; LA64D-NEXT: fst.d $fa0, $sp, 8 ++; LA64D-NEXT: fadd.d $fa0, $fa0, $fs0 ++; LA64D-NEXT: fst.d $fa0, $sp, 0 ++; LA64D-NEXT: move $a0, $s0 ++; LA64D-NEXT: move $a1, $fp ++; LA64D-NEXT: move $a2, $s1 ++; LA64D-NEXT: move $a3, $s2 ++; LA64D-NEXT: move $a4, $s3 ++; LA64D-NEXT: move $a5, $s4 ++; LA64D-NEXT: bl %plt(__atomic_compare_exchange) ++; LA64D-NEXT: fld.d $fa0, $sp, 8 ++; LA64D-NEXT: beqz $a0, .LBB20_1 ++; LA64D-NEXT: # %bb.2: # %atomicrmw.end ++; LA64D-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload ++; LA64D-NEXT: addi.d $sp, $sp, 80 ++; LA64D-NEXT: ret ++ %v = atomicrmw fadd ptr %p, double 1.0 acq_rel, align 4 ++ ret double %v ++} ++ ++define double @double_fsub_acq_rel(ptr %p) nounwind { ++; LA64F-LABEL: double_fsub_acq_rel: ++; LA64F: # %bb.0: ++; LA64F-NEXT: addi.d $sp, $sp, -80 ++; LA64F-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill ++; LA64F-NEXT: st.d 
$s5, $sp, 16 # 8-byte Folded Spill ++; LA64F-NEXT: move $fp, $a0 ++; LA64F-NEXT: ld.d $a0, $a0, 0 ++; LA64F-NEXT: lu52i.d $s0, $zero, -1025 ++; LA64F-NEXT: ori $s1, $zero, 8 ++; LA64F-NEXT: addi.d $s2, $sp, 8 ++; LA64F-NEXT: addi.d $s3, $sp, 0 ++; LA64F-NEXT: ori $s4, $zero, 4 ++; LA64F-NEXT: ori $s5, $zero, 2 ++; LA64F-NEXT: .p2align 4, , 16 ++; LA64F-NEXT: .LBB21_1: # %atomicrmw.start ++; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 ++; LA64F-NEXT: st.d $a0, $sp, 8 ++; LA64F-NEXT: move $a1, $s0 ++; LA64F-NEXT: bl %plt(__adddf3) ++; LA64F-NEXT: st.d $a0, $sp, 0 ++; LA64F-NEXT: move $a0, $s1 ++; LA64F-NEXT: move $a1, $fp ++; LA64F-NEXT: move $a2, $s2 ++; LA64F-NEXT: move $a3, $s3 ++; LA64F-NEXT: move $a4, $s4 ++; LA64F-NEXT: move $a5, $s5 ++; LA64F-NEXT: bl %plt(__atomic_compare_exchange) ++; LA64F-NEXT: move $a1, $a0 ++; LA64F-NEXT: ld.d $a0, $sp, 8 ++; LA64F-NEXT: beqz $a1, .LBB21_1 ++; LA64F-NEXT: # %bb.2: # %atomicrmw.end ++; LA64F-NEXT: ld.d $s5, $sp, 16 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload ++; LA64F-NEXT: addi.d $sp, $sp, 80 ++; LA64F-NEXT: ret ++; ++; LA64D-LABEL: double_fsub_acq_rel: ++; LA64D: # %bb.0: ++; LA64D-NEXT: addi.d $sp, $sp, -80 ++; LA64D-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill ++; LA64D-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill ++; LA64D-NEXT: move $fp, $a0 ++; LA64D-NEXT: fld.d $fa0, $a0, 0 ++; LA64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI21_0) ++; LA64D-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI21_0) ++; LA64D-NEXT: fld.d $fs0, $a0, 0 ++; LA64D-NEXT: ori $s0, $zero, 8 ++; LA64D-NEXT: addi.d $s1, $sp, 8 ++; LA64D-NEXT: addi.d $s2, $sp, 0 ++; LA64D-NEXT: ori $s3, $zero, 4 ++; LA64D-NEXT: ori $s4, $zero, 2 ++; LA64D-NEXT: .p2align 4, , 16 ++; LA64D-NEXT: .LBB21_1: # %atomicrmw.start ++; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 ++; LA64D-NEXT: fst.d $fa0, $sp, 8 ++; LA64D-NEXT: fadd.d $fa0, $fa0, $fs0 ++; LA64D-NEXT: fst.d $fa0, $sp, 0 ++; LA64D-NEXT: move $a0, $s0 ++; LA64D-NEXT: move $a1, $fp ++; LA64D-NEXT: move $a2, $s1 ++; LA64D-NEXT: move $a3, $s2 ++; LA64D-NEXT: move $a4, $s3 ++; LA64D-NEXT: move $a5, $s4 ++; LA64D-NEXT: bl %plt(__atomic_compare_exchange) ++; LA64D-NEXT: fld.d $fa0, $sp, 8 ++; LA64D-NEXT: beqz $a0, .LBB21_1 ++; LA64D-NEXT: # %bb.2: # %atomicrmw.end ++; LA64D-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload ++; LA64D-NEXT: addi.d $sp, $sp, 80 ++; LA64D-NEXT: ret ++ %v = atomicrmw fsub ptr %p, double 1.0 acq_rel, align 4 ++ ret double %v 
++} ++ ++define double @double_fmin_acq_rel(ptr %p) nounwind { ++; LA64F-LABEL: double_fmin_acq_rel: ++; LA64F: # %bb.0: ++; LA64F-NEXT: addi.d $sp, $sp, -80 ++; LA64F-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s5, $sp, 16 # 8-byte Folded Spill ++; LA64F-NEXT: move $fp, $a0 ++; LA64F-NEXT: ld.d $a0, $a0, 0 ++; LA64F-NEXT: lu52i.d $s0, $zero, 1023 ++; LA64F-NEXT: ori $s1, $zero, 8 ++; LA64F-NEXT: addi.d $s2, $sp, 8 ++; LA64F-NEXT: addi.d $s3, $sp, 0 ++; LA64F-NEXT: ori $s4, $zero, 4 ++; LA64F-NEXT: ori $s5, $zero, 2 ++; LA64F-NEXT: .p2align 4, , 16 ++; LA64F-NEXT: .LBB22_1: # %atomicrmw.start ++; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 ++; LA64F-NEXT: st.d $a0, $sp, 8 ++; LA64F-NEXT: move $a1, $s0 ++; LA64F-NEXT: bl %plt(fmin) ++; LA64F-NEXT: st.d $a0, $sp, 0 ++; LA64F-NEXT: move $a0, $s1 ++; LA64F-NEXT: move $a1, $fp ++; LA64F-NEXT: move $a2, $s2 ++; LA64F-NEXT: move $a3, $s3 ++; LA64F-NEXT: move $a4, $s4 ++; LA64F-NEXT: move $a5, $s5 ++; LA64F-NEXT: bl %plt(__atomic_compare_exchange) ++; LA64F-NEXT: move $a1, $a0 ++; LA64F-NEXT: ld.d $a0, $sp, 8 ++; LA64F-NEXT: beqz $a1, .LBB22_1 ++; LA64F-NEXT: # %bb.2: # %atomicrmw.end ++; LA64F-NEXT: ld.d $s5, $sp, 16 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload ++; LA64F-NEXT: addi.d $sp, $sp, 80 ++; LA64F-NEXT: ret ++; ++; LA64D-LABEL: double_fmin_acq_rel: ++; LA64D: # %bb.0: ++; LA64D-NEXT: addi.d $sp, $sp, -80 ++; LA64D-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill ++; LA64D-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill ++; LA64D-NEXT: move $fp, $a0 ++; LA64D-NEXT: fld.d $fa0, $a0, 0 ++; LA64D-NEXT: addi.d $a0, $zero, 1 ++; LA64D-NEXT: movgr2fr.d $fa1, $a0 ++; LA64D-NEXT: ffint.d.l $fs0, $fa1 ++; LA64D-NEXT: ori $s0, $zero, 8 ++; LA64D-NEXT: addi.d $s1, $sp, 8 ++; LA64D-NEXT: addi.d $s2, $sp, 0 ++; LA64D-NEXT: ori $s3, $zero, 4 ++; LA64D-NEXT: ori $s4, $zero, 2 ++; LA64D-NEXT: .p2align 4, , 16 ++; LA64D-NEXT: .LBB22_1: # %atomicrmw.start ++; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 ++; LA64D-NEXT: fst.d $fa0, $sp, 8 ++; LA64D-NEXT: fmax.d $fa0, $fa0, $fa0 ++; LA64D-NEXT: fmin.d $fa0, $fa0, $fs0 ++; LA64D-NEXT: fst.d $fa0, $sp, 0 ++; LA64D-NEXT: move $a0, $s0 ++; LA64D-NEXT: move $a1, $fp ++; LA64D-NEXT: move $a2, $s1 ++; LA64D-NEXT: move $a3, $s2 ++; LA64D-NEXT: move $a4, $s3 ++; LA64D-NEXT: move $a5, $s4 ++; LA64D-NEXT: bl %plt(__atomic_compare_exchange) ++; LA64D-NEXT: fld.d $fa0, $sp, 8 ++; LA64D-NEXT: beqz $a0, .LBB22_1 ++; LA64D-NEXT: # %bb.2: # %atomicrmw.end ++; 
LA64D-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload ++; LA64D-NEXT: addi.d $sp, $sp, 80 ++; LA64D-NEXT: ret ++ %v = atomicrmw fmin ptr %p, double 1.0 acq_rel, align 4 ++ ret double %v ++} ++ ++define double @double_fmax_acq_rel(ptr %p) nounwind { ++; LA64F-LABEL: double_fmax_acq_rel: ++; LA64F: # %bb.0: ++; LA64F-NEXT: addi.d $sp, $sp, -80 ++; LA64F-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s5, $sp, 16 # 8-byte Folded Spill ++; LA64F-NEXT: move $fp, $a0 ++; LA64F-NEXT: ld.d $a0, $a0, 0 ++; LA64F-NEXT: lu52i.d $s0, $zero, 1023 ++; LA64F-NEXT: ori $s1, $zero, 8 ++; LA64F-NEXT: addi.d $s2, $sp, 8 ++; LA64F-NEXT: addi.d $s3, $sp, 0 ++; LA64F-NEXT: ori $s4, $zero, 4 ++; LA64F-NEXT: ori $s5, $zero, 2 ++; LA64F-NEXT: .p2align 4, , 16 ++; LA64F-NEXT: .LBB23_1: # %atomicrmw.start ++; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 ++; LA64F-NEXT: st.d $a0, $sp, 8 ++; LA64F-NEXT: move $a1, $s0 ++; LA64F-NEXT: bl %plt(fmax) ++; LA64F-NEXT: st.d $a0, $sp, 0 ++; LA64F-NEXT: move $a0, $s1 ++; LA64F-NEXT: move $a1, $fp ++; LA64F-NEXT: move $a2, $s2 ++; LA64F-NEXT: move $a3, $s3 ++; LA64F-NEXT: move $a4, $s4 ++; LA64F-NEXT: move $a5, $s5 ++; LA64F-NEXT: bl %plt(__atomic_compare_exchange) ++; LA64F-NEXT: move $a1, $a0 ++; LA64F-NEXT: ld.d $a0, $sp, 8 ++; LA64F-NEXT: beqz $a1, .LBB23_1 ++; LA64F-NEXT: # %bb.2: # %atomicrmw.end ++; LA64F-NEXT: ld.d $s5, $sp, 16 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload ++; LA64F-NEXT: addi.d $sp, $sp, 80 ++; LA64F-NEXT: ret ++; ++; LA64D-LABEL: double_fmax_acq_rel: ++; LA64D: # %bb.0: ++; LA64D-NEXT: addi.d $sp, $sp, -80 ++; LA64D-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill ++; LA64D-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill ++; LA64D-NEXT: move $fp, $a0 ++; LA64D-NEXT: fld.d $fa0, $a0, 0 ++; LA64D-NEXT: addi.d $a0, $zero, 1 ++; LA64D-NEXT: movgr2fr.d $fa1, $a0 ++; LA64D-NEXT: ffint.d.l $fs0, $fa1 ++; LA64D-NEXT: ori $s0, $zero, 8 ++; LA64D-NEXT: addi.d $s1, $sp, 8 ++; LA64D-NEXT: addi.d $s2, $sp, 0 ++; LA64D-NEXT: ori $s3, $zero, 4 ++; LA64D-NEXT: ori $s4, $zero, 2 ++; LA64D-NEXT: .p2align 4, , 16 ++; LA64D-NEXT: 
.LBB23_1: # %atomicrmw.start ++; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 ++; LA64D-NEXT: fst.d $fa0, $sp, 8 ++; LA64D-NEXT: fmax.d $fa0, $fa0, $fa0 ++; LA64D-NEXT: fmax.d $fa0, $fa0, $fs0 ++; LA64D-NEXT: fst.d $fa0, $sp, 0 ++; LA64D-NEXT: move $a0, $s0 ++; LA64D-NEXT: move $a1, $fp ++; LA64D-NEXT: move $a2, $s1 ++; LA64D-NEXT: move $a3, $s2 ++; LA64D-NEXT: move $a4, $s3 ++; LA64D-NEXT: move $a5, $s4 ++; LA64D-NEXT: bl %plt(__atomic_compare_exchange) ++; LA64D-NEXT: fld.d $fa0, $sp, 8 ++; LA64D-NEXT: beqz $a0, .LBB23_1 ++; LA64D-NEXT: # %bb.2: # %atomicrmw.end ++; LA64D-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload ++; LA64D-NEXT: addi.d $sp, $sp, 80 ++; LA64D-NEXT: ret ++ %v = atomicrmw fmax ptr %p, double 1.0 acq_rel, align 4 ++ ret double %v ++} ++ ++define float @float_fadd_seq_cst(ptr %p) nounwind { ++; LA64F-LABEL: float_fadd_seq_cst: ++; LA64F: # %bb.0: ++; LA64F-NEXT: fld.s $fa0, $a0, 0 ++; LA64F-NEXT: addi.w $a1, $zero, 1 ++; LA64F-NEXT: movgr2fr.w $fa1, $a1 ++; LA64F-NEXT: ffint.s.w $fa1, $fa1 ++; LA64F-NEXT: .p2align 4, , 16 ++; LA64F-NEXT: .LBB24_1: # %atomicrmw.start ++; LA64F-NEXT: # =>This Loop Header: Depth=1 ++; LA64F-NEXT: # Child Loop BB24_3 Depth 2 ++; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 ++; LA64F-NEXT: movfr2gr.s $a1, $fa2 ++; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: .LBB24_3: # %atomicrmw.start ++; LA64F-NEXT: # Parent Loop BB24_1 Depth=1 ++; LA64F-NEXT: # => This Inner Loop Header: Depth=2 ++; LA64F-NEXT: ll.w $a3, $a0, 0 ++; LA64F-NEXT: bne $a3, $a2, .LBB24_5 ++; LA64F-NEXT: # %bb.4: # %atomicrmw.start ++; LA64F-NEXT: # in Loop: Header=BB24_3 Depth=2 ++; LA64F-NEXT: move $a4, $a1 ++; LA64F-NEXT: sc.w $a4, $a0, 0 ++; LA64F-NEXT: beqz $a4, .LBB24_3 ++; LA64F-NEXT: b .LBB24_6 ++; LA64F-NEXT: .LBB24_5: # %atomicrmw.start ++; LA64F-NEXT: # in Loop: Header=BB24_1 Depth=1 ++; LA64F-NEXT: dbar 0 ++; LA64F-NEXT: .LBB24_6: # %atomicrmw.start ++; LA64F-NEXT: # in Loop: Header=BB24_1 Depth=1 ++; LA64F-NEXT: movgr2fr.w $fa0, $a3 ++; LA64F-NEXT: addi.w $a1, $a2, 0 ++; LA64F-NEXT: bne $a3, $a1, .LBB24_1 ++; LA64F-NEXT: # %bb.2: # %atomicrmw.end ++; LA64F-NEXT: ret ++; ++; LA64D-LABEL: float_fadd_seq_cst: ++; LA64D: # %bb.0: ++; LA64D-NEXT: fld.s $fa0, $a0, 0 ++; LA64D-NEXT: addi.w $a1, $zero, 1 ++; LA64D-NEXT: movgr2fr.w $fa1, $a1 ++; LA64D-NEXT: ffint.s.w $fa1, $fa1 ++; LA64D-NEXT: .p2align 4, , 16 ++; LA64D-NEXT: .LBB24_1: # %atomicrmw.start ++; LA64D-NEXT: # =>This Loop Header: Depth=1 ++; LA64D-NEXT: # Child Loop BB24_3 Depth 2 ++; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 ++; LA64D-NEXT: movfr2gr.s $a1, $fa2 ++; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: .LBB24_3: # %atomicrmw.start ++; LA64D-NEXT: # Parent Loop BB24_1 Depth=1 ++; LA64D-NEXT: # => This Inner Loop Header: Depth=2 ++; LA64D-NEXT: ll.w $a3, $a0, 0 ++; LA64D-NEXT: bne $a3, $a2, .LBB24_5 ++; LA64D-NEXT: # %bb.4: # %atomicrmw.start ++; LA64D-NEXT: # in Loop: Header=BB24_3 Depth=2 ++; LA64D-NEXT: move $a4, $a1 ++; LA64D-NEXT: sc.w $a4, $a0, 0 ++; LA64D-NEXT: beqz $a4, .LBB24_3 ++; LA64D-NEXT: b .LBB24_6 ++; LA64D-NEXT: .LBB24_5: # %atomicrmw.start ++; LA64D-NEXT: # in Loop: Header=BB24_1 Depth=1 ++; 
LA64D-NEXT: dbar 0 ++; LA64D-NEXT: .LBB24_6: # %atomicrmw.start ++; LA64D-NEXT: # in Loop: Header=BB24_1 Depth=1 ++; LA64D-NEXT: movgr2fr.w $fa0, $a3 ++; LA64D-NEXT: addi.w $a1, $a2, 0 ++; LA64D-NEXT: bne $a3, $a1, .LBB24_1 ++; LA64D-NEXT: # %bb.2: # %atomicrmw.end ++; LA64D-NEXT: ret ++ %v = atomicrmw fadd ptr %p, float 1.0 seq_cst, align 4 ++ ret float %v ++} ++ ++define float @float_fsub_seq_cst(ptr %p) nounwind { ++; LA64F-LABEL: float_fsub_seq_cst: ++; LA64F: # %bb.0: ++; LA64F-NEXT: fld.s $fa0, $a0, 0 ++; LA64F-NEXT: pcalau12i $a1, %pc_hi20(.LCPI25_0) ++; LA64F-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI25_0) ++; LA64F-NEXT: fld.s $fa1, $a1, 0 ++; LA64F-NEXT: .p2align 4, , 16 ++; LA64F-NEXT: .LBB25_1: # %atomicrmw.start ++; LA64F-NEXT: # =>This Loop Header: Depth=1 ++; LA64F-NEXT: # Child Loop BB25_3 Depth 2 ++; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 ++; LA64F-NEXT: movfr2gr.s $a1, $fa2 ++; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: .LBB25_3: # %atomicrmw.start ++; LA64F-NEXT: # Parent Loop BB25_1 Depth=1 ++; LA64F-NEXT: # => This Inner Loop Header: Depth=2 ++; LA64F-NEXT: ll.w $a3, $a0, 0 ++; LA64F-NEXT: bne $a3, $a2, .LBB25_5 ++; LA64F-NEXT: # %bb.4: # %atomicrmw.start ++; LA64F-NEXT: # in Loop: Header=BB25_3 Depth=2 ++; LA64F-NEXT: move $a4, $a1 ++; LA64F-NEXT: sc.w $a4, $a0, 0 ++; LA64F-NEXT: beqz $a4, .LBB25_3 ++; LA64F-NEXT: b .LBB25_6 ++; LA64F-NEXT: .LBB25_5: # %atomicrmw.start ++; LA64F-NEXT: # in Loop: Header=BB25_1 Depth=1 ++; LA64F-NEXT: dbar 0 ++; LA64F-NEXT: .LBB25_6: # %atomicrmw.start ++; LA64F-NEXT: # in Loop: Header=BB25_1 Depth=1 ++; LA64F-NEXT: movgr2fr.w $fa0, $a3 ++; LA64F-NEXT: addi.w $a1, $a2, 0 ++; LA64F-NEXT: bne $a3, $a1, .LBB25_1 ++; LA64F-NEXT: # %bb.2: # %atomicrmw.end ++; LA64F-NEXT: ret ++; ++; LA64D-LABEL: float_fsub_seq_cst: ++; LA64D: # %bb.0: ++; LA64D-NEXT: fld.s $fa0, $a0, 0 ++; LA64D-NEXT: pcalau12i $a1, %pc_hi20(.LCPI25_0) ++; LA64D-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI25_0) ++; LA64D-NEXT: fld.s $fa1, $a1, 0 ++; LA64D-NEXT: .p2align 4, , 16 ++; LA64D-NEXT: .LBB25_1: # %atomicrmw.start ++; LA64D-NEXT: # =>This Loop Header: Depth=1 ++; LA64D-NEXT: # Child Loop BB25_3 Depth 2 ++; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 ++; LA64D-NEXT: movfr2gr.s $a1, $fa2 ++; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: .LBB25_3: # %atomicrmw.start ++; LA64D-NEXT: # Parent Loop BB25_1 Depth=1 ++; LA64D-NEXT: # => This Inner Loop Header: Depth=2 ++; LA64D-NEXT: ll.w $a3, $a0, 0 ++; LA64D-NEXT: bne $a3, $a2, .LBB25_5 ++; LA64D-NEXT: # %bb.4: # %atomicrmw.start ++; LA64D-NEXT: # in Loop: Header=BB25_3 Depth=2 ++; LA64D-NEXT: move $a4, $a1 ++; LA64D-NEXT: sc.w $a4, $a0, 0 ++; LA64D-NEXT: beqz $a4, .LBB25_3 ++; LA64D-NEXT: b .LBB25_6 ++; LA64D-NEXT: .LBB25_5: # %atomicrmw.start ++; LA64D-NEXT: # in Loop: Header=BB25_1 Depth=1 ++; LA64D-NEXT: dbar 0 ++; LA64D-NEXT: .LBB25_6: # %atomicrmw.start ++; LA64D-NEXT: # in Loop: Header=BB25_1 Depth=1 ++; LA64D-NEXT: movgr2fr.w $fa0, $a3 ++; LA64D-NEXT: addi.w $a1, $a2, 0 ++; LA64D-NEXT: bne $a3, $a1, .LBB25_1 ++; LA64D-NEXT: # %bb.2: # %atomicrmw.end ++; LA64D-NEXT: ret ++ %v = atomicrmw fsub ptr %p, float 1.0 seq_cst, align 4 ++ ret float %v ++} ++ ++define float @float_fmin_seq_cst(ptr %p) nounwind { ++; LA64F-LABEL: float_fmin_seq_cst: ++; LA64F: # %bb.0: ++; LA64F-NEXT: fld.s $fa0, $a0, 0 ++; LA64F-NEXT: addi.w $a1, $zero, 1 ++; LA64F-NEXT: movgr2fr.w $fa1, $a1 ++; LA64F-NEXT: ffint.s.w $fa1, $fa1 ++; LA64F-NEXT: .p2align 4, , 16 ++; LA64F-NEXT: .LBB26_1: # %atomicrmw.start ++; LA64F-NEXT: # =>This Loop Header: Depth=1 ++; 
LA64F-NEXT: # Child Loop BB26_3 Depth 2 ++; LA64F-NEXT: fmax.s $fa2, $fa0, $fa0 ++; LA64F-NEXT: fmin.s $fa2, $fa2, $fa1 ++; LA64F-NEXT: movfr2gr.s $a1, $fa2 ++; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: .LBB26_3: # %atomicrmw.start ++; LA64F-NEXT: # Parent Loop BB26_1 Depth=1 ++; LA64F-NEXT: # => This Inner Loop Header: Depth=2 ++; LA64F-NEXT: ll.w $a3, $a0, 0 ++; LA64F-NEXT: bne $a3, $a2, .LBB26_5 ++; LA64F-NEXT: # %bb.4: # %atomicrmw.start ++; LA64F-NEXT: # in Loop: Header=BB26_3 Depth=2 ++; LA64F-NEXT: move $a4, $a1 ++; LA64F-NEXT: sc.w $a4, $a0, 0 ++; LA64F-NEXT: beqz $a4, .LBB26_3 ++; LA64F-NEXT: b .LBB26_6 ++; LA64F-NEXT: .LBB26_5: # %atomicrmw.start ++; LA64F-NEXT: # in Loop: Header=BB26_1 Depth=1 ++; LA64F-NEXT: dbar 0 ++; LA64F-NEXT: .LBB26_6: # %atomicrmw.start ++; LA64F-NEXT: # in Loop: Header=BB26_1 Depth=1 ++; LA64F-NEXT: movgr2fr.w $fa0, $a3 ++; LA64F-NEXT: addi.w $a1, $a2, 0 ++; LA64F-NEXT: bne $a3, $a1, .LBB26_1 ++; LA64F-NEXT: # %bb.2: # %atomicrmw.end ++; LA64F-NEXT: ret ++; ++; LA64D-LABEL: float_fmin_seq_cst: ++; LA64D: # %bb.0: ++; LA64D-NEXT: fld.s $fa0, $a0, 0 ++; LA64D-NEXT: addi.w $a1, $zero, 1 ++; LA64D-NEXT: movgr2fr.w $fa1, $a1 ++; LA64D-NEXT: ffint.s.w $fa1, $fa1 ++; LA64D-NEXT: .p2align 4, , 16 ++; LA64D-NEXT: .LBB26_1: # %atomicrmw.start ++; LA64D-NEXT: # =>This Loop Header: Depth=1 ++; LA64D-NEXT: # Child Loop BB26_3 Depth 2 ++; LA64D-NEXT: fmax.s $fa2, $fa0, $fa0 ++; LA64D-NEXT: fmin.s $fa2, $fa2, $fa1 ++; LA64D-NEXT: movfr2gr.s $a1, $fa2 ++; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: .LBB26_3: # %atomicrmw.start ++; LA64D-NEXT: # Parent Loop BB26_1 Depth=1 ++; LA64D-NEXT: # => This Inner Loop Header: Depth=2 ++; LA64D-NEXT: ll.w $a3, $a0, 0 ++; LA64D-NEXT: bne $a3, $a2, .LBB26_5 ++; LA64D-NEXT: # %bb.4: # %atomicrmw.start ++; LA64D-NEXT: # in Loop: Header=BB26_3 Depth=2 ++; LA64D-NEXT: move $a4, $a1 ++; LA64D-NEXT: sc.w $a4, $a0, 0 ++; LA64D-NEXT: beqz $a4, .LBB26_3 ++; LA64D-NEXT: b .LBB26_6 ++; LA64D-NEXT: .LBB26_5: # %atomicrmw.start ++; LA64D-NEXT: # in Loop: Header=BB26_1 Depth=1 ++; LA64D-NEXT: dbar 0 ++; LA64D-NEXT: .LBB26_6: # %atomicrmw.start ++; LA64D-NEXT: # in Loop: Header=BB26_1 Depth=1 ++; LA64D-NEXT: movgr2fr.w $fa0, $a3 ++; LA64D-NEXT: addi.w $a1, $a2, 0 ++; LA64D-NEXT: bne $a3, $a1, .LBB26_1 ++; LA64D-NEXT: # %bb.2: # %atomicrmw.end ++; LA64D-NEXT: ret ++ %v = atomicrmw fmin ptr %p, float 1.0 seq_cst, align 4 ++ ret float %v ++} ++ ++define float @float_fmax_seq_cst(ptr %p) nounwind { ++; LA64F-LABEL: float_fmax_seq_cst: ++; LA64F: # %bb.0: ++; LA64F-NEXT: fld.s $fa0, $a0, 0 ++; LA64F-NEXT: addi.w $a1, $zero, 1 ++; LA64F-NEXT: movgr2fr.w $fa1, $a1 ++; LA64F-NEXT: ffint.s.w $fa1, $fa1 ++; LA64F-NEXT: .p2align 4, , 16 ++; LA64F-NEXT: .LBB27_1: # %atomicrmw.start ++; LA64F-NEXT: # =>This Loop Header: Depth=1 ++; LA64F-NEXT: # Child Loop BB27_3 Depth 2 ++; LA64F-NEXT: fmax.s $fa2, $fa0, $fa0 ++; LA64F-NEXT: fmax.s $fa2, $fa2, $fa1 ++; LA64F-NEXT: movfr2gr.s $a1, $fa2 ++; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: .LBB27_3: # %atomicrmw.start ++; LA64F-NEXT: # Parent Loop BB27_1 Depth=1 ++; LA64F-NEXT: # => This Inner Loop Header: Depth=2 ++; LA64F-NEXT: ll.w $a3, $a0, 0 ++; LA64F-NEXT: bne $a3, $a2, .LBB27_5 ++; LA64F-NEXT: # %bb.4: # %atomicrmw.start ++; LA64F-NEXT: # in Loop: Header=BB27_3 Depth=2 ++; LA64F-NEXT: move $a4, $a1 ++; LA64F-NEXT: sc.w $a4, $a0, 0 ++; LA64F-NEXT: beqz $a4, .LBB27_3 ++; LA64F-NEXT: b .LBB27_6 ++; LA64F-NEXT: .LBB27_5: # %atomicrmw.start ++; LA64F-NEXT: # in Loop: Header=BB27_1 Depth=1 ++; 
LA64F-NEXT: dbar 0 ++; LA64F-NEXT: .LBB27_6: # %atomicrmw.start ++; LA64F-NEXT: # in Loop: Header=BB27_1 Depth=1 ++; LA64F-NEXT: movgr2fr.w $fa0, $a3 ++; LA64F-NEXT: addi.w $a1, $a2, 0 ++; LA64F-NEXT: bne $a3, $a1, .LBB27_1 ++; LA64F-NEXT: # %bb.2: # %atomicrmw.end ++; LA64F-NEXT: ret ++; ++; LA64D-LABEL: float_fmax_seq_cst: ++; LA64D: # %bb.0: ++; LA64D-NEXT: fld.s $fa0, $a0, 0 ++; LA64D-NEXT: addi.w $a1, $zero, 1 ++; LA64D-NEXT: movgr2fr.w $fa1, $a1 ++; LA64D-NEXT: ffint.s.w $fa1, $fa1 ++; LA64D-NEXT: .p2align 4, , 16 ++; LA64D-NEXT: .LBB27_1: # %atomicrmw.start ++; LA64D-NEXT: # =>This Loop Header: Depth=1 ++; LA64D-NEXT: # Child Loop BB27_3 Depth 2 ++; LA64D-NEXT: fmax.s $fa2, $fa0, $fa0 ++; LA64D-NEXT: fmax.s $fa2, $fa2, $fa1 ++; LA64D-NEXT: movfr2gr.s $a1, $fa2 ++; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: .LBB27_3: # %atomicrmw.start ++; LA64D-NEXT: # Parent Loop BB27_1 Depth=1 ++; LA64D-NEXT: # => This Inner Loop Header: Depth=2 ++; LA64D-NEXT: ll.w $a3, $a0, 0 ++; LA64D-NEXT: bne $a3, $a2, .LBB27_5 ++; LA64D-NEXT: # %bb.4: # %atomicrmw.start ++; LA64D-NEXT: # in Loop: Header=BB27_3 Depth=2 ++; LA64D-NEXT: move $a4, $a1 ++; LA64D-NEXT: sc.w $a4, $a0, 0 ++; LA64D-NEXT: beqz $a4, .LBB27_3 ++; LA64D-NEXT: b .LBB27_6 ++; LA64D-NEXT: .LBB27_5: # %atomicrmw.start ++; LA64D-NEXT: # in Loop: Header=BB27_1 Depth=1 ++; LA64D-NEXT: dbar 0 ++; LA64D-NEXT: .LBB27_6: # %atomicrmw.start ++; LA64D-NEXT: # in Loop: Header=BB27_1 Depth=1 ++; LA64D-NEXT: movgr2fr.w $fa0, $a3 ++; LA64D-NEXT: addi.w $a1, $a2, 0 ++; LA64D-NEXT: bne $a3, $a1, .LBB27_1 ++; LA64D-NEXT: # %bb.2: # %atomicrmw.end ++; LA64D-NEXT: ret ++ %v = atomicrmw fmax ptr %p, float 1.0 seq_cst, align 4 ++ ret float %v ++} ++ ++define double @double_fadd_seq_cst(ptr %p) nounwind { ++; LA64F-LABEL: double_fadd_seq_cst: ++; LA64F: # %bb.0: ++; LA64F-NEXT: addi.d $sp, $sp, -80 ++; LA64F-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill ++; LA64F-NEXT: move $fp, $a0 ++; LA64F-NEXT: ld.d $a0, $a0, 0 ++; LA64F-NEXT: lu52i.d $s0, $zero, 1023 ++; LA64F-NEXT: ori $s1, $zero, 8 ++; LA64F-NEXT: addi.d $s2, $sp, 16 ++; LA64F-NEXT: addi.d $s3, $sp, 8 ++; LA64F-NEXT: ori $s4, $zero, 5 ++; LA64F-NEXT: .p2align 4, , 16 ++; LA64F-NEXT: .LBB28_1: # %atomicrmw.start ++; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 ++; LA64F-NEXT: st.d $a0, $sp, 16 ++; LA64F-NEXT: move $a1, $s0 ++; LA64F-NEXT: bl %plt(__adddf3) ++; LA64F-NEXT: st.d $a0, $sp, 8 ++; LA64F-NEXT: move $a0, $s1 ++; LA64F-NEXT: move $a1, $fp ++; LA64F-NEXT: move $a2, $s2 ++; LA64F-NEXT: move $a3, $s3 ++; LA64F-NEXT: move $a4, $s4 ++; LA64F-NEXT: move $a5, $s4 ++; LA64F-NEXT: bl %plt(__atomic_compare_exchange) ++; LA64F-NEXT: move $a1, $a0 ++; LA64F-NEXT: ld.d $a0, $sp, 16 ++; LA64F-NEXT: beqz $a1, .LBB28_1 ++; LA64F-NEXT: # %bb.2: # %atomicrmw.end ++; LA64F-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload ++; LA64F-NEXT: addi.d 
$sp, $sp, 80 ++; LA64F-NEXT: ret ++; ++; LA64D-LABEL: double_fadd_seq_cst: ++; LA64D: # %bb.0: ++; LA64D-NEXT: addi.d $sp, $sp, -80 ++; LA64D-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill ++; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill ++; LA64D-NEXT: move $fp, $a0 ++; LA64D-NEXT: fld.d $fa0, $a0, 0 ++; LA64D-NEXT: addi.d $a0, $zero, 1 ++; LA64D-NEXT: movgr2fr.d $fa1, $a0 ++; LA64D-NEXT: ffint.d.l $fs0, $fa1 ++; LA64D-NEXT: ori $s0, $zero, 8 ++; LA64D-NEXT: addi.d $s1, $sp, 16 ++; LA64D-NEXT: addi.d $s2, $sp, 8 ++; LA64D-NEXT: ori $s3, $zero, 5 ++; LA64D-NEXT: .p2align 4, , 16 ++; LA64D-NEXT: .LBB28_1: # %atomicrmw.start ++; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 ++; LA64D-NEXT: fst.d $fa0, $sp, 16 ++; LA64D-NEXT: fadd.d $fa0, $fa0, $fs0 ++; LA64D-NEXT: fst.d $fa0, $sp, 8 ++; LA64D-NEXT: move $a0, $s0 ++; LA64D-NEXT: move $a1, $fp ++; LA64D-NEXT: move $a2, $s1 ++; LA64D-NEXT: move $a3, $s2 ++; LA64D-NEXT: move $a4, $s3 ++; LA64D-NEXT: move $a5, $s3 ++; LA64D-NEXT: bl %plt(__atomic_compare_exchange) ++; LA64D-NEXT: fld.d $fa0, $sp, 16 ++; LA64D-NEXT: beqz $a0, .LBB28_1 ++; LA64D-NEXT: # %bb.2: # %atomicrmw.end ++; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload ++; LA64D-NEXT: addi.d $sp, $sp, 80 ++; LA64D-NEXT: ret ++ %v = atomicrmw fadd ptr %p, double 1.0 seq_cst, align 4 ++ ret double %v ++} ++ ++define double @double_fsub_seq_cst(ptr %p) nounwind { ++; LA64F-LABEL: double_fsub_seq_cst: ++; LA64F: # %bb.0: ++; LA64F-NEXT: addi.d $sp, $sp, -80 ++; LA64F-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill ++; LA64F-NEXT: move $fp, $a0 ++; LA64F-NEXT: ld.d $a0, $a0, 0 ++; LA64F-NEXT: lu52i.d $s0, $zero, -1025 ++; LA64F-NEXT: ori $s1, $zero, 8 ++; LA64F-NEXT: addi.d $s2, $sp, 16 ++; LA64F-NEXT: addi.d $s3, $sp, 8 ++; LA64F-NEXT: ori $s4, $zero, 5 ++; LA64F-NEXT: .p2align 4, , 16 ++; LA64F-NEXT: .LBB29_1: # %atomicrmw.start ++; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 ++; LA64F-NEXT: st.d $a0, $sp, 16 ++; LA64F-NEXT: move $a1, $s0 ++; LA64F-NEXT: bl %plt(__adddf3) ++; LA64F-NEXT: st.d $a0, $sp, 8 ++; LA64F-NEXT: move $a0, $s1 ++; LA64F-NEXT: move $a1, $fp ++; LA64F-NEXT: move $a2, $s2 ++; LA64F-NEXT: move $a3, $s3 ++; LA64F-NEXT: move $a4, $s4 ++; LA64F-NEXT: move $a5, $s4 ++; LA64F-NEXT: bl %plt(__atomic_compare_exchange) ++; LA64F-NEXT: move $a1, $a0 ++; LA64F-NEXT: ld.d $a0, $sp, 16 ++; LA64F-NEXT: beqz $a1, .LBB29_1 ++; LA64F-NEXT: # %bb.2: # %atomicrmw.end ++; LA64F-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded 
Reload ++; LA64F-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload ++; LA64F-NEXT: addi.d $sp, $sp, 80 ++; LA64F-NEXT: ret ++; ++; LA64D-LABEL: double_fsub_seq_cst: ++; LA64D: # %bb.0: ++; LA64D-NEXT: addi.d $sp, $sp, -80 ++; LA64D-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill ++; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill ++; LA64D-NEXT: move $fp, $a0 ++; LA64D-NEXT: fld.d $fa0, $a0, 0 ++; LA64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI29_0) ++; LA64D-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI29_0) ++; LA64D-NEXT: fld.d $fs0, $a0, 0 ++; LA64D-NEXT: ori $s0, $zero, 8 ++; LA64D-NEXT: addi.d $s1, $sp, 16 ++; LA64D-NEXT: addi.d $s2, $sp, 8 ++; LA64D-NEXT: ori $s3, $zero, 5 ++; LA64D-NEXT: .p2align 4, , 16 ++; LA64D-NEXT: .LBB29_1: # %atomicrmw.start ++; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 ++; LA64D-NEXT: fst.d $fa0, $sp, 16 ++; LA64D-NEXT: fadd.d $fa0, $fa0, $fs0 ++; LA64D-NEXT: fst.d $fa0, $sp, 8 ++; LA64D-NEXT: move $a0, $s0 ++; LA64D-NEXT: move $a1, $fp ++; LA64D-NEXT: move $a2, $s1 ++; LA64D-NEXT: move $a3, $s2 ++; LA64D-NEXT: move $a4, $s3 ++; LA64D-NEXT: move $a5, $s3 ++; LA64D-NEXT: bl %plt(__atomic_compare_exchange) ++; LA64D-NEXT: fld.d $fa0, $sp, 16 ++; LA64D-NEXT: beqz $a0, .LBB29_1 ++; LA64D-NEXT: # %bb.2: # %atomicrmw.end ++; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload ++; LA64D-NEXT: addi.d $sp, $sp, 80 ++; LA64D-NEXT: ret ++ %v = atomicrmw fsub ptr %p, double 1.0 seq_cst, align 4 ++ ret double %v ++} ++ ++define double @double_fmin_seq_cst(ptr %p) nounwind { ++; LA64F-LABEL: double_fmin_seq_cst: ++; LA64F: # %bb.0: ++; LA64F-NEXT: addi.d $sp, $sp, -80 ++; LA64F-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill ++; LA64F-NEXT: move $fp, $a0 ++; LA64F-NEXT: ld.d $a0, $a0, 0 ++; LA64F-NEXT: lu52i.d $s0, $zero, 1023 ++; LA64F-NEXT: ori $s1, $zero, 8 ++; LA64F-NEXT: addi.d $s2, $sp, 16 ++; LA64F-NEXT: addi.d $s3, $sp, 8 ++; LA64F-NEXT: ori $s4, $zero, 5 ++; LA64F-NEXT: .p2align 4, , 16 ++; LA64F-NEXT: .LBB30_1: # %atomicrmw.start ++; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 ++; LA64F-NEXT: st.d $a0, $sp, 16 ++; LA64F-NEXT: move $a1, $s0 ++; LA64F-NEXT: bl %plt(fmin) ++; LA64F-NEXT: st.d $a0, $sp, 8 ++; LA64F-NEXT: move $a0, $s1 ++; LA64F-NEXT: move $a1, $fp ++; LA64F-NEXT: move $a2, $s2 ++; LA64F-NEXT: move $a3, $s3 ++; LA64F-NEXT: move $a4, $s4 ++; LA64F-NEXT: move $a5, $s4 ++; LA64F-NEXT: bl %plt(__atomic_compare_exchange) ++; LA64F-NEXT: move $a1, 
$a0 ++; LA64F-NEXT: ld.d $a0, $sp, 16 ++; LA64F-NEXT: beqz $a1, .LBB30_1 ++; LA64F-NEXT: # %bb.2: # %atomicrmw.end ++; LA64F-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload ++; LA64F-NEXT: addi.d $sp, $sp, 80 ++; LA64F-NEXT: ret ++; ++; LA64D-LABEL: double_fmin_seq_cst: ++; LA64D: # %bb.0: ++; LA64D-NEXT: addi.d $sp, $sp, -80 ++; LA64D-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill ++; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill ++; LA64D-NEXT: move $fp, $a0 ++; LA64D-NEXT: fld.d $fa0, $a0, 0 ++; LA64D-NEXT: addi.d $a0, $zero, 1 ++; LA64D-NEXT: movgr2fr.d $fa1, $a0 ++; LA64D-NEXT: ffint.d.l $fs0, $fa1 ++; LA64D-NEXT: ori $s0, $zero, 8 ++; LA64D-NEXT: addi.d $s1, $sp, 16 ++; LA64D-NEXT: addi.d $s2, $sp, 8 ++; LA64D-NEXT: ori $s3, $zero, 5 ++; LA64D-NEXT: .p2align 4, , 16 ++; LA64D-NEXT: .LBB30_1: # %atomicrmw.start ++; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 ++; LA64D-NEXT: fst.d $fa0, $sp, 16 ++; LA64D-NEXT: fmax.d $fa0, $fa0, $fa0 ++; LA64D-NEXT: fmin.d $fa0, $fa0, $fs0 ++; LA64D-NEXT: fst.d $fa0, $sp, 8 ++; LA64D-NEXT: move $a0, $s0 ++; LA64D-NEXT: move $a1, $fp ++; LA64D-NEXT: move $a2, $s1 ++; LA64D-NEXT: move $a3, $s2 ++; LA64D-NEXT: move $a4, $s3 ++; LA64D-NEXT: move $a5, $s3 ++; LA64D-NEXT: bl %plt(__atomic_compare_exchange) ++; LA64D-NEXT: fld.d $fa0, $sp, 16 ++; LA64D-NEXT: beqz $a0, .LBB30_1 ++; LA64D-NEXT: # %bb.2: # %atomicrmw.end ++; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload ++; LA64D-NEXT: addi.d $sp, $sp, 80 ++; LA64D-NEXT: ret ++ %v = atomicrmw fmin ptr %p, double 1.0 seq_cst, align 4 ++ ret double %v ++} ++ ++define double @double_fmax_seq_cst(ptr %p) nounwind { ++; LA64F-LABEL: double_fmax_seq_cst: ++; LA64F: # %bb.0: ++; LA64F-NEXT: addi.d $sp, $sp, -80 ++; LA64F-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill ++; LA64F-NEXT: move $fp, $a0 ++; LA64F-NEXT: ld.d $a0, $a0, 0 ++; LA64F-NEXT: lu52i.d $s0, $zero, 1023 ++; LA64F-NEXT: ori $s1, $zero, 8 ++; LA64F-NEXT: addi.d $s2, $sp, 16 ++; LA64F-NEXT: addi.d $s3, $sp, 8 ++; LA64F-NEXT: ori $s4, $zero, 5 ++; LA64F-NEXT: .p2align 4, , 16 ++; LA64F-NEXT: .LBB31_1: # %atomicrmw.start ++; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 ++; LA64F-NEXT: st.d $a0, $sp, 16 ++; LA64F-NEXT: move $a1, $s0 ++; LA64F-NEXT: bl %plt(fmax) 
++; LA64F-NEXT: st.d $a0, $sp, 8 ++; LA64F-NEXT: move $a0, $s1 ++; LA64F-NEXT: move $a1, $fp ++; LA64F-NEXT: move $a2, $s2 ++; LA64F-NEXT: move $a3, $s3 ++; LA64F-NEXT: move $a4, $s4 ++; LA64F-NEXT: move $a5, $s4 ++; LA64F-NEXT: bl %plt(__atomic_compare_exchange) ++; LA64F-NEXT: move $a1, $a0 ++; LA64F-NEXT: ld.d $a0, $sp, 16 ++; LA64F-NEXT: beqz $a1, .LBB31_1 ++; LA64F-NEXT: # %bb.2: # %atomicrmw.end ++; LA64F-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload ++; LA64F-NEXT: addi.d $sp, $sp, 80 ++; LA64F-NEXT: ret ++; ++; LA64D-LABEL: double_fmax_seq_cst: ++; LA64D: # %bb.0: ++; LA64D-NEXT: addi.d $sp, $sp, -80 ++; LA64D-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill ++; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill ++; LA64D-NEXT: move $fp, $a0 ++; LA64D-NEXT: fld.d $fa0, $a0, 0 ++; LA64D-NEXT: addi.d $a0, $zero, 1 ++; LA64D-NEXT: movgr2fr.d $fa1, $a0 ++; LA64D-NEXT: ffint.d.l $fs0, $fa1 ++; LA64D-NEXT: ori $s0, $zero, 8 ++; LA64D-NEXT: addi.d $s1, $sp, 16 ++; LA64D-NEXT: addi.d $s2, $sp, 8 ++; LA64D-NEXT: ori $s3, $zero, 5 ++; LA64D-NEXT: .p2align 4, , 16 ++; LA64D-NEXT: .LBB31_1: # %atomicrmw.start ++; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 ++; LA64D-NEXT: fst.d $fa0, $sp, 16 ++; LA64D-NEXT: fmax.d $fa0, $fa0, $fa0 ++; LA64D-NEXT: fmax.d $fa0, $fa0, $fs0 ++; LA64D-NEXT: fst.d $fa0, $sp, 8 ++; LA64D-NEXT: move $a0, $s0 ++; LA64D-NEXT: move $a1, $fp ++; LA64D-NEXT: move $a2, $s1 ++; LA64D-NEXT: move $a3, $s2 ++; LA64D-NEXT: move $a4, $s3 ++; LA64D-NEXT: move $a5, $s3 ++; LA64D-NEXT: bl %plt(__atomic_compare_exchange) ++; LA64D-NEXT: fld.d $fa0, $sp, 16 ++; LA64D-NEXT: beqz $a0, .LBB31_1 ++; LA64D-NEXT: # %bb.2: # %atomicrmw.end ++; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload ++; LA64D-NEXT: addi.d $sp, $sp, 80 ++; LA64D-NEXT: ret ++ %v = atomicrmw fmax ptr %p, double 1.0 seq_cst, align 4 ++ ret double %v ++} ++ ++define float @float_fadd_monotonic(ptr %p) nounwind { ++; LA64F-LABEL: float_fadd_monotonic: ++; LA64F: # %bb.0: ++; LA64F-NEXT: fld.s $fa0, $a0, 0 ++; LA64F-NEXT: addi.w $a1, $zero, 1 ++; LA64F-NEXT: movgr2fr.w $fa1, $a1 ++; LA64F-NEXT: ffint.s.w $fa1, $fa1 ++; LA64F-NEXT: .p2align 4, , 16 ++; LA64F-NEXT: .LBB32_1: # %atomicrmw.start ++; LA64F-NEXT: # =>This Loop Header: Depth=1 ++; LA64F-NEXT: # Child Loop BB32_3 Depth 2 ++; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 ++; LA64F-NEXT: movfr2gr.s $a1, $fa2 ++; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: .LBB32_3: # %atomicrmw.start ++; LA64F-NEXT: # Parent Loop BB32_1 Depth=1 ++; LA64F-NEXT: # => This Inner Loop Header: Depth=2 ++; LA64F-NEXT: ll.w $a3, $a0, 0 
++; LA64F-NEXT: bne $a3, $a2, .LBB32_5 ++; LA64F-NEXT: # %bb.4: # %atomicrmw.start ++; LA64F-NEXT: # in Loop: Header=BB32_3 Depth=2 ++; LA64F-NEXT: move $a4, $a1 ++; LA64F-NEXT: sc.w $a4, $a0, 0 ++; LA64F-NEXT: beqz $a4, .LBB32_3 ++; LA64F-NEXT: b .LBB32_6 ++; LA64F-NEXT: .LBB32_5: # %atomicrmw.start ++; LA64F-NEXT: # in Loop: Header=BB32_1 Depth=1 ++; LA64F-NEXT: dbar 1792 ++; LA64F-NEXT: .LBB32_6: # %atomicrmw.start ++; LA64F-NEXT: # in Loop: Header=BB32_1 Depth=1 ++; LA64F-NEXT: movgr2fr.w $fa0, $a3 ++; LA64F-NEXT: addi.w $a1, $a2, 0 ++; LA64F-NEXT: bne $a3, $a1, .LBB32_1 ++; LA64F-NEXT: # %bb.2: # %atomicrmw.end ++; LA64F-NEXT: ret ++; ++; LA64D-LABEL: float_fadd_monotonic: ++; LA64D: # %bb.0: ++; LA64D-NEXT: fld.s $fa0, $a0, 0 ++; LA64D-NEXT: addi.w $a1, $zero, 1 ++; LA64D-NEXT: movgr2fr.w $fa1, $a1 ++; LA64D-NEXT: ffint.s.w $fa1, $fa1 ++; LA64D-NEXT: .p2align 4, , 16 ++; LA64D-NEXT: .LBB32_1: # %atomicrmw.start ++; LA64D-NEXT: # =>This Loop Header: Depth=1 ++; LA64D-NEXT: # Child Loop BB32_3 Depth 2 ++; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 ++; LA64D-NEXT: movfr2gr.s $a1, $fa2 ++; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: .LBB32_3: # %atomicrmw.start ++; LA64D-NEXT: # Parent Loop BB32_1 Depth=1 ++; LA64D-NEXT: # => This Inner Loop Header: Depth=2 ++; LA64D-NEXT: ll.w $a3, $a0, 0 ++; LA64D-NEXT: bne $a3, $a2, .LBB32_5 ++; LA64D-NEXT: # %bb.4: # %atomicrmw.start ++; LA64D-NEXT: # in Loop: Header=BB32_3 Depth=2 ++; LA64D-NEXT: move $a4, $a1 ++; LA64D-NEXT: sc.w $a4, $a0, 0 ++; LA64D-NEXT: beqz $a4, .LBB32_3 ++; LA64D-NEXT: b .LBB32_6 ++; LA64D-NEXT: .LBB32_5: # %atomicrmw.start ++; LA64D-NEXT: # in Loop: Header=BB32_1 Depth=1 ++; LA64D-NEXT: dbar 1792 ++; LA64D-NEXT: .LBB32_6: # %atomicrmw.start ++; LA64D-NEXT: # in Loop: Header=BB32_1 Depth=1 ++; LA64D-NEXT: movgr2fr.w $fa0, $a3 ++; LA64D-NEXT: addi.w $a1, $a2, 0 ++; LA64D-NEXT: bne $a3, $a1, .LBB32_1 ++; LA64D-NEXT: # %bb.2: # %atomicrmw.end ++; LA64D-NEXT: ret ++ %v = atomicrmw fadd ptr %p, float 1.0 monotonic, align 4 ++ ret float %v ++} ++ ++define float @float_fsub_monotonic(ptr %p) nounwind { ++; LA64F-LABEL: float_fsub_monotonic: ++; LA64F: # %bb.0: ++; LA64F-NEXT: fld.s $fa0, $a0, 0 ++; LA64F-NEXT: pcalau12i $a1, %pc_hi20(.LCPI33_0) ++; LA64F-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI33_0) ++; LA64F-NEXT: fld.s $fa1, $a1, 0 ++; LA64F-NEXT: .p2align 4, , 16 ++; LA64F-NEXT: .LBB33_1: # %atomicrmw.start ++; LA64F-NEXT: # =>This Loop Header: Depth=1 ++; LA64F-NEXT: # Child Loop BB33_3 Depth 2 ++; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 ++; LA64F-NEXT: movfr2gr.s $a1, $fa2 ++; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: .LBB33_3: # %atomicrmw.start ++; LA64F-NEXT: # Parent Loop BB33_1 Depth=1 ++; LA64F-NEXT: # => This Inner Loop Header: Depth=2 ++; LA64F-NEXT: ll.w $a3, $a0, 0 ++; LA64F-NEXT: bne $a3, $a2, .LBB33_5 ++; LA64F-NEXT: # %bb.4: # %atomicrmw.start ++; LA64F-NEXT: # in Loop: Header=BB33_3 Depth=2 ++; LA64F-NEXT: move $a4, $a1 ++; LA64F-NEXT: sc.w $a4, $a0, 0 ++; LA64F-NEXT: beqz $a4, .LBB33_3 ++; LA64F-NEXT: b .LBB33_6 ++; LA64F-NEXT: .LBB33_5: # %atomicrmw.start ++; LA64F-NEXT: # in Loop: Header=BB33_1 Depth=1 ++; LA64F-NEXT: dbar 1792 ++; LA64F-NEXT: .LBB33_6: # %atomicrmw.start ++; LA64F-NEXT: # in Loop: Header=BB33_1 Depth=1 ++; LA64F-NEXT: movgr2fr.w $fa0, $a3 ++; LA64F-NEXT: addi.w $a1, $a2, 0 ++; LA64F-NEXT: bne $a3, $a1, .LBB33_1 ++; LA64F-NEXT: # %bb.2: # %atomicrmw.end ++; LA64F-NEXT: ret ++; ++; LA64D-LABEL: float_fsub_monotonic: ++; LA64D: # %bb.0: ++; LA64D-NEXT: fld.s $fa0, $a0, 0 ++; LA64D-NEXT: pcalau12i 
$a1, %pc_hi20(.LCPI33_0) ++; LA64D-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI33_0) ++; LA64D-NEXT: fld.s $fa1, $a1, 0 ++; LA64D-NEXT: .p2align 4, , 16 ++; LA64D-NEXT: .LBB33_1: # %atomicrmw.start ++; LA64D-NEXT: # =>This Loop Header: Depth=1 ++; LA64D-NEXT: # Child Loop BB33_3 Depth 2 ++; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 ++; LA64D-NEXT: movfr2gr.s $a1, $fa2 ++; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: .LBB33_3: # %atomicrmw.start ++; LA64D-NEXT: # Parent Loop BB33_1 Depth=1 ++; LA64D-NEXT: # => This Inner Loop Header: Depth=2 ++; LA64D-NEXT: ll.w $a3, $a0, 0 ++; LA64D-NEXT: bne $a3, $a2, .LBB33_5 ++; LA64D-NEXT: # %bb.4: # %atomicrmw.start ++; LA64D-NEXT: # in Loop: Header=BB33_3 Depth=2 ++; LA64D-NEXT: move $a4, $a1 ++; LA64D-NEXT: sc.w $a4, $a0, 0 ++; LA64D-NEXT: beqz $a4, .LBB33_3 ++; LA64D-NEXT: b .LBB33_6 ++; LA64D-NEXT: .LBB33_5: # %atomicrmw.start ++; LA64D-NEXT: # in Loop: Header=BB33_1 Depth=1 ++; LA64D-NEXT: dbar 1792 ++; LA64D-NEXT: .LBB33_6: # %atomicrmw.start ++; LA64D-NEXT: # in Loop: Header=BB33_1 Depth=1 ++; LA64D-NEXT: movgr2fr.w $fa0, $a3 ++; LA64D-NEXT: addi.w $a1, $a2, 0 ++; LA64D-NEXT: bne $a3, $a1, .LBB33_1 ++; LA64D-NEXT: # %bb.2: # %atomicrmw.end ++; LA64D-NEXT: ret ++ %v = atomicrmw fsub ptr %p, float 1.0 monotonic, align 4 ++ ret float %v ++} ++ ++define float @float_fmin_monotonic(ptr %p) nounwind { ++; LA64F-LABEL: float_fmin_monotonic: ++; LA64F: # %bb.0: ++; LA64F-NEXT: fld.s $fa0, $a0, 0 ++; LA64F-NEXT: addi.w $a1, $zero, 1 ++; LA64F-NEXT: movgr2fr.w $fa1, $a1 ++; LA64F-NEXT: ffint.s.w $fa1, $fa1 ++; LA64F-NEXT: .p2align 4, , 16 ++; LA64F-NEXT: .LBB34_1: # %atomicrmw.start ++; LA64F-NEXT: # =>This Loop Header: Depth=1 ++; LA64F-NEXT: # Child Loop BB34_3 Depth 2 ++; LA64F-NEXT: fmax.s $fa2, $fa0, $fa0 ++; LA64F-NEXT: fmin.s $fa2, $fa2, $fa1 ++; LA64F-NEXT: movfr2gr.s $a1, $fa2 ++; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: .LBB34_3: # %atomicrmw.start ++; LA64F-NEXT: # Parent Loop BB34_1 Depth=1 ++; LA64F-NEXT: # => This Inner Loop Header: Depth=2 ++; LA64F-NEXT: ll.w $a3, $a0, 0 ++; LA64F-NEXT: bne $a3, $a2, .LBB34_5 ++; LA64F-NEXT: # %bb.4: # %atomicrmw.start ++; LA64F-NEXT: # in Loop: Header=BB34_3 Depth=2 ++; LA64F-NEXT: move $a4, $a1 ++; LA64F-NEXT: sc.w $a4, $a0, 0 ++; LA64F-NEXT: beqz $a4, .LBB34_3 ++; LA64F-NEXT: b .LBB34_6 ++; LA64F-NEXT: .LBB34_5: # %atomicrmw.start ++; LA64F-NEXT: # in Loop: Header=BB34_1 Depth=1 ++; LA64F-NEXT: dbar 1792 ++; LA64F-NEXT: .LBB34_6: # %atomicrmw.start ++; LA64F-NEXT: # in Loop: Header=BB34_1 Depth=1 ++; LA64F-NEXT: movgr2fr.w $fa0, $a3 ++; LA64F-NEXT: addi.w $a1, $a2, 0 ++; LA64F-NEXT: bne $a3, $a1, .LBB34_1 ++; LA64F-NEXT: # %bb.2: # %atomicrmw.end ++; LA64F-NEXT: ret ++; ++; LA64D-LABEL: float_fmin_monotonic: ++; LA64D: # %bb.0: ++; LA64D-NEXT: fld.s $fa0, $a0, 0 ++; LA64D-NEXT: addi.w $a1, $zero, 1 ++; LA64D-NEXT: movgr2fr.w $fa1, $a1 ++; LA64D-NEXT: ffint.s.w $fa1, $fa1 ++; LA64D-NEXT: .p2align 4, , 16 ++; LA64D-NEXT: .LBB34_1: # %atomicrmw.start ++; LA64D-NEXT: # =>This Loop Header: Depth=1 ++; LA64D-NEXT: # Child Loop BB34_3 Depth 2 ++; LA64D-NEXT: fmax.s $fa2, $fa0, $fa0 ++; LA64D-NEXT: fmin.s $fa2, $fa2, $fa1 ++; LA64D-NEXT: movfr2gr.s $a1, $fa2 ++; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: .LBB34_3: # %atomicrmw.start ++; LA64D-NEXT: # Parent Loop BB34_1 Depth=1 ++; LA64D-NEXT: # => This Inner Loop Header: Depth=2 ++; LA64D-NEXT: ll.w $a3, $a0, 0 ++; LA64D-NEXT: bne $a3, $a2, .LBB34_5 ++; LA64D-NEXT: # %bb.4: # %atomicrmw.start ++; LA64D-NEXT: # in Loop: Header=BB34_3 Depth=2 ++; 
LA64D-NEXT: move $a4, $a1 ++; LA64D-NEXT: sc.w $a4, $a0, 0 ++; LA64D-NEXT: beqz $a4, .LBB34_3 ++; LA64D-NEXT: b .LBB34_6 ++; LA64D-NEXT: .LBB34_5: # %atomicrmw.start ++; LA64D-NEXT: # in Loop: Header=BB34_1 Depth=1 ++; LA64D-NEXT: dbar 1792 ++; LA64D-NEXT: .LBB34_6: # %atomicrmw.start ++; LA64D-NEXT: # in Loop: Header=BB34_1 Depth=1 ++; LA64D-NEXT: movgr2fr.w $fa0, $a3 ++; LA64D-NEXT: addi.w $a1, $a2, 0 ++; LA64D-NEXT: bne $a3, $a1, .LBB34_1 ++; LA64D-NEXT: # %bb.2: # %atomicrmw.end ++; LA64D-NEXT: ret ++ %v = atomicrmw fmin ptr %p, float 1.0 monotonic, align 4 ++ ret float %v ++} ++ ++define float @float_fmax_monotonic(ptr %p) nounwind { ++; LA64F-LABEL: float_fmax_monotonic: ++; LA64F: # %bb.0: ++; LA64F-NEXT: fld.s $fa0, $a0, 0 ++; LA64F-NEXT: addi.w $a1, $zero, 1 ++; LA64F-NEXT: movgr2fr.w $fa1, $a1 ++; LA64F-NEXT: ffint.s.w $fa1, $fa1 ++; LA64F-NEXT: .p2align 4, , 16 ++; LA64F-NEXT: .LBB35_1: # %atomicrmw.start ++; LA64F-NEXT: # =>This Loop Header: Depth=1 ++; LA64F-NEXT: # Child Loop BB35_3 Depth 2 ++; LA64F-NEXT: fmax.s $fa2, $fa0, $fa0 ++; LA64F-NEXT: fmax.s $fa2, $fa2, $fa1 ++; LA64F-NEXT: movfr2gr.s $a1, $fa2 ++; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: .LBB35_3: # %atomicrmw.start ++; LA64F-NEXT: # Parent Loop BB35_1 Depth=1 ++; LA64F-NEXT: # => This Inner Loop Header: Depth=2 ++; LA64F-NEXT: ll.w $a3, $a0, 0 ++; LA64F-NEXT: bne $a3, $a2, .LBB35_5 ++; LA64F-NEXT: # %bb.4: # %atomicrmw.start ++; LA64F-NEXT: # in Loop: Header=BB35_3 Depth=2 ++; LA64F-NEXT: move $a4, $a1 ++; LA64F-NEXT: sc.w $a4, $a0, 0 ++; LA64F-NEXT: beqz $a4, .LBB35_3 ++; LA64F-NEXT: b .LBB35_6 ++; LA64F-NEXT: .LBB35_5: # %atomicrmw.start ++; LA64F-NEXT: # in Loop: Header=BB35_1 Depth=1 ++; LA64F-NEXT: dbar 1792 ++; LA64F-NEXT: .LBB35_6: # %atomicrmw.start ++; LA64F-NEXT: # in Loop: Header=BB35_1 Depth=1 ++; LA64F-NEXT: movgr2fr.w $fa0, $a3 ++; LA64F-NEXT: addi.w $a1, $a2, 0 ++; LA64F-NEXT: bne $a3, $a1, .LBB35_1 ++; LA64F-NEXT: # %bb.2: # %atomicrmw.end ++; LA64F-NEXT: ret ++; ++; LA64D-LABEL: float_fmax_monotonic: ++; LA64D: # %bb.0: ++; LA64D-NEXT: fld.s $fa0, $a0, 0 ++; LA64D-NEXT: addi.w $a1, $zero, 1 ++; LA64D-NEXT: movgr2fr.w $fa1, $a1 ++; LA64D-NEXT: ffint.s.w $fa1, $fa1 ++; LA64D-NEXT: .p2align 4, , 16 ++; LA64D-NEXT: .LBB35_1: # %atomicrmw.start ++; LA64D-NEXT: # =>This Loop Header: Depth=1 ++; LA64D-NEXT: # Child Loop BB35_3 Depth 2 ++; LA64D-NEXT: fmax.s $fa2, $fa0, $fa0 ++; LA64D-NEXT: fmax.s $fa2, $fa2, $fa1 ++; LA64D-NEXT: movfr2gr.s $a1, $fa2 ++; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: .LBB35_3: # %atomicrmw.start ++; LA64D-NEXT: # Parent Loop BB35_1 Depth=1 ++; LA64D-NEXT: # => This Inner Loop Header: Depth=2 ++; LA64D-NEXT: ll.w $a3, $a0, 0 ++; LA64D-NEXT: bne $a3, $a2, .LBB35_5 ++; LA64D-NEXT: # %bb.4: # %atomicrmw.start ++; LA64D-NEXT: # in Loop: Header=BB35_3 Depth=2 ++; LA64D-NEXT: move $a4, $a1 ++; LA64D-NEXT: sc.w $a4, $a0, 0 ++; LA64D-NEXT: beqz $a4, .LBB35_3 ++; LA64D-NEXT: b .LBB35_6 ++; LA64D-NEXT: .LBB35_5: # %atomicrmw.start ++; LA64D-NEXT: # in Loop: Header=BB35_1 Depth=1 ++; LA64D-NEXT: dbar 1792 ++; LA64D-NEXT: .LBB35_6: # %atomicrmw.start ++; LA64D-NEXT: # in Loop: Header=BB35_1 Depth=1 ++; LA64D-NEXT: movgr2fr.w $fa0, $a3 ++; LA64D-NEXT: addi.w $a1, $a2, 0 ++; LA64D-NEXT: bne $a3, $a1, .LBB35_1 ++; LA64D-NEXT: # %bb.2: # %atomicrmw.end ++; LA64D-NEXT: ret ++ %v = atomicrmw fmax ptr %p, float 1.0 monotonic, align 4 ++ ret float %v ++} ++ ++define double @double_fadd_monotonic(ptr %p) nounwind { ++; LA64F-LABEL: double_fadd_monotonic: ++; LA64F: # %bb.0: ++; 
LA64F-NEXT: addi.d $sp, $sp, -64 ++; LA64F-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s3, $sp, 16 # 8-byte Folded Spill ++; LA64F-NEXT: move $fp, $a0 ++; LA64F-NEXT: ld.d $a0, $a0, 0 ++; LA64F-NEXT: lu52i.d $s0, $zero, 1023 ++; LA64F-NEXT: ori $s1, $zero, 8 ++; LA64F-NEXT: addi.d $s2, $sp, 8 ++; LA64F-NEXT: addi.d $s3, $sp, 0 ++; LA64F-NEXT: .p2align 4, , 16 ++; LA64F-NEXT: .LBB36_1: # %atomicrmw.start ++; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 ++; LA64F-NEXT: st.d $a0, $sp, 8 ++; LA64F-NEXT: move $a1, $s0 ++; LA64F-NEXT: bl %plt(__adddf3) ++; LA64F-NEXT: st.d $a0, $sp, 0 ++; LA64F-NEXT: move $a0, $s1 ++; LA64F-NEXT: move $a1, $fp ++; LA64F-NEXT: move $a2, $s2 ++; LA64F-NEXT: move $a3, $s3 ++; LA64F-NEXT: move $a4, $zero ++; LA64F-NEXT: move $a5, $zero ++; LA64F-NEXT: bl %plt(__atomic_compare_exchange) ++; LA64F-NEXT: move $a1, $a0 ++; LA64F-NEXT: ld.d $a0, $sp, 8 ++; LA64F-NEXT: beqz $a1, .LBB36_1 ++; LA64F-NEXT: # %bb.2: # %atomicrmw.end ++; LA64F-NEXT: ld.d $s3, $sp, 16 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s2, $sp, 24 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload ++; LA64F-NEXT: addi.d $sp, $sp, 64 ++; LA64F-NEXT: ret ++; ++; LA64D-LABEL: double_fadd_monotonic: ++; LA64D: # %bb.0: ++; LA64D-NEXT: addi.d $sp, $sp, -64 ++; LA64D-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill ++; LA64D-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill ++; LA64D-NEXT: move $fp, $a0 ++; LA64D-NEXT: fld.d $fa0, $a0, 0 ++; LA64D-NEXT: addi.d $a0, $zero, 1 ++; LA64D-NEXT: movgr2fr.d $fa1, $a0 ++; LA64D-NEXT: ffint.d.l $fs0, $fa1 ++; LA64D-NEXT: ori $s0, $zero, 8 ++; LA64D-NEXT: addi.d $s1, $sp, 8 ++; LA64D-NEXT: addi.d $s2, $sp, 0 ++; LA64D-NEXT: .p2align 4, , 16 ++; LA64D-NEXT: .LBB36_1: # %atomicrmw.start ++; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 ++; LA64D-NEXT: fst.d $fa0, $sp, 8 ++; LA64D-NEXT: fadd.d $fa0, $fa0, $fs0 ++; LA64D-NEXT: fst.d $fa0, $sp, 0 ++; LA64D-NEXT: move $a0, $s0 ++; LA64D-NEXT: move $a1, $fp ++; LA64D-NEXT: move $a2, $s1 ++; LA64D-NEXT: move $a3, $s2 ++; LA64D-NEXT: move $a4, $zero ++; LA64D-NEXT: move $a5, $zero ++; LA64D-NEXT: bl %plt(__atomic_compare_exchange) ++; LA64D-NEXT: fld.d $fa0, $sp, 8 ++; LA64D-NEXT: beqz $a0, .LBB36_1 ++; LA64D-NEXT: # %bb.2: # %atomicrmw.end ++; LA64D-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s2, $sp, 24 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload ++; LA64D-NEXT: addi.d $sp, $sp, 64 ++; LA64D-NEXT: ret ++ %v = atomicrmw fadd ptr %p, double 1.0 monotonic, align 4 ++ ret double %v ++} ++ ++define double @double_fsub_monotonic(ptr %p) nounwind { ++; LA64F-LABEL: double_fsub_monotonic: ++; LA64F: # %bb.0: ++; LA64F-NEXT: addi.d $sp, $sp, 
-64 ++; LA64F-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s3, $sp, 16 # 8-byte Folded Spill ++; LA64F-NEXT: move $fp, $a0 ++; LA64F-NEXT: ld.d $a0, $a0, 0 ++; LA64F-NEXT: lu52i.d $s0, $zero, -1025 ++; LA64F-NEXT: ori $s1, $zero, 8 ++; LA64F-NEXT: addi.d $s2, $sp, 8 ++; LA64F-NEXT: addi.d $s3, $sp, 0 ++; LA64F-NEXT: .p2align 4, , 16 ++; LA64F-NEXT: .LBB37_1: # %atomicrmw.start ++; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 ++; LA64F-NEXT: st.d $a0, $sp, 8 ++; LA64F-NEXT: move $a1, $s0 ++; LA64F-NEXT: bl %plt(__adddf3) ++; LA64F-NEXT: st.d $a0, $sp, 0 ++; LA64F-NEXT: move $a0, $s1 ++; LA64F-NEXT: move $a1, $fp ++; LA64F-NEXT: move $a2, $s2 ++; LA64F-NEXT: move $a3, $s3 ++; LA64F-NEXT: move $a4, $zero ++; LA64F-NEXT: move $a5, $zero ++; LA64F-NEXT: bl %plt(__atomic_compare_exchange) ++; LA64F-NEXT: move $a1, $a0 ++; LA64F-NEXT: ld.d $a0, $sp, 8 ++; LA64F-NEXT: beqz $a1, .LBB37_1 ++; LA64F-NEXT: # %bb.2: # %atomicrmw.end ++; LA64F-NEXT: ld.d $s3, $sp, 16 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s2, $sp, 24 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload ++; LA64F-NEXT: addi.d $sp, $sp, 64 ++; LA64F-NEXT: ret ++; ++; LA64D-LABEL: double_fsub_monotonic: ++; LA64D: # %bb.0: ++; LA64D-NEXT: addi.d $sp, $sp, -64 ++; LA64D-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill ++; LA64D-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill ++; LA64D-NEXT: move $fp, $a0 ++; LA64D-NEXT: fld.d $fa0, $a0, 0 ++; LA64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI37_0) ++; LA64D-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI37_0) ++; LA64D-NEXT: fld.d $fs0, $a0, 0 ++; LA64D-NEXT: ori $s0, $zero, 8 ++; LA64D-NEXT: addi.d $s1, $sp, 8 ++; LA64D-NEXT: addi.d $s2, $sp, 0 ++; LA64D-NEXT: .p2align 4, , 16 ++; LA64D-NEXT: .LBB37_1: # %atomicrmw.start ++; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 ++; LA64D-NEXT: fst.d $fa0, $sp, 8 ++; LA64D-NEXT: fadd.d $fa0, $fa0, $fs0 ++; LA64D-NEXT: fst.d $fa0, $sp, 0 ++; LA64D-NEXT: move $a0, $s0 ++; LA64D-NEXT: move $a1, $fp ++; LA64D-NEXT: move $a2, $s1 ++; LA64D-NEXT: move $a3, $s2 ++; LA64D-NEXT: move $a4, $zero ++; LA64D-NEXT: move $a5, $zero ++; LA64D-NEXT: bl %plt(__atomic_compare_exchange) ++; LA64D-NEXT: fld.d $fa0, $sp, 8 ++; LA64D-NEXT: beqz $a0, .LBB37_1 ++; LA64D-NEXT: # %bb.2: # %atomicrmw.end ++; LA64D-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s2, $sp, 24 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload ++; LA64D-NEXT: addi.d $sp, $sp, 64 ++; LA64D-NEXT: ret ++ %v = atomicrmw fsub ptr %p, double 1.0 monotonic, align 4 ++ ret double %v ++} ++ ++define double @double_fmin_monotonic(ptr %p) nounwind { ++; LA64F-LABEL: double_fmin_monotonic: ++; LA64F: # %bb.0: ++; LA64F-NEXT: addi.d $sp, $sp, 
-64 ++; LA64F-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s3, $sp, 16 # 8-byte Folded Spill ++; LA64F-NEXT: move $fp, $a0 ++; LA64F-NEXT: ld.d $a0, $a0, 0 ++; LA64F-NEXT: lu52i.d $s0, $zero, 1023 ++; LA64F-NEXT: ori $s1, $zero, 8 ++; LA64F-NEXT: addi.d $s2, $sp, 8 ++; LA64F-NEXT: addi.d $s3, $sp, 0 ++; LA64F-NEXT: .p2align 4, , 16 ++; LA64F-NEXT: .LBB38_1: # %atomicrmw.start ++; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 ++; LA64F-NEXT: st.d $a0, $sp, 8 ++; LA64F-NEXT: move $a1, $s0 ++; LA64F-NEXT: bl %plt(fmin) ++; LA64F-NEXT: st.d $a0, $sp, 0 ++; LA64F-NEXT: move $a0, $s1 ++; LA64F-NEXT: move $a1, $fp ++; LA64F-NEXT: move $a2, $s2 ++; LA64F-NEXT: move $a3, $s3 ++; LA64F-NEXT: move $a4, $zero ++; LA64F-NEXT: move $a5, $zero ++; LA64F-NEXT: bl %plt(__atomic_compare_exchange) ++; LA64F-NEXT: move $a1, $a0 ++; LA64F-NEXT: ld.d $a0, $sp, 8 ++; LA64F-NEXT: beqz $a1, .LBB38_1 ++; LA64F-NEXT: # %bb.2: # %atomicrmw.end ++; LA64F-NEXT: ld.d $s3, $sp, 16 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s2, $sp, 24 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload ++; LA64F-NEXT: addi.d $sp, $sp, 64 ++; LA64F-NEXT: ret ++; ++; LA64D-LABEL: double_fmin_monotonic: ++; LA64D: # %bb.0: ++; LA64D-NEXT: addi.d $sp, $sp, -64 ++; LA64D-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill ++; LA64D-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill ++; LA64D-NEXT: move $fp, $a0 ++; LA64D-NEXT: fld.d $fa0, $a0, 0 ++; LA64D-NEXT: addi.d $a0, $zero, 1 ++; LA64D-NEXT: movgr2fr.d $fa1, $a0 ++; LA64D-NEXT: ffint.d.l $fs0, $fa1 ++; LA64D-NEXT: ori $s0, $zero, 8 ++; LA64D-NEXT: addi.d $s1, $sp, 8 ++; LA64D-NEXT: addi.d $s2, $sp, 0 ++; LA64D-NEXT: .p2align 4, , 16 ++; LA64D-NEXT: .LBB38_1: # %atomicrmw.start ++; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 ++; LA64D-NEXT: fst.d $fa0, $sp, 8 ++; LA64D-NEXT: fmax.d $fa0, $fa0, $fa0 ++; LA64D-NEXT: fmin.d $fa0, $fa0, $fs0 ++; LA64D-NEXT: fst.d $fa0, $sp, 0 ++; LA64D-NEXT: move $a0, $s0 ++; LA64D-NEXT: move $a1, $fp ++; LA64D-NEXT: move $a2, $s1 ++; LA64D-NEXT: move $a3, $s2 ++; LA64D-NEXT: move $a4, $zero ++; LA64D-NEXT: move $a5, $zero ++; LA64D-NEXT: bl %plt(__atomic_compare_exchange) ++; LA64D-NEXT: fld.d $fa0, $sp, 8 ++; LA64D-NEXT: beqz $a0, .LBB38_1 ++; LA64D-NEXT: # %bb.2: # %atomicrmw.end ++; LA64D-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s2, $sp, 24 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload ++; LA64D-NEXT: addi.d $sp, $sp, 64 ++; LA64D-NEXT: ret ++ %v = atomicrmw fmin ptr %p, double 1.0 monotonic, align 4 ++ ret double %v ++} ++ ++define double @double_fmax_monotonic(ptr %p) nounwind { ++; LA64F-LABEL: double_fmax_monotonic: ++; LA64F: # %bb.0: ++; LA64F-NEXT: addi.d $sp, 
$sp, -64 ++; LA64F-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill ++; LA64F-NEXT: st.d $s3, $sp, 16 # 8-byte Folded Spill ++; LA64F-NEXT: move $fp, $a0 ++; LA64F-NEXT: ld.d $a0, $a0, 0 ++; LA64F-NEXT: lu52i.d $s0, $zero, 1023 ++; LA64F-NEXT: ori $s1, $zero, 8 ++; LA64F-NEXT: addi.d $s2, $sp, 8 ++; LA64F-NEXT: addi.d $s3, $sp, 0 ++; LA64F-NEXT: .p2align 4, , 16 ++; LA64F-NEXT: .LBB39_1: # %atomicrmw.start ++; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 ++; LA64F-NEXT: st.d $a0, $sp, 8 ++; LA64F-NEXT: move $a1, $s0 ++; LA64F-NEXT: bl %plt(fmax) ++; LA64F-NEXT: st.d $a0, $sp, 0 ++; LA64F-NEXT: move $a0, $s1 ++; LA64F-NEXT: move $a1, $fp ++; LA64F-NEXT: move $a2, $s2 ++; LA64F-NEXT: move $a3, $s3 ++; LA64F-NEXT: move $a4, $zero ++; LA64F-NEXT: move $a5, $zero ++; LA64F-NEXT: bl %plt(__atomic_compare_exchange) ++; LA64F-NEXT: move $a1, $a0 ++; LA64F-NEXT: ld.d $a0, $sp, 8 ++; LA64F-NEXT: beqz $a1, .LBB39_1 ++; LA64F-NEXT: # %bb.2: # %atomicrmw.end ++; LA64F-NEXT: ld.d $s3, $sp, 16 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s2, $sp, 24 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload ++; LA64F-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload ++; LA64F-NEXT: addi.d $sp, $sp, 64 ++; LA64F-NEXT: ret ++; ++; LA64D-LABEL: double_fmax_monotonic: ++; LA64D: # %bb.0: ++; LA64D-NEXT: addi.d $sp, $sp, -64 ++; LA64D-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill ++; LA64D-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill ++; LA64D-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill ++; LA64D-NEXT: move $fp, $a0 ++; LA64D-NEXT: fld.d $fa0, $a0, 0 ++; LA64D-NEXT: addi.d $a0, $zero, 1 ++; LA64D-NEXT: movgr2fr.d $fa1, $a0 ++; LA64D-NEXT: ffint.d.l $fs0, $fa1 ++; LA64D-NEXT: ori $s0, $zero, 8 ++; LA64D-NEXT: addi.d $s1, $sp, 8 ++; LA64D-NEXT: addi.d $s2, $sp, 0 ++; LA64D-NEXT: .p2align 4, , 16 ++; LA64D-NEXT: .LBB39_1: # %atomicrmw.start ++; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 ++; LA64D-NEXT: fst.d $fa0, $sp, 8 ++; LA64D-NEXT: fmax.d $fa0, $fa0, $fa0 ++; LA64D-NEXT: fmax.d $fa0, $fa0, $fs0 ++; LA64D-NEXT: fst.d $fa0, $sp, 0 ++; LA64D-NEXT: move $a0, $s0 ++; LA64D-NEXT: move $a1, $fp ++; LA64D-NEXT: move $a2, $s1 ++; LA64D-NEXT: move $a3, $s2 ++; LA64D-NEXT: move $a4, $zero ++; LA64D-NEXT: move $a5, $zero ++; LA64D-NEXT: bl %plt(__atomic_compare_exchange) ++; LA64D-NEXT: fld.d $fa0, $sp, 8 ++; LA64D-NEXT: beqz $a0, .LBB39_1 ++; LA64D-NEXT: # %bb.2: # %atomicrmw.end ++; LA64D-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s2, $sp, 24 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload ++; LA64D-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload ++; LA64D-NEXT: addi.d $sp, $sp, 64 ++; LA64D-NEXT: ret ++ %v = atomicrmw fmax ptr %p, double 1.0 monotonic, align 4 ++ ret double %v ++} +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll 
+index 26ba77e8d4fd..770358a05bfd 100644 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll ++++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll +@@ -353,3 +353,1403 @@ define i64 @atomicrmw_min_i64_acquire(ptr %a, i64 %b) nounwind { + %1 = atomicrmw min ptr %a, i64 %b acquire + ret i64 %1 + } ++ ++define i8 @atomicrmw_umax_i8_release(ptr %a, i8 %b) nounwind { ++; LA64-LABEL: atomicrmw_umax_i8_release: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: ori $a3, $zero, 255 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: andi $a1, $a1, 255 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a2, 0 ++; LA64-NEXT: and $a6, $a4, $a3 ++; LA64-NEXT: move $a5, $a4 ++; LA64-NEXT: bgeu $a6, $a1, .LBB16_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 ++; LA64-NEXT: xor $a5, $a4, $a1 ++; LA64-NEXT: and $a5, $a5, $a3 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: .LBB16_3: # in Loop: Header=BB16_1 Depth=1 ++; LA64-NEXT: sc.w $a5, $a2, 0 ++; LA64-NEXT: beqz $a5, .LBB16_1 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: srl.w $a0, $a4, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw umax ptr %a, i8 %b release ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_umax_i16_release(ptr %a, i16 %b) nounwind { ++; LA64-LABEL: atomicrmw_umax_i16_release: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: lu12i.w $a3, 15 ++; LA64-NEXT: ori $a3, $a3, 4095 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a2, 0 ++; LA64-NEXT: and $a6, $a4, $a3 ++; LA64-NEXT: move $a5, $a4 ++; LA64-NEXT: bgeu $a6, $a1, .LBB17_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB17_1 Depth=1 ++; LA64-NEXT: xor $a5, $a4, $a1 ++; LA64-NEXT: and $a5, $a5, $a3 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: .LBB17_3: # in Loop: Header=BB17_1 Depth=1 ++; LA64-NEXT: sc.w $a5, $a2, 0 ++; LA64-NEXT: beqz $a5, .LBB17_1 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: srl.w $a0, $a4, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw umax ptr %a, i16 %b release ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_umax_i32_release(ptr %a, i32 %b) nounwind { ++; LA64-LABEL: atomicrmw_umax_i32_release: ++; LA64: # %bb.0: ++; LA64-NEXT: ammax_db.wu $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw umax ptr %a, i32 %b release ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_umax_i64_release(ptr %a, i64 %b) nounwind { ++; LA64-LABEL: atomicrmw_umax_i64_release: ++; LA64: # %bb.0: ++; LA64-NEXT: ammax_db.du $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw umax ptr %a, i64 %b release ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_umin_i8_release(ptr %a, i8 %b) nounwind { ++; LA64-LABEL: atomicrmw_umin_i8_release: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: ori $a3, $zero, 255 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: andi $a1, $a1, 255 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB20_1: # =>This Inner Loop Header: 
Depth=1 ++; LA64-NEXT: ll.w $a4, $a2, 0 ++; LA64-NEXT: and $a6, $a4, $a3 ++; LA64-NEXT: move $a5, $a4 ++; LA64-NEXT: bgeu $a1, $a6, .LBB20_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB20_1 Depth=1 ++; LA64-NEXT: xor $a5, $a4, $a1 ++; LA64-NEXT: and $a5, $a5, $a3 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: .LBB20_3: # in Loop: Header=BB20_1 Depth=1 ++; LA64-NEXT: sc.w $a5, $a2, 0 ++; LA64-NEXT: beqz $a5, .LBB20_1 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: srl.w $a0, $a4, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw umin ptr %a, i8 %b release ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_umin_i16_release(ptr %a, i16 %b) nounwind { ++; LA64-LABEL: atomicrmw_umin_i16_release: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: lu12i.w $a3, 15 ++; LA64-NEXT: ori $a3, $a3, 4095 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a2, 0 ++; LA64-NEXT: and $a6, $a4, $a3 ++; LA64-NEXT: move $a5, $a4 ++; LA64-NEXT: bgeu $a1, $a6, .LBB21_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB21_1 Depth=1 ++; LA64-NEXT: xor $a5, $a4, $a1 ++; LA64-NEXT: and $a5, $a5, $a3 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: .LBB21_3: # in Loop: Header=BB21_1 Depth=1 ++; LA64-NEXT: sc.w $a5, $a2, 0 ++; LA64-NEXT: beqz $a5, .LBB21_1 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: srl.w $a0, $a4, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw umin ptr %a, i16 %b release ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_umin_i32_release(ptr %a, i32 %b) nounwind { ++; LA64-LABEL: atomicrmw_umin_i32_release: ++; LA64: # %bb.0: ++; LA64-NEXT: ammin_db.wu $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw umin ptr %a, i32 %b release ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_umin_i64_release(ptr %a, i64 %b) nounwind { ++; LA64-LABEL: atomicrmw_umin_i64_release: ++; LA64: # %bb.0: ++; LA64-NEXT: ammin_db.du $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw umin ptr %a, i64 %b release ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_max_i8_release(ptr %a, i8 %b) nounwind { ++; LA64-LABEL: atomicrmw_max_i8_release: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: ori $a3, $zero, 255 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: ext.w.b $a1, $a1 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: andi $a4, $a0, 24 ++; LA64-NEXT: xori $a4, $a4, 56 ++; LA64-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a5, $a2, 0 ++; LA64-NEXT: and $a7, $a5, $a3 ++; LA64-NEXT: move $a6, $a5 ++; LA64-NEXT: sll.w $a7, $a7, $a4 ++; LA64-NEXT: sra.w $a7, $a7, $a4 ++; LA64-NEXT: bge $a7, $a1, .LBB24_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB24_1 Depth=1 ++; LA64-NEXT: xor $a6, $a5, $a1 ++; LA64-NEXT: and $a6, $a6, $a3 ++; LA64-NEXT: xor $a6, $a5, $a6 ++; LA64-NEXT: .LBB24_3: # in Loop: Header=BB24_1 Depth=1 ++; LA64-NEXT: sc.w $a6, $a2, 0 ++; LA64-NEXT: beqz $a6, .LBB24_1 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: srl.w $a0, $a5, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw max ptr %a, i8 %b release ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_max_i16_release(ptr %a, i16 %b) nounwind { ++; LA64-LABEL: atomicrmw_max_i16_release: ++; LA64: # %bb.0: ++; 
LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: andi $a3, $a0, 24 ++; LA64-NEXT: ori $a4, $zero, 48 ++; LA64-NEXT: sub.d $a3, $a4, $a3 ++; LA64-NEXT: lu12i.w $a4, 15 ++; LA64-NEXT: ori $a4, $a4, 4095 ++; LA64-NEXT: sll.w $a4, $a4, $a0 ++; LA64-NEXT: addi.w $a4, $a4, 0 ++; LA64-NEXT: ext.w.h $a1, $a1 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a5, $a2, 0 ++; LA64-NEXT: and $a7, $a5, $a4 ++; LA64-NEXT: move $a6, $a5 ++; LA64-NEXT: sll.w $a7, $a7, $a3 ++; LA64-NEXT: sra.w $a7, $a7, $a3 ++; LA64-NEXT: bge $a7, $a1, .LBB25_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB25_1 Depth=1 ++; LA64-NEXT: xor $a6, $a5, $a1 ++; LA64-NEXT: and $a6, $a6, $a4 ++; LA64-NEXT: xor $a6, $a5, $a6 ++; LA64-NEXT: .LBB25_3: # in Loop: Header=BB25_1 Depth=1 ++; LA64-NEXT: sc.w $a6, $a2, 0 ++; LA64-NEXT: beqz $a6, .LBB25_1 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: srl.w $a0, $a5, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw max ptr %a, i16 %b release ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_max_i32_release(ptr %a, i32 %b) nounwind { ++; LA64-LABEL: atomicrmw_max_i32_release: ++; LA64: # %bb.0: ++; LA64-NEXT: ammax_db.w $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw max ptr %a, i32 %b release ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_max_i64_release(ptr %a, i64 %b) nounwind { ++; LA64-LABEL: atomicrmw_max_i64_release: ++; LA64: # %bb.0: ++; LA64-NEXT: ammax_db.d $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw max ptr %a, i64 %b release ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_min_i8_release(ptr %a, i8 %b) nounwind { ++; LA64-LABEL: atomicrmw_min_i8_release: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: ori $a3, $zero, 255 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: ext.w.b $a1, $a1 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: andi $a4, $a0, 24 ++; LA64-NEXT: xori $a4, $a4, 56 ++; LA64-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a5, $a2, 0 ++; LA64-NEXT: and $a7, $a5, $a3 ++; LA64-NEXT: move $a6, $a5 ++; LA64-NEXT: sll.w $a7, $a7, $a4 ++; LA64-NEXT: sra.w $a7, $a7, $a4 ++; LA64-NEXT: bge $a1, $a7, .LBB28_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB28_1 Depth=1 ++; LA64-NEXT: xor $a6, $a5, $a1 ++; LA64-NEXT: and $a6, $a6, $a3 ++; LA64-NEXT: xor $a6, $a5, $a6 ++; LA64-NEXT: .LBB28_3: # in Loop: Header=BB28_1 Depth=1 ++; LA64-NEXT: sc.w $a6, $a2, 0 ++; LA64-NEXT: beqz $a6, .LBB28_1 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: srl.w $a0, $a5, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw min ptr %a, i8 %b release ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_min_i16_release(ptr %a, i16 %b) nounwind { ++; LA64-LABEL: atomicrmw_min_i16_release: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: andi $a3, $a0, 24 ++; LA64-NEXT: ori $a4, $zero, 48 ++; LA64-NEXT: sub.d $a3, $a4, $a3 ++; LA64-NEXT: lu12i.w $a4, 15 ++; LA64-NEXT: ori $a4, $a4, 4095 ++; LA64-NEXT: sll.w $a4, $a4, $a0 ++; LA64-NEXT: addi.w $a4, $a4, 0 ++; LA64-NEXT: ext.w.h $a1, $a1 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a5, $a2, 0 ++; LA64-NEXT: 
and $a7, $a5, $a4 ++; LA64-NEXT: move $a6, $a5 ++; LA64-NEXT: sll.w $a7, $a7, $a3 ++; LA64-NEXT: sra.w $a7, $a7, $a3 ++; LA64-NEXT: bge $a1, $a7, .LBB29_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB29_1 Depth=1 ++; LA64-NEXT: xor $a6, $a5, $a1 ++; LA64-NEXT: and $a6, $a6, $a4 ++; LA64-NEXT: xor $a6, $a5, $a6 ++; LA64-NEXT: .LBB29_3: # in Loop: Header=BB29_1 Depth=1 ++; LA64-NEXT: sc.w $a6, $a2, 0 ++; LA64-NEXT: beqz $a6, .LBB29_1 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: srl.w $a0, $a5, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw min ptr %a, i16 %b release ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_min_i32_release(ptr %a, i32 %b) nounwind { ++; LA64-LABEL: atomicrmw_min_i32_release: ++; LA64: # %bb.0: ++; LA64-NEXT: ammin_db.w $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw min ptr %a, i32 %b release ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_min_i64_release(ptr %a, i64 %b) nounwind { ++; LA64-LABEL: atomicrmw_min_i64_release: ++; LA64: # %bb.0: ++; LA64-NEXT: ammin_db.d $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw min ptr %a, i64 %b release ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_umax_i8_acq_rel(ptr %a, i8 %b) nounwind { ++; LA64-LABEL: atomicrmw_umax_i8_acq_rel: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: ori $a3, $zero, 255 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: andi $a1, $a1, 255 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a2, 0 ++; LA64-NEXT: and $a6, $a4, $a3 ++; LA64-NEXT: move $a5, $a4 ++; LA64-NEXT: bgeu $a6, $a1, .LBB32_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB32_1 Depth=1 ++; LA64-NEXT: xor $a5, $a4, $a1 ++; LA64-NEXT: and $a5, $a5, $a3 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: .LBB32_3: # in Loop: Header=BB32_1 Depth=1 ++; LA64-NEXT: sc.w $a5, $a2, 0 ++; LA64-NEXT: beqz $a5, .LBB32_1 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: srl.w $a0, $a4, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw umax ptr %a, i8 %b acq_rel ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_umax_i16_acq_rel(ptr %a, i16 %b) nounwind { ++; LA64-LABEL: atomicrmw_umax_i16_acq_rel: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: lu12i.w $a3, 15 ++; LA64-NEXT: ori $a3, $a3, 4095 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a2, 0 ++; LA64-NEXT: and $a6, $a4, $a3 ++; LA64-NEXT: move $a5, $a4 ++; LA64-NEXT: bgeu $a6, $a1, .LBB33_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB33_1 Depth=1 ++; LA64-NEXT: xor $a5, $a4, $a1 ++; LA64-NEXT: and $a5, $a5, $a3 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: .LBB33_3: # in Loop: Header=BB33_1 Depth=1 ++; LA64-NEXT: sc.w $a5, $a2, 0 ++; LA64-NEXT: beqz $a5, .LBB33_1 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: srl.w $a0, $a4, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw umax ptr %a, i16 %b acq_rel ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_umax_i32_acq_rel(ptr %a, i32 %b) nounwind { ++; LA64-LABEL: atomicrmw_umax_i32_acq_rel: ++; LA64: # %bb.0: ++; LA64-NEXT: ammax_db.wu $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw umax ptr %a, i32 
%b acq_rel ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_umax_i64_acq_rel(ptr %a, i64 %b) nounwind { ++; LA64-LABEL: atomicrmw_umax_i64_acq_rel: ++; LA64: # %bb.0: ++; LA64-NEXT: ammax_db.du $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw umax ptr %a, i64 %b acq_rel ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_umin_i8_acq_rel(ptr %a, i8 %b) nounwind { ++; LA64-LABEL: atomicrmw_umin_i8_acq_rel: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: ori $a3, $zero, 255 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: andi $a1, $a1, 255 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a2, 0 ++; LA64-NEXT: and $a6, $a4, $a3 ++; LA64-NEXT: move $a5, $a4 ++; LA64-NEXT: bgeu $a1, $a6, .LBB36_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB36_1 Depth=1 ++; LA64-NEXT: xor $a5, $a4, $a1 ++; LA64-NEXT: and $a5, $a5, $a3 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: .LBB36_3: # in Loop: Header=BB36_1 Depth=1 ++; LA64-NEXT: sc.w $a5, $a2, 0 ++; LA64-NEXT: beqz $a5, .LBB36_1 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: srl.w $a0, $a4, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw umin ptr %a, i8 %b acq_rel ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_umin_i16_acq_rel(ptr %a, i16 %b) nounwind { ++; LA64-LABEL: atomicrmw_umin_i16_acq_rel: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: lu12i.w $a3, 15 ++; LA64-NEXT: ori $a3, $a3, 4095 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a2, 0 ++; LA64-NEXT: and $a6, $a4, $a3 ++; LA64-NEXT: move $a5, $a4 ++; LA64-NEXT: bgeu $a1, $a6, .LBB37_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB37_1 Depth=1 ++; LA64-NEXT: xor $a5, $a4, $a1 ++; LA64-NEXT: and $a5, $a5, $a3 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: .LBB37_3: # in Loop: Header=BB37_1 Depth=1 ++; LA64-NEXT: sc.w $a5, $a2, 0 ++; LA64-NEXT: beqz $a5, .LBB37_1 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: srl.w $a0, $a4, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw umin ptr %a, i16 %b acq_rel ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_umin_i32_acq_rel(ptr %a, i32 %b) nounwind { ++; LA64-LABEL: atomicrmw_umin_i32_acq_rel: ++; LA64: # %bb.0: ++; LA64-NEXT: ammin_db.wu $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw umin ptr %a, i32 %b acq_rel ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_umin_i64_acq_rel(ptr %a, i64 %b) nounwind { ++; LA64-LABEL: atomicrmw_umin_i64_acq_rel: ++; LA64: # %bb.0: ++; LA64-NEXT: ammin_db.du $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw umin ptr %a, i64 %b acq_rel ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_max_i8_acq_rel(ptr %a, i8 %b) nounwind { ++; LA64-LABEL: atomicrmw_max_i8_acq_rel: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: ori $a3, $zero, 255 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: ext.w.b $a1, $a1 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: andi $a4, $a0, 24 ++; LA64-NEXT: xori $a4, $a4, 56 ++; 
LA64-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a5, $a2, 0 ++; LA64-NEXT: and $a7, $a5, $a3 ++; LA64-NEXT: move $a6, $a5 ++; LA64-NEXT: sll.w $a7, $a7, $a4 ++; LA64-NEXT: sra.w $a7, $a7, $a4 ++; LA64-NEXT: bge $a7, $a1, .LBB40_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB40_1 Depth=1 ++; LA64-NEXT: xor $a6, $a5, $a1 ++; LA64-NEXT: and $a6, $a6, $a3 ++; LA64-NEXT: xor $a6, $a5, $a6 ++; LA64-NEXT: .LBB40_3: # in Loop: Header=BB40_1 Depth=1 ++; LA64-NEXT: sc.w $a6, $a2, 0 ++; LA64-NEXT: beqz $a6, .LBB40_1 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: srl.w $a0, $a5, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw max ptr %a, i8 %b acq_rel ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_max_i16_acq_rel(ptr %a, i16 %b) nounwind { ++; LA64-LABEL: atomicrmw_max_i16_acq_rel: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: andi $a3, $a0, 24 ++; LA64-NEXT: ori $a4, $zero, 48 ++; LA64-NEXT: sub.d $a3, $a4, $a3 ++; LA64-NEXT: lu12i.w $a4, 15 ++; LA64-NEXT: ori $a4, $a4, 4095 ++; LA64-NEXT: sll.w $a4, $a4, $a0 ++; LA64-NEXT: addi.w $a4, $a4, 0 ++; LA64-NEXT: ext.w.h $a1, $a1 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a5, $a2, 0 ++; LA64-NEXT: and $a7, $a5, $a4 ++; LA64-NEXT: move $a6, $a5 ++; LA64-NEXT: sll.w $a7, $a7, $a3 ++; LA64-NEXT: sra.w $a7, $a7, $a3 ++; LA64-NEXT: bge $a7, $a1, .LBB41_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB41_1 Depth=1 ++; LA64-NEXT: xor $a6, $a5, $a1 ++; LA64-NEXT: and $a6, $a6, $a4 ++; LA64-NEXT: xor $a6, $a5, $a6 ++; LA64-NEXT: .LBB41_3: # in Loop: Header=BB41_1 Depth=1 ++; LA64-NEXT: sc.w $a6, $a2, 0 ++; LA64-NEXT: beqz $a6, .LBB41_1 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: srl.w $a0, $a5, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw max ptr %a, i16 %b acq_rel ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_max_i32_acq_rel(ptr %a, i32 %b) nounwind { ++; LA64-LABEL: atomicrmw_max_i32_acq_rel: ++; LA64: # %bb.0: ++; LA64-NEXT: ammax_db.w $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw max ptr %a, i32 %b acq_rel ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_max_i64_acq_rel(ptr %a, i64 %b) nounwind { ++; LA64-LABEL: atomicrmw_max_i64_acq_rel: ++; LA64: # %bb.0: ++; LA64-NEXT: ammax_db.d $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw max ptr %a, i64 %b acq_rel ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_min_i8_acq_rel(ptr %a, i8 %b) nounwind { ++; LA64-LABEL: atomicrmw_min_i8_acq_rel: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: ori $a3, $zero, 255 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: ext.w.b $a1, $a1 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: andi $a4, $a0, 24 ++; LA64-NEXT: xori $a4, $a4, 56 ++; LA64-NEXT: .LBB44_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a5, $a2, 0 ++; LA64-NEXT: and $a7, $a5, $a3 ++; LA64-NEXT: move $a6, $a5 ++; LA64-NEXT: sll.w $a7, $a7, $a4 ++; LA64-NEXT: sra.w $a7, $a7, $a4 ++; LA64-NEXT: bge $a1, $a7, .LBB44_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB44_1 Depth=1 ++; LA64-NEXT: xor $a6, $a5, $a1 ++; LA64-NEXT: and $a6, $a6, $a3 ++; LA64-NEXT: xor $a6, $a5, $a6 ++; LA64-NEXT: .LBB44_3: # in Loop: Header=BB44_1 Depth=1 ++; LA64-NEXT: sc.w $a6, $a2, 0 ++; LA64-NEXT: beqz $a6, .LBB44_1 ++; 
LA64-NEXT: # %bb.4: ++; LA64-NEXT: srl.w $a0, $a5, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw min ptr %a, i8 %b acq_rel ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_min_i16_acq_rel(ptr %a, i16 %b) nounwind { ++; LA64-LABEL: atomicrmw_min_i16_acq_rel: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: andi $a3, $a0, 24 ++; LA64-NEXT: ori $a4, $zero, 48 ++; LA64-NEXT: sub.d $a3, $a4, $a3 ++; LA64-NEXT: lu12i.w $a4, 15 ++; LA64-NEXT: ori $a4, $a4, 4095 ++; LA64-NEXT: sll.w $a4, $a4, $a0 ++; LA64-NEXT: addi.w $a4, $a4, 0 ++; LA64-NEXT: ext.w.h $a1, $a1 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB45_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a5, $a2, 0 ++; LA64-NEXT: and $a7, $a5, $a4 ++; LA64-NEXT: move $a6, $a5 ++; LA64-NEXT: sll.w $a7, $a7, $a3 ++; LA64-NEXT: sra.w $a7, $a7, $a3 ++; LA64-NEXT: bge $a1, $a7, .LBB45_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB45_1 Depth=1 ++; LA64-NEXT: xor $a6, $a5, $a1 ++; LA64-NEXT: and $a6, $a6, $a4 ++; LA64-NEXT: xor $a6, $a5, $a6 ++; LA64-NEXT: .LBB45_3: # in Loop: Header=BB45_1 Depth=1 ++; LA64-NEXT: sc.w $a6, $a2, 0 ++; LA64-NEXT: beqz $a6, .LBB45_1 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: srl.w $a0, $a5, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw min ptr %a, i16 %b acq_rel ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_min_i32_acq_rel(ptr %a, i32 %b) nounwind { ++; LA64-LABEL: atomicrmw_min_i32_acq_rel: ++; LA64: # %bb.0: ++; LA64-NEXT: ammin_db.w $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw min ptr %a, i32 %b acq_rel ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_min_i64_acq_rel(ptr %a, i64 %b) nounwind { ++; LA64-LABEL: atomicrmw_min_i64_acq_rel: ++; LA64: # %bb.0: ++; LA64-NEXT: ammin_db.d $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw min ptr %a, i64 %b acq_rel ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_umax_i8_seq_cst(ptr %a, i8 %b) nounwind { ++; LA64-LABEL: atomicrmw_umax_i8_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: ori $a3, $zero, 255 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: andi $a1, $a1, 255 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a2, 0 ++; LA64-NEXT: and $a6, $a4, $a3 ++; LA64-NEXT: move $a5, $a4 ++; LA64-NEXT: bgeu $a6, $a1, .LBB48_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB48_1 Depth=1 ++; LA64-NEXT: xor $a5, $a4, $a1 ++; LA64-NEXT: and $a5, $a5, $a3 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: .LBB48_3: # in Loop: Header=BB48_1 Depth=1 ++; LA64-NEXT: sc.w $a5, $a2, 0 ++; LA64-NEXT: beqz $a5, .LBB48_1 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: srl.w $a0, $a4, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw umax ptr %a, i8 %b seq_cst ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_umax_i16_seq_cst(ptr %a, i16 %b) nounwind { ++; LA64-LABEL: atomicrmw_umax_i16_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: lu12i.w $a3, 15 ++; LA64-NEXT: ori $a3, $a3, 4095 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB49_1: # =>This Inner Loop Header: 
Depth=1 ++; LA64-NEXT: ll.w $a4, $a2, 0 ++; LA64-NEXT: and $a6, $a4, $a3 ++; LA64-NEXT: move $a5, $a4 ++; LA64-NEXT: bgeu $a6, $a1, .LBB49_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB49_1 Depth=1 ++; LA64-NEXT: xor $a5, $a4, $a1 ++; LA64-NEXT: and $a5, $a5, $a3 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: .LBB49_3: # in Loop: Header=BB49_1 Depth=1 ++; LA64-NEXT: sc.w $a5, $a2, 0 ++; LA64-NEXT: beqz $a5, .LBB49_1 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: srl.w $a0, $a4, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw umax ptr %a, i16 %b seq_cst ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_umax_i32_seq_cst(ptr %a, i32 %b) nounwind { ++; LA64-LABEL: atomicrmw_umax_i32_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: ammax_db.wu $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw umax ptr %a, i32 %b seq_cst ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_umax_i64_seq_cst(ptr %a, i64 %b) nounwind { ++; LA64-LABEL: atomicrmw_umax_i64_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: ammax_db.du $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw umax ptr %a, i64 %b seq_cst ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_umin_i8_seq_cst(ptr %a, i8 %b) nounwind { ++; LA64-LABEL: atomicrmw_umin_i8_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: ori $a3, $zero, 255 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: andi $a1, $a1, 255 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a2, 0 ++; LA64-NEXT: and $a6, $a4, $a3 ++; LA64-NEXT: move $a5, $a4 ++; LA64-NEXT: bgeu $a1, $a6, .LBB52_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB52_1 Depth=1 ++; LA64-NEXT: xor $a5, $a4, $a1 ++; LA64-NEXT: and $a5, $a5, $a3 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: .LBB52_3: # in Loop: Header=BB52_1 Depth=1 ++; LA64-NEXT: sc.w $a5, $a2, 0 ++; LA64-NEXT: beqz $a5, .LBB52_1 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: srl.w $a0, $a4, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw umin ptr %a, i8 %b seq_cst ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_umin_i16_seq_cst(ptr %a, i16 %b) nounwind { ++; LA64-LABEL: atomicrmw_umin_i16_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: lu12i.w $a3, 15 ++; LA64-NEXT: ori $a3, $a3, 4095 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB53_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a2, 0 ++; LA64-NEXT: and $a6, $a4, $a3 ++; LA64-NEXT: move $a5, $a4 ++; LA64-NEXT: bgeu $a1, $a6, .LBB53_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB53_1 Depth=1 ++; LA64-NEXT: xor $a5, $a4, $a1 ++; LA64-NEXT: and $a5, $a5, $a3 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: .LBB53_3: # in Loop: Header=BB53_1 Depth=1 ++; LA64-NEXT: sc.w $a5, $a2, 0 ++; LA64-NEXT: beqz $a5, .LBB53_1 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: srl.w $a0, $a4, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw umin ptr %a, i16 %b seq_cst ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_umin_i32_seq_cst(ptr %a, i32 %b) nounwind { ++; LA64-LABEL: atomicrmw_umin_i32_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: ammin_db.wu $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw umin ptr %a, i32 %b 
seq_cst ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_umin_i64_seq_cst(ptr %a, i64 %b) nounwind { ++; LA64-LABEL: atomicrmw_umin_i64_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: ammin_db.du $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw umin ptr %a, i64 %b seq_cst ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_max_i8_seq_cst(ptr %a, i8 %b) nounwind { ++; LA64-LABEL: atomicrmw_max_i8_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: ori $a3, $zero, 255 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: ext.w.b $a1, $a1 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: andi $a4, $a0, 24 ++; LA64-NEXT: xori $a4, $a4, 56 ++; LA64-NEXT: .LBB56_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a5, $a2, 0 ++; LA64-NEXT: and $a7, $a5, $a3 ++; LA64-NEXT: move $a6, $a5 ++; LA64-NEXT: sll.w $a7, $a7, $a4 ++; LA64-NEXT: sra.w $a7, $a7, $a4 ++; LA64-NEXT: bge $a7, $a1, .LBB56_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB56_1 Depth=1 ++; LA64-NEXT: xor $a6, $a5, $a1 ++; LA64-NEXT: and $a6, $a6, $a3 ++; LA64-NEXT: xor $a6, $a5, $a6 ++; LA64-NEXT: .LBB56_3: # in Loop: Header=BB56_1 Depth=1 ++; LA64-NEXT: sc.w $a6, $a2, 0 ++; LA64-NEXT: beqz $a6, .LBB56_1 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: srl.w $a0, $a5, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw max ptr %a, i8 %b seq_cst ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_max_i16_seq_cst(ptr %a, i16 %b) nounwind { ++; LA64-LABEL: atomicrmw_max_i16_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: andi $a3, $a0, 24 ++; LA64-NEXT: ori $a4, $zero, 48 ++; LA64-NEXT: sub.d $a3, $a4, $a3 ++; LA64-NEXT: lu12i.w $a4, 15 ++; LA64-NEXT: ori $a4, $a4, 4095 ++; LA64-NEXT: sll.w $a4, $a4, $a0 ++; LA64-NEXT: addi.w $a4, $a4, 0 ++; LA64-NEXT: ext.w.h $a1, $a1 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB57_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a5, $a2, 0 ++; LA64-NEXT: and $a7, $a5, $a4 ++; LA64-NEXT: move $a6, $a5 ++; LA64-NEXT: sll.w $a7, $a7, $a3 ++; LA64-NEXT: sra.w $a7, $a7, $a3 ++; LA64-NEXT: bge $a7, $a1, .LBB57_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB57_1 Depth=1 ++; LA64-NEXT: xor $a6, $a5, $a1 ++; LA64-NEXT: and $a6, $a6, $a4 ++; LA64-NEXT: xor $a6, $a5, $a6 ++; LA64-NEXT: .LBB57_3: # in Loop: Header=BB57_1 Depth=1 ++; LA64-NEXT: sc.w $a6, $a2, 0 ++; LA64-NEXT: beqz $a6, .LBB57_1 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: srl.w $a0, $a5, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw max ptr %a, i16 %b seq_cst ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_max_i32_seq_cst(ptr %a, i32 %b) nounwind { ++; LA64-LABEL: atomicrmw_max_i32_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: ammax_db.w $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw max ptr %a, i32 %b seq_cst ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_max_i64_seq_cst(ptr %a, i64 %b) nounwind { ++; LA64-LABEL: atomicrmw_max_i64_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: ammax_db.d $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw max ptr %a, i64 %b seq_cst ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_min_i8_seq_cst(ptr %a, i8 %b) nounwind { ++; LA64-LABEL: atomicrmw_min_i8_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 
++; LA64-NEXT: ori $a3, $zero, 255 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: ext.w.b $a1, $a1 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: andi $a4, $a0, 24 ++; LA64-NEXT: xori $a4, $a4, 56 ++; LA64-NEXT: .LBB60_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a5, $a2, 0 ++; LA64-NEXT: and $a7, $a5, $a3 ++; LA64-NEXT: move $a6, $a5 ++; LA64-NEXT: sll.w $a7, $a7, $a4 ++; LA64-NEXT: sra.w $a7, $a7, $a4 ++; LA64-NEXT: bge $a1, $a7, .LBB60_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB60_1 Depth=1 ++; LA64-NEXT: xor $a6, $a5, $a1 ++; LA64-NEXT: and $a6, $a6, $a3 ++; LA64-NEXT: xor $a6, $a5, $a6 ++; LA64-NEXT: .LBB60_3: # in Loop: Header=BB60_1 Depth=1 ++; LA64-NEXT: sc.w $a6, $a2, 0 ++; LA64-NEXT: beqz $a6, .LBB60_1 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: srl.w $a0, $a5, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw min ptr %a, i8 %b seq_cst ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_min_i16_seq_cst(ptr %a, i16 %b) nounwind { ++; LA64-LABEL: atomicrmw_min_i16_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: andi $a3, $a0, 24 ++; LA64-NEXT: ori $a4, $zero, 48 ++; LA64-NEXT: sub.d $a3, $a4, $a3 ++; LA64-NEXT: lu12i.w $a4, 15 ++; LA64-NEXT: ori $a4, $a4, 4095 ++; LA64-NEXT: sll.w $a4, $a4, $a0 ++; LA64-NEXT: addi.w $a4, $a4, 0 ++; LA64-NEXT: ext.w.h $a1, $a1 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB61_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a5, $a2, 0 ++; LA64-NEXT: and $a7, $a5, $a4 ++; LA64-NEXT: move $a6, $a5 ++; LA64-NEXT: sll.w $a7, $a7, $a3 ++; LA64-NEXT: sra.w $a7, $a7, $a3 ++; LA64-NEXT: bge $a1, $a7, .LBB61_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB61_1 Depth=1 ++; LA64-NEXT: xor $a6, $a5, $a1 ++; LA64-NEXT: and $a6, $a6, $a4 ++; LA64-NEXT: xor $a6, $a5, $a6 ++; LA64-NEXT: .LBB61_3: # in Loop: Header=BB61_1 Depth=1 ++; LA64-NEXT: sc.w $a6, $a2, 0 ++; LA64-NEXT: beqz $a6, .LBB61_1 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: srl.w $a0, $a5, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw min ptr %a, i16 %b seq_cst ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_min_i32_seq_cst(ptr %a, i32 %b) nounwind { ++; LA64-LABEL: atomicrmw_min_i32_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: ammin_db.w $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw min ptr %a, i32 %b seq_cst ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_min_i64_seq_cst(ptr %a, i64 %b) nounwind { ++; LA64-LABEL: atomicrmw_min_i64_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: ammin_db.d $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw min ptr %a, i64 %b seq_cst ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_umax_i8_monotonic(ptr %a, i8 %b) nounwind { ++; LA64-LABEL: atomicrmw_umax_i8_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: ori $a3, $zero, 255 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: andi $a1, $a1, 255 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB64_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a2, 0 ++; LA64-NEXT: and $a6, $a4, $a3 ++; LA64-NEXT: move $a5, $a4 ++; LA64-NEXT: bgeu $a6, $a1, .LBB64_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB64_1 Depth=1 ++; LA64-NEXT: xor $a5, $a4, $a1 ++; LA64-NEXT: and $a5, $a5, $a3 ++; LA64-NEXT: 
xor $a5, $a4, $a5 ++; LA64-NEXT: .LBB64_3: # in Loop: Header=BB64_1 Depth=1 ++; LA64-NEXT: sc.w $a5, $a2, 0 ++; LA64-NEXT: beqz $a5, .LBB64_1 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: srl.w $a0, $a4, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw umax ptr %a, i8 %b monotonic ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_umax_i16_monotonic(ptr %a, i16 %b) nounwind { ++; LA64-LABEL: atomicrmw_umax_i16_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: lu12i.w $a3, 15 ++; LA64-NEXT: ori $a3, $a3, 4095 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB65_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a2, 0 ++; LA64-NEXT: and $a6, $a4, $a3 ++; LA64-NEXT: move $a5, $a4 ++; LA64-NEXT: bgeu $a6, $a1, .LBB65_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB65_1 Depth=1 ++; LA64-NEXT: xor $a5, $a4, $a1 ++; LA64-NEXT: and $a5, $a5, $a3 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: .LBB65_3: # in Loop: Header=BB65_1 Depth=1 ++; LA64-NEXT: sc.w $a5, $a2, 0 ++; LA64-NEXT: beqz $a5, .LBB65_1 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: srl.w $a0, $a4, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw umax ptr %a, i16 %b monotonic ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_umax_i32_monotonic(ptr %a, i32 %b) nounwind { ++; LA64-LABEL: atomicrmw_umax_i32_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: ammax_db.wu $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw umax ptr %a, i32 %b monotonic ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind { ++; LA64-LABEL: atomicrmw_umax_i64_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: ammax_db.du $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw umax ptr %a, i64 %b monotonic ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_umin_i8_monotonic(ptr %a, i8 %b) nounwind { ++; LA64-LABEL: atomicrmw_umin_i8_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: ori $a3, $zero, 255 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: andi $a1, $a1, 255 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB68_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a2, 0 ++; LA64-NEXT: and $a6, $a4, $a3 ++; LA64-NEXT: move $a5, $a4 ++; LA64-NEXT: bgeu $a1, $a6, .LBB68_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB68_1 Depth=1 ++; LA64-NEXT: xor $a5, $a4, $a1 ++; LA64-NEXT: and $a5, $a5, $a3 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: .LBB68_3: # in Loop: Header=BB68_1 Depth=1 ++; LA64-NEXT: sc.w $a5, $a2, 0 ++; LA64-NEXT: beqz $a5, .LBB68_1 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: srl.w $a0, $a4, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw umin ptr %a, i8 %b monotonic ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_umin_i16_monotonic(ptr %a, i16 %b) nounwind { ++; LA64-LABEL: atomicrmw_umin_i16_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: lu12i.w $a3, 15 ++; LA64-NEXT: ori $a3, $a3, 4095 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB69_1: # 
=>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a2, 0 ++; LA64-NEXT: and $a6, $a4, $a3 ++; LA64-NEXT: move $a5, $a4 ++; LA64-NEXT: bgeu $a1, $a6, .LBB69_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB69_1 Depth=1 ++; LA64-NEXT: xor $a5, $a4, $a1 ++; LA64-NEXT: and $a5, $a5, $a3 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: .LBB69_3: # in Loop: Header=BB69_1 Depth=1 ++; LA64-NEXT: sc.w $a5, $a2, 0 ++; LA64-NEXT: beqz $a5, .LBB69_1 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: srl.w $a0, $a4, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw umin ptr %a, i16 %b monotonic ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_umin_i32_monotonic(ptr %a, i32 %b) nounwind { ++; LA64-LABEL: atomicrmw_umin_i32_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: ammin_db.wu $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw umin ptr %a, i32 %b monotonic ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind { ++; LA64-LABEL: atomicrmw_umin_i64_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: ammin_db.du $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw umin ptr %a, i64 %b monotonic ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind { ++; LA64-LABEL: atomicrmw_max_i8_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: ori $a3, $zero, 255 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: ext.w.b $a1, $a1 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: andi $a4, $a0, 24 ++; LA64-NEXT: xori $a4, $a4, 56 ++; LA64-NEXT: .LBB72_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a5, $a2, 0 ++; LA64-NEXT: and $a7, $a5, $a3 ++; LA64-NEXT: move $a6, $a5 ++; LA64-NEXT: sll.w $a7, $a7, $a4 ++; LA64-NEXT: sra.w $a7, $a7, $a4 ++; LA64-NEXT: bge $a7, $a1, .LBB72_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB72_1 Depth=1 ++; LA64-NEXT: xor $a6, $a5, $a1 ++; LA64-NEXT: and $a6, $a6, $a3 ++; LA64-NEXT: xor $a6, $a5, $a6 ++; LA64-NEXT: .LBB72_3: # in Loop: Header=BB72_1 Depth=1 ++; LA64-NEXT: sc.w $a6, $a2, 0 ++; LA64-NEXT: beqz $a6, .LBB72_1 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: srl.w $a0, $a5, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw max ptr %a, i8 %b monotonic ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind { ++; LA64-LABEL: atomicrmw_max_i16_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: andi $a3, $a0, 24 ++; LA64-NEXT: ori $a4, $zero, 48 ++; LA64-NEXT: sub.d $a3, $a4, $a3 ++; LA64-NEXT: lu12i.w $a4, 15 ++; LA64-NEXT: ori $a4, $a4, 4095 ++; LA64-NEXT: sll.w $a4, $a4, $a0 ++; LA64-NEXT: addi.w $a4, $a4, 0 ++; LA64-NEXT: ext.w.h $a1, $a1 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB73_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a5, $a2, 0 ++; LA64-NEXT: and $a7, $a5, $a4 ++; LA64-NEXT: move $a6, $a5 ++; LA64-NEXT: sll.w $a7, $a7, $a3 ++; LA64-NEXT: sra.w $a7, $a7, $a3 ++; LA64-NEXT: bge $a7, $a1, .LBB73_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB73_1 Depth=1 ++; LA64-NEXT: xor $a6, $a5, $a1 ++; LA64-NEXT: and $a6, $a6, $a4 ++; LA64-NEXT: xor $a6, $a5, $a6 ++; LA64-NEXT: .LBB73_3: # in Loop: Header=BB73_1 Depth=1 ++; LA64-NEXT: sc.w $a6, $a2, 0 ++; LA64-NEXT: beqz $a6, .LBB73_1 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: srl.w $a0, $a5, $a0 ++; 
LA64-NEXT: ret ++ %1 = atomicrmw max ptr %a, i16 %b monotonic ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_max_i32_monotonic(ptr %a, i32 %b) nounwind { ++; LA64-LABEL: atomicrmw_max_i32_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: ammax_db.w $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw max ptr %a, i32 %b monotonic ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind { ++; LA64-LABEL: atomicrmw_max_i64_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: ammax_db.d $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw max ptr %a, i64 %b monotonic ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind { ++; LA64-LABEL: atomicrmw_min_i8_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: ori $a3, $zero, 255 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: ext.w.b $a1, $a1 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: andi $a4, $a0, 24 ++; LA64-NEXT: xori $a4, $a4, 56 ++; LA64-NEXT: .LBB76_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a5, $a2, 0 ++; LA64-NEXT: and $a7, $a5, $a3 ++; LA64-NEXT: move $a6, $a5 ++; LA64-NEXT: sll.w $a7, $a7, $a4 ++; LA64-NEXT: sra.w $a7, $a7, $a4 ++; LA64-NEXT: bge $a1, $a7, .LBB76_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB76_1 Depth=1 ++; LA64-NEXT: xor $a6, $a5, $a1 ++; LA64-NEXT: and $a6, $a6, $a3 ++; LA64-NEXT: xor $a6, $a5, $a6 ++; LA64-NEXT: .LBB76_3: # in Loop: Header=BB76_1 Depth=1 ++; LA64-NEXT: sc.w $a6, $a2, 0 ++; LA64-NEXT: beqz $a6, .LBB76_1 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: srl.w $a0, $a5, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw min ptr %a, i8 %b monotonic ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind { ++; LA64-LABEL: atomicrmw_min_i16_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: andi $a3, $a0, 24 ++; LA64-NEXT: ori $a4, $zero, 48 ++; LA64-NEXT: sub.d $a3, $a4, $a3 ++; LA64-NEXT: lu12i.w $a4, 15 ++; LA64-NEXT: ori $a4, $a4, 4095 ++; LA64-NEXT: sll.w $a4, $a4, $a0 ++; LA64-NEXT: addi.w $a4, $a4, 0 ++; LA64-NEXT: ext.w.h $a1, $a1 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB77_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a5, $a2, 0 ++; LA64-NEXT: and $a7, $a5, $a4 ++; LA64-NEXT: move $a6, $a5 ++; LA64-NEXT: sll.w $a7, $a7, $a3 ++; LA64-NEXT: sra.w $a7, $a7, $a3 ++; LA64-NEXT: bge $a1, $a7, .LBB77_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB77_1 Depth=1 ++; LA64-NEXT: xor $a6, $a5, $a1 ++; LA64-NEXT: and $a6, $a6, $a4 ++; LA64-NEXT: xor $a6, $a5, $a6 ++; LA64-NEXT: .LBB77_3: # in Loop: Header=BB77_1 Depth=1 ++; LA64-NEXT: sc.w $a6, $a2, 0 ++; LA64-NEXT: beqz $a6, .LBB77_1 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: srl.w $a0, $a5, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw min ptr %a, i16 %b monotonic ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_min_i32_monotonic(ptr %a, i32 %b) nounwind { ++; LA64-LABEL: atomicrmw_min_i32_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: ammin_db.w $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw min ptr %a, i32 %b monotonic ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind { ++; LA64-LABEL: atomicrmw_min_i64_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: 
ammin_db.d $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw min ptr %a, i64 %b monotonic ++ ret i64 %1 ++} +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll +index 626276ba05f7..94a26e4ed9c7 100644 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll ++++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll +@@ -900,6 +900,3228 @@ define i64 @atomicrmw_xor_i64_acquire(ptr %a, i64 %b) nounwind { + ret i64 %1 + } + ++define i8 @atomicrmw_xchg_i8_release(ptr %a, i8 %b) nounwind { ++; LA32-LABEL: atomicrmw_xchg_i8_release: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a2, $zero, -4 ++; LA32-NEXT: and $a2, $a0, $a2 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: ori $a3, $zero, 255 ++; LA32-NEXT: sll.w $a3, $a3, $a0 ++; LA32-NEXT: andi $a1, $a1, 255 ++; LA32-NEXT: sll.w $a1, $a1, $a0 ++; LA32-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a4, $a2, 0 ++; LA32-NEXT: addi.w $a5, $a1, 0 ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: and $a5, $a5, $a3 ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: sc.w $a5, $a2, 0 ++; LA32-NEXT: beqz $a5, .LBB28_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a4, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_xchg_i8_release: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: ori $a3, $zero, 255 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: andi $a1, $a1, 255 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a2, 0 ++; LA64-NEXT: addi.w $a5, $a1, 0 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: and $a5, $a5, $a3 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: sc.w $a5, $a2, 0 ++; LA64-NEXT: beqz $a5, .LBB28_1 ++; LA64-NEXT: # %bb.2: ++; LA64-NEXT: srl.w $a0, $a4, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw xchg ptr %a, i8 %b release ++ ret i8 %1 ++} ++ ++define i8 @atomicrmw_xchg_0_i8_release(ptr %a) nounwind { ++; LA32-LABEL: atomicrmw_xchg_0_i8_release: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a1, $zero, -4 ++; LA32-NEXT: and $a1, $a0, $a1 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: ori $a2, $zero, 255 ++; LA32-NEXT: sll.w $a2, $a2, $a0 ++; LA32-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a3, $a1, 0 ++; LA32-NEXT: addi.w $a4, $zero, 0 ++; LA32-NEXT: xor $a4, $a3, $a4 ++; LA32-NEXT: and $a4, $a4, $a2 ++; LA32-NEXT: xor $a4, $a3, $a4 ++; LA32-NEXT: sc.w $a4, $a1, 0 ++; LA32-NEXT: beqz $a4, .LBB29_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a3, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_xchg_0_i8_release: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a1, $zero, -4 ++; LA64-NEXT: and $a1, $a0, $a1 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: ori $a2, $zero, 255 ++; LA64-NEXT: sll.w $a2, $a2, $a0 ++; LA64-NEXT: addi.w $a2, $a2, 0 ++; LA64-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a3, $a1, 0 ++; LA64-NEXT: addi.w $a4, $zero, 0 ++; LA64-NEXT: xor $a4, $a3, $a4 ++; LA64-NEXT: and $a4, $a4, $a2 ++; LA64-NEXT: xor $a4, $a3, $a4 ++; LA64-NEXT: sc.w $a4, $a1, 0 ++; LA64-NEXT: beqz $a4, .LBB29_1 ++; LA64-NEXT: # %bb.2: ++; LA64-NEXT: srl.w $a0, $a3, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw xchg ptr %a, i8 0 release ++ ret i8 %1 ++} ++ ++define i8 @atomicrmw_xchg_minus_1_i8_release(ptr %a) nounwind { ++; 
LA32-LABEL: atomicrmw_xchg_minus_1_i8_release: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a1, $zero, -4 ++; LA32-NEXT: and $a1, $a0, $a1 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: ori $a2, $zero, 255 ++; LA32-NEXT: sll.w $a2, $a2, $a0 ++; LA32-NEXT: .LBB30_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a3, $a1, 0 ++; LA32-NEXT: addi.w $a4, $a2, 0 ++; LA32-NEXT: xor $a4, $a3, $a4 ++; LA32-NEXT: and $a4, $a4, $a2 ++; LA32-NEXT: xor $a4, $a3, $a4 ++; LA32-NEXT: sc.w $a4, $a1, 0 ++; LA32-NEXT: beqz $a4, .LBB30_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a3, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_xchg_minus_1_i8_release: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a1, $zero, -4 ++; LA64-NEXT: and $a1, $a0, $a1 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: ori $a2, $zero, 255 ++; LA64-NEXT: sll.w $a2, $a2, $a0 ++; LA64-NEXT: addi.w $a2, $a2, 0 ++; LA64-NEXT: .LBB30_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a3, $a1, 0 ++; LA64-NEXT: addi.w $a4, $a2, 0 ++; LA64-NEXT: xor $a4, $a3, $a4 ++; LA64-NEXT: and $a4, $a4, $a2 ++; LA64-NEXT: xor $a4, $a3, $a4 ++; LA64-NEXT: sc.w $a4, $a1, 0 ++; LA64-NEXT: beqz $a4, .LBB30_1 ++; LA64-NEXT: # %bb.2: ++; LA64-NEXT: srl.w $a0, $a3, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw xchg ptr %a, i8 -1 release ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_xchg_i16_release(ptr %a, i16 %b) nounwind { ++; LA32-LABEL: atomicrmw_xchg_i16_release: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a2, $zero, -4 ++; LA32-NEXT: and $a2, $a0, $a2 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: lu12i.w $a3, 15 ++; LA32-NEXT: ori $a3, $a3, 4095 ++; LA32-NEXT: sll.w $a3, $a3, $a0 ++; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 ++; LA32-NEXT: sll.w $a1, $a1, $a0 ++; LA32-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a4, $a2, 0 ++; LA32-NEXT: addi.w $a5, $a1, 0 ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: and $a5, $a5, $a3 ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: sc.w $a5, $a2, 0 ++; LA32-NEXT: beqz $a5, .LBB31_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a4, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_xchg_i16_release: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: lu12i.w $a3, 15 ++; LA64-NEXT: ori $a3, $a3, 4095 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a2, 0 ++; LA64-NEXT: addi.w $a5, $a1, 0 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: and $a5, $a5, $a3 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: sc.w $a5, $a2, 0 ++; LA64-NEXT: beqz $a5, .LBB31_1 ++; LA64-NEXT: # %bb.2: ++; LA64-NEXT: srl.w $a0, $a4, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw xchg ptr %a, i16 %b release ++ ret i16 %1 ++} ++ ++define i16 @atomicrmw_xchg_0_i16_release(ptr %a) nounwind { ++; LA32-LABEL: atomicrmw_xchg_0_i16_release: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a1, $zero, -4 ++; LA32-NEXT: and $a1, $a0, $a1 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: lu12i.w $a2, 15 ++; LA32-NEXT: ori $a2, $a2, 4095 ++; LA32-NEXT: sll.w $a2, $a2, $a0 ++; LA32-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a3, $a1, 0 ++; LA32-NEXT: addi.w $a4, $zero, 0 ++; LA32-NEXT: xor $a4, $a3, $a4 ++; LA32-NEXT: and $a4, $a4, $a2 ++; LA32-NEXT: xor $a4, $a3, $a4 ++; LA32-NEXT: sc.w $a4, $a1, 0 
++; LA32-NEXT: beqz $a4, .LBB32_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a3, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_xchg_0_i16_release: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a1, $zero, -4 ++; LA64-NEXT: and $a1, $a0, $a1 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: lu12i.w $a2, 15 ++; LA64-NEXT: ori $a2, $a2, 4095 ++; LA64-NEXT: sll.w $a2, $a2, $a0 ++; LA64-NEXT: addi.w $a2, $a2, 0 ++; LA64-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a3, $a1, 0 ++; LA64-NEXT: addi.w $a4, $zero, 0 ++; LA64-NEXT: xor $a4, $a3, $a4 ++; LA64-NEXT: and $a4, $a4, $a2 ++; LA64-NEXT: xor $a4, $a3, $a4 ++; LA64-NEXT: sc.w $a4, $a1, 0 ++; LA64-NEXT: beqz $a4, .LBB32_1 ++; LA64-NEXT: # %bb.2: ++; LA64-NEXT: srl.w $a0, $a3, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw xchg ptr %a, i16 0 release ++ ret i16 %1 ++} ++ ++define i16 @atomicrmw_xchg_minus_1_i16_release(ptr %a) nounwind { ++; LA32-LABEL: atomicrmw_xchg_minus_1_i16_release: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a1, $zero, -4 ++; LA32-NEXT: and $a1, $a0, $a1 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: lu12i.w $a2, 15 ++; LA32-NEXT: ori $a2, $a2, 4095 ++; LA32-NEXT: sll.w $a2, $a2, $a0 ++; LA32-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a3, $a1, 0 ++; LA32-NEXT: addi.w $a4, $a2, 0 ++; LA32-NEXT: xor $a4, $a3, $a4 ++; LA32-NEXT: and $a4, $a4, $a2 ++; LA32-NEXT: xor $a4, $a3, $a4 ++; LA32-NEXT: sc.w $a4, $a1, 0 ++; LA32-NEXT: beqz $a4, .LBB33_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a3, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_xchg_minus_1_i16_release: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a1, $zero, -4 ++; LA64-NEXT: and $a1, $a0, $a1 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: lu12i.w $a2, 15 ++; LA64-NEXT: ori $a2, $a2, 4095 ++; LA64-NEXT: sll.w $a2, $a2, $a0 ++; LA64-NEXT: addi.w $a2, $a2, 0 ++; LA64-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a3, $a1, 0 ++; LA64-NEXT: addi.w $a4, $a2, 0 ++; LA64-NEXT: xor $a4, $a3, $a4 ++; LA64-NEXT: and $a4, $a4, $a2 ++; LA64-NEXT: xor $a4, $a3, $a4 ++; LA64-NEXT: sc.w $a4, $a1, 0 ++; LA64-NEXT: beqz $a4, .LBB33_1 ++; LA64-NEXT: # %bb.2: ++; LA64-NEXT: srl.w $a0, $a3, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw xchg ptr %a, i16 -1 release ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_xchg_i32_release(ptr %a, i32 %b) nounwind { ++; LA32-LABEL: atomicrmw_xchg_i32_release: ++; LA32: # %bb.0: ++; LA32-NEXT: .LBB34_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a2, $a0, 0 ++; LA32-NEXT: move $a3, $a1 ++; LA32-NEXT: sc.w $a3, $a0, 0 ++; LA32-NEXT: beqz $a3, .LBB34_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: move $a0, $a2 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_xchg_i32_release: ++; LA64: # %bb.0: ++; LA64-NEXT: amswap_db.w $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw xchg ptr %a, i32 %b release ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_xchg_i64_release(ptr %a, i64 %b) nounwind { ++; LA32-LABEL: atomicrmw_xchg_i64_release: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $sp, $sp, -16 ++; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill ++; LA32-NEXT: ori $a3, $zero, 3 ++; LA32-NEXT: bl %plt(__atomic_exchange_8) ++; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ++; LA32-NEXT: addi.w $sp, $sp, 16 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_xchg_i64_release: ++; LA64: # %bb.0: ++; LA64-NEXT: amswap_db.d $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw xchg ptr %a, i64 %b release ++ ret 
i64 %1 ++} ++ ++define i8 @atomicrmw_add_i8_release(ptr %a, i8 %b) nounwind { ++; LA32-LABEL: atomicrmw_add_i8_release: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a2, $zero, -4 ++; LA32-NEXT: and $a2, $a0, $a2 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: ori $a3, $zero, 255 ++; LA32-NEXT: sll.w $a3, $a3, $a0 ++; LA32-NEXT: andi $a1, $a1, 255 ++; LA32-NEXT: sll.w $a1, $a1, $a0 ++; LA32-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a4, $a2, 0 ++; LA32-NEXT: add.w $a5, $a4, $a1 ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: and $a5, $a5, $a3 ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: sc.w $a5, $a2, 0 ++; LA32-NEXT: beqz $a5, .LBB36_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a4, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_add_i8_release: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: ori $a3, $zero, 255 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: andi $a1, $a1, 255 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a2, 0 ++; LA64-NEXT: add.w $a5, $a4, $a1 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: and $a5, $a5, $a3 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: sc.w $a5, $a2, 0 ++; LA64-NEXT: beqz $a5, .LBB36_1 ++; LA64-NEXT: # %bb.2: ++; LA64-NEXT: srl.w $a0, $a4, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw add ptr %a, i8 %b release ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_add_i16_release(ptr %a, i16 %b) nounwind { ++; LA32-LABEL: atomicrmw_add_i16_release: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a2, $zero, -4 ++; LA32-NEXT: and $a2, $a0, $a2 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: lu12i.w $a3, 15 ++; LA32-NEXT: ori $a3, $a3, 4095 ++; LA32-NEXT: sll.w $a3, $a3, $a0 ++; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 ++; LA32-NEXT: sll.w $a1, $a1, $a0 ++; LA32-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a4, $a2, 0 ++; LA32-NEXT: add.w $a5, $a4, $a1 ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: and $a5, $a5, $a3 ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: sc.w $a5, $a2, 0 ++; LA32-NEXT: beqz $a5, .LBB37_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a4, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_add_i16_release: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: lu12i.w $a3, 15 ++; LA64-NEXT: ori $a3, $a3, 4095 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a2, 0 ++; LA64-NEXT: add.w $a5, $a4, $a1 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: and $a5, $a5, $a3 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: sc.w $a5, $a2, 0 ++; LA64-NEXT: beqz $a5, .LBB37_1 ++; LA64-NEXT: # %bb.2: ++; LA64-NEXT: srl.w $a0, $a4, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw add ptr %a, i16 %b release ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_add_i32_release(ptr %a, i32 %b) nounwind { ++; LA32-LABEL: atomicrmw_add_i32_release: ++; LA32: # %bb.0: ++; LA32-NEXT: .LBB38_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a2, $a0, 0 ++; LA32-NEXT: add.w $a3, $a2, $a1 ++; LA32-NEXT: sc.w $a3, $a0, 0 ++; LA32-NEXT: beqz $a3, .LBB38_1 ++; LA32-NEXT: # %bb.2: ++; 
LA32-NEXT: move $a0, $a2 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_add_i32_release: ++; LA64: # %bb.0: ++; LA64-NEXT: amadd_db.w $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw add ptr %a, i32 %b release ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_add_i64_release(ptr %a, i64 %b) nounwind { ++; LA32-LABEL: atomicrmw_add_i64_release: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $sp, $sp, -16 ++; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill ++; LA32-NEXT: ori $a3, $zero, 3 ++; LA32-NEXT: bl %plt(__atomic_fetch_add_8) ++; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ++; LA32-NEXT: addi.w $sp, $sp, 16 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_add_i64_release: ++; LA64: # %bb.0: ++; LA64-NEXT: amadd_db.d $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw add ptr %a, i64 %b release ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_sub_i8_release(ptr %a, i8 %b) nounwind { ++; LA32-LABEL: atomicrmw_sub_i8_release: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a2, $zero, -4 ++; LA32-NEXT: and $a2, $a0, $a2 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: ori $a3, $zero, 255 ++; LA32-NEXT: sll.w $a3, $a3, $a0 ++; LA32-NEXT: andi $a1, $a1, 255 ++; LA32-NEXT: sll.w $a1, $a1, $a0 ++; LA32-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a4, $a2, 0 ++; LA32-NEXT: sub.w $a5, $a4, $a1 ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: and $a5, $a5, $a3 ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: sc.w $a5, $a2, 0 ++; LA32-NEXT: beqz $a5, .LBB40_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a4, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_sub_i8_release: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: ori $a3, $zero, 255 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: andi $a1, $a1, 255 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a2, 0 ++; LA64-NEXT: sub.w $a5, $a4, $a1 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: and $a5, $a5, $a3 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: sc.w $a5, $a2, 0 ++; LA64-NEXT: beqz $a5, .LBB40_1 ++; LA64-NEXT: # %bb.2: ++; LA64-NEXT: srl.w $a0, $a4, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw sub ptr %a, i8 %b release ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_sub_i16_release(ptr %a, i16 %b) nounwind { ++; LA32-LABEL: atomicrmw_sub_i16_release: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a2, $zero, -4 ++; LA32-NEXT: and $a2, $a0, $a2 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: lu12i.w $a3, 15 ++; LA32-NEXT: ori $a3, $a3, 4095 ++; LA32-NEXT: sll.w $a3, $a3, $a0 ++; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 ++; LA32-NEXT: sll.w $a1, $a1, $a0 ++; LA32-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a4, $a2, 0 ++; LA32-NEXT: sub.w $a5, $a4, $a1 ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: and $a5, $a5, $a3 ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: sc.w $a5, $a2, 0 ++; LA32-NEXT: beqz $a5, .LBB41_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a4, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_sub_i16_release: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: lu12i.w $a3, 15 ++; LA64-NEXT: ori $a3, $a3, 4095 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: bstrpick.d 
$a1, $a1, 15, 0 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a2, 0 ++; LA64-NEXT: sub.w $a5, $a4, $a1 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: and $a5, $a5, $a3 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: sc.w $a5, $a2, 0 ++; LA64-NEXT: beqz $a5, .LBB41_1 ++; LA64-NEXT: # %bb.2: ++; LA64-NEXT: srl.w $a0, $a4, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw sub ptr %a, i16 %b release ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_sub_i32_release(ptr %a, i32 %b) nounwind { ++; LA32-LABEL: atomicrmw_sub_i32_release: ++; LA32: # %bb.0: ++; LA32-NEXT: .LBB42_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a2, $a0, 0 ++; LA32-NEXT: sub.w $a3, $a2, $a1 ++; LA32-NEXT: sc.w $a3, $a0, 0 ++; LA32-NEXT: beqz $a3, .LBB42_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: move $a0, $a2 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_sub_i32_release: ++; LA64: # %bb.0: ++; LA64-NEXT: sub.w $a2, $zero, $a1 ++; LA64-NEXT: amadd_db.w $a1, $a2, $a0 ++; LA64-NEXT: move $a0, $a1 ++; LA64-NEXT: ret ++ %1 = atomicrmw sub ptr %a, i32 %b release ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_sub_i64_release(ptr %a, i64 %b) nounwind { ++; LA32-LABEL: atomicrmw_sub_i64_release: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $sp, $sp, -16 ++; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill ++; LA32-NEXT: ori $a3, $zero, 3 ++; LA32-NEXT: bl %plt(__atomic_fetch_sub_8) ++; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ++; LA32-NEXT: addi.w $sp, $sp, 16 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_sub_i64_release: ++; LA64: # %bb.0: ++; LA64-NEXT: sub.d $a2, $zero, $a1 ++; LA64-NEXT: amadd_db.d $a1, $a2, $a0 ++; LA64-NEXT: move $a0, $a1 ++; LA64-NEXT: ret ++ %1 = atomicrmw sub ptr %a, i64 %b release ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_nand_i8_release(ptr %a, i8 %b) nounwind { ++; LA32-LABEL: atomicrmw_nand_i8_release: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a2, $zero, -4 ++; LA32-NEXT: and $a2, $a0, $a2 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: ori $a3, $zero, 255 ++; LA32-NEXT: sll.w $a3, $a3, $a0 ++; LA32-NEXT: andi $a1, $a1, 255 ++; LA32-NEXT: sll.w $a1, $a1, $a0 ++; LA32-NEXT: .LBB44_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a4, $a2, 0 ++; LA32-NEXT: and $a5, $a4, $a1 ++; LA32-NEXT: nor $a5, $a5, $zero ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: and $a5, $a5, $a3 ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: sc.w $a5, $a2, 0 ++; LA32-NEXT: beqz $a5, .LBB44_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a4, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_nand_i8_release: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: ori $a3, $zero, 255 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: andi $a1, $a1, 255 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB44_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a2, 0 ++; LA64-NEXT: and $a5, $a4, $a1 ++; LA64-NEXT: nor $a5, $a5, $zero ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: and $a5, $a5, $a3 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: sc.w $a5, $a2, 0 ++; LA64-NEXT: beqz $a5, .LBB44_1 ++; LA64-NEXT: # %bb.2: ++; LA64-NEXT: srl.w $a0, $a4, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw nand ptr %a, i8 %b release ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_nand_i16_release(ptr %a, i16 %b) nounwind { ++; LA32-LABEL: 
atomicrmw_nand_i16_release: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a2, $zero, -4 ++; LA32-NEXT: and $a2, $a0, $a2 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: lu12i.w $a3, 15 ++; LA32-NEXT: ori $a3, $a3, 4095 ++; LA32-NEXT: sll.w $a3, $a3, $a0 ++; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 ++; LA32-NEXT: sll.w $a1, $a1, $a0 ++; LA32-NEXT: .LBB45_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a4, $a2, 0 ++; LA32-NEXT: and $a5, $a4, $a1 ++; LA32-NEXT: nor $a5, $a5, $zero ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: and $a5, $a5, $a3 ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: sc.w $a5, $a2, 0 ++; LA32-NEXT: beqz $a5, .LBB45_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a4, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_nand_i16_release: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: lu12i.w $a3, 15 ++; LA64-NEXT: ori $a3, $a3, 4095 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB45_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a2, 0 ++; LA64-NEXT: and $a5, $a4, $a1 ++; LA64-NEXT: nor $a5, $a5, $zero ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: and $a5, $a5, $a3 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: sc.w $a5, $a2, 0 ++; LA64-NEXT: beqz $a5, .LBB45_1 ++; LA64-NEXT: # %bb.2: ++; LA64-NEXT: srl.w $a0, $a4, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw nand ptr %a, i16 %b release ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_nand_i32_release(ptr %a, i32 %b) nounwind { ++; LA32-LABEL: atomicrmw_nand_i32_release: ++; LA32: # %bb.0: ++; LA32-NEXT: .LBB46_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a2, $a0, 0 ++; LA32-NEXT: and $a3, $a2, $a1 ++; LA32-NEXT: nor $a3, $a3, $zero ++; LA32-NEXT: sc.w $a3, $a0, 0 ++; LA32-NEXT: beqz $a3, .LBB46_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: move $a0, $a2 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_nand_i32_release: ++; LA64: # %bb.0: ++; LA64-NEXT: .LBB46_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a2, $a0, 0 ++; LA64-NEXT: and $a3, $a2, $a1 ++; LA64-NEXT: nor $a3, $a3, $zero ++; LA64-NEXT: sc.w $a3, $a0, 0 ++; LA64-NEXT: beqz $a3, .LBB46_1 ++; LA64-NEXT: # %bb.2: ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw nand ptr %a, i32 %b release ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_nand_i64_release(ptr %a, i64 %b) nounwind { ++; LA32-LABEL: atomicrmw_nand_i64_release: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $sp, $sp, -16 ++; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill ++; LA32-NEXT: ori $a3, $zero, 3 ++; LA32-NEXT: bl %plt(__atomic_fetch_nand_8) ++; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ++; LA32-NEXT: addi.w $sp, $sp, 16 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_nand_i64_release: ++; LA64: # %bb.0: ++; LA64-NEXT: .LBB47_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.d $a2, $a0, 0 ++; LA64-NEXT: and $a3, $a2, $a1 ++; LA64-NEXT: nor $a3, $a3, $zero ++; LA64-NEXT: sc.d $a3, $a0, 0 ++; LA64-NEXT: beqz $a3, .LBB47_1 ++; LA64-NEXT: # %bb.2: ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw nand ptr %a, i64 %b release ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_and_i8_release(ptr %a, i8 %b) nounwind { ++; LA32-LABEL: atomicrmw_and_i8_release: ++; LA32: # %bb.0: ++; LA32-NEXT: slli.w $a2, $a0, 3 ++; LA32-NEXT: ori $a3, $zero, 255 ++; LA32-NEXT: sll.w $a3, $a3, $a2 
++; LA32-NEXT: andi $a1, $a1, 255 ++; LA32-NEXT: sll.w $a1, $a1, $a2 ++; LA32-NEXT: orn $a1, $a1, $a3 ++; LA32-NEXT: addi.w $a3, $zero, -4 ++; LA32-NEXT: and $a0, $a0, $a3 ++; LA32-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a3, $a0, 0 ++; LA32-NEXT: and $a4, $a3, $a1 ++; LA32-NEXT: sc.w $a4, $a0, 0 ++; LA32-NEXT: beqz $a4, .LBB48_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a3, $a2 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_and_i8_release: ++; LA64: # %bb.0: ++; LA64-NEXT: slli.d $a2, $a0, 3 ++; LA64-NEXT: ori $a3, $zero, 255 ++; LA64-NEXT: sll.w $a3, $a3, $a2 ++; LA64-NEXT: andi $a1, $a1, 255 ++; LA64-NEXT: sll.w $a1, $a1, $a2 ++; LA64-NEXT: orn $a1, $a1, $a3 ++; LA64-NEXT: addi.w $a3, $zero, -4 ++; LA64-NEXT: and $a0, $a0, $a3 ++; LA64-NEXT: amand_db.w $a3, $a1, $a0 ++; LA64-NEXT: srl.w $a0, $a3, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw and ptr %a, i8 %b release ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_and_i16_release(ptr %a, i16 %b) nounwind { ++; LA32-LABEL: atomicrmw_and_i16_release: ++; LA32: # %bb.0: ++; LA32-NEXT: lu12i.w $a2, 15 ++; LA32-NEXT: ori $a2, $a2, 4095 ++; LA32-NEXT: slli.w $a3, $a0, 3 ++; LA32-NEXT: sll.w $a2, $a2, $a3 ++; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 ++; LA32-NEXT: sll.w $a1, $a1, $a3 ++; LA32-NEXT: orn $a1, $a1, $a2 ++; LA32-NEXT: addi.w $a2, $zero, -4 ++; LA32-NEXT: and $a0, $a0, $a2 ++; LA32-NEXT: .LBB49_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a2, $a0, 0 ++; LA32-NEXT: and $a4, $a2, $a1 ++; LA32-NEXT: sc.w $a4, $a0, 0 ++; LA32-NEXT: beqz $a4, .LBB49_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a2, $a3 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_and_i16_release: ++; LA64: # %bb.0: ++; LA64-NEXT: lu12i.w $a2, 15 ++; LA64-NEXT: ori $a2, $a2, 4095 ++; LA64-NEXT: slli.d $a3, $a0, 3 ++; LA64-NEXT: sll.w $a2, $a2, $a3 ++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 ++; LA64-NEXT: sll.w $a1, $a1, $a3 ++; LA64-NEXT: orn $a1, $a1, $a2 ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a0, $a0, $a2 ++; LA64-NEXT: amand_db.w $a2, $a1, $a0 ++; LA64-NEXT: srl.w $a0, $a2, $a3 ++; LA64-NEXT: ret ++ %1 = atomicrmw and ptr %a, i16 %b release ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_and_i32_release(ptr %a, i32 %b) nounwind { ++; LA32-LABEL: atomicrmw_and_i32_release: ++; LA32: # %bb.0: ++; LA32-NEXT: .LBB50_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a2, $a0, 0 ++; LA32-NEXT: and $a3, $a2, $a1 ++; LA32-NEXT: sc.w $a3, $a0, 0 ++; LA32-NEXT: beqz $a3, .LBB50_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: move $a0, $a2 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_and_i32_release: ++; LA64: # %bb.0: ++; LA64-NEXT: amand_db.w $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw and ptr %a, i32 %b release ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_and_i64_release(ptr %a, i64 %b) nounwind { ++; LA32-LABEL: atomicrmw_and_i64_release: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $sp, $sp, -16 ++; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill ++; LA32-NEXT: ori $a3, $zero, 3 ++; LA32-NEXT: bl %plt(__atomic_fetch_and_8) ++; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ++; LA32-NEXT: addi.w $sp, $sp, 16 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_and_i64_release: ++; LA64: # %bb.0: ++; LA64-NEXT: amand_db.d $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw and ptr %a, i64 %b release ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_or_i8_release(ptr %a, i8 %b) nounwind { ++; LA32-LABEL: atomicrmw_or_i8_release: ++; LA32: # 
%bb.0: ++; LA32-NEXT: addi.w $a2, $zero, -4 ++; LA32-NEXT: and $a2, $a0, $a2 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: andi $a1, $a1, 255 ++; LA32-NEXT: sll.w $a1, $a1, $a0 ++; LA32-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a3, $a2, 0 ++; LA32-NEXT: or $a4, $a3, $a1 ++; LA32-NEXT: sc.w $a4, $a2, 0 ++; LA32-NEXT: beqz $a4, .LBB52_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a3, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_or_i8_release: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: andi $a1, $a1, 255 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: amor_db.w $a3, $a1, $a2 ++; LA64-NEXT: srl.w $a0, $a3, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw or ptr %a, i8 %b release ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_or_i16_release(ptr %a, i16 %b) nounwind { ++; LA32-LABEL: atomicrmw_or_i16_release: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a2, $zero, -4 ++; LA32-NEXT: and $a2, $a0, $a2 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 ++; LA32-NEXT: sll.w $a1, $a1, $a0 ++; LA32-NEXT: .LBB53_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a3, $a2, 0 ++; LA32-NEXT: or $a4, $a3, $a1 ++; LA32-NEXT: sc.w $a4, $a2, 0 ++; LA32-NEXT: beqz $a4, .LBB53_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a3, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_or_i16_release: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: amor_db.w $a3, $a1, $a2 ++; LA64-NEXT: srl.w $a0, $a3, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw or ptr %a, i16 %b release ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_or_i32_release(ptr %a, i32 %b) nounwind { ++; LA32-LABEL: atomicrmw_or_i32_release: ++; LA32: # %bb.0: ++; LA32-NEXT: .LBB54_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a2, $a0, 0 ++; LA32-NEXT: or $a3, $a2, $a1 ++; LA32-NEXT: sc.w $a3, $a0, 0 ++; LA32-NEXT: beqz $a3, .LBB54_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: move $a0, $a2 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_or_i32_release: ++; LA64: # %bb.0: ++; LA64-NEXT: amor_db.w $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw or ptr %a, i32 %b release ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_or_i64_release(ptr %a, i64 %b) nounwind { ++; LA32-LABEL: atomicrmw_or_i64_release: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $sp, $sp, -16 ++; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill ++; LA32-NEXT: ori $a3, $zero, 3 ++; LA32-NEXT: bl %plt(__atomic_fetch_or_8) ++; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ++; LA32-NEXT: addi.w $sp, $sp, 16 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_or_i64_release: ++; LA64: # %bb.0: ++; LA64-NEXT: amor_db.d $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw or ptr %a, i64 %b release ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_xor_i8_release(ptr %a, i8 %b) nounwind { ++; LA32-LABEL: atomicrmw_xor_i8_release: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a2, $zero, -4 ++; LA32-NEXT: and $a2, $a0, $a2 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: andi $a1, $a1, 255 ++; LA32-NEXT: sll.w $a1, $a1, $a0 ++; LA32-NEXT: .LBB56_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a3, $a2, 0 ++; LA32-NEXT: xor $a4, $a3, $a1 ++; LA32-NEXT: sc.w $a4, $a2, 0 ++; LA32-NEXT: beqz $a4, .LBB56_1 ++; 
LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a3, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_xor_i8_release: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: andi $a1, $a1, 255 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: amxor_db.w $a3, $a1, $a2 ++; LA64-NEXT: srl.w $a0, $a3, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw xor ptr %a, i8 %b release ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_xor_i16_release(ptr %a, i16 %b) nounwind { ++; LA32-LABEL: atomicrmw_xor_i16_release: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a2, $zero, -4 ++; LA32-NEXT: and $a2, $a0, $a2 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 ++; LA32-NEXT: sll.w $a1, $a1, $a0 ++; LA32-NEXT: .LBB57_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a3, $a2, 0 ++; LA32-NEXT: xor $a4, $a3, $a1 ++; LA32-NEXT: sc.w $a4, $a2, 0 ++; LA32-NEXT: beqz $a4, .LBB57_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a3, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_xor_i16_release: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: amxor_db.w $a3, $a1, $a2 ++; LA64-NEXT: srl.w $a0, $a3, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw xor ptr %a, i16 %b release ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_xor_i32_release(ptr %a, i32 %b) nounwind { ++; LA32-LABEL: atomicrmw_xor_i32_release: ++; LA32: # %bb.0: ++; LA32-NEXT: .LBB58_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a2, $a0, 0 ++; LA32-NEXT: xor $a3, $a2, $a1 ++; LA32-NEXT: sc.w $a3, $a0, 0 ++; LA32-NEXT: beqz $a3, .LBB58_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: move $a0, $a2 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_xor_i32_release: ++; LA64: # %bb.0: ++; LA64-NEXT: amxor_db.w $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw xor ptr %a, i32 %b release ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_xor_i64_release(ptr %a, i64 %b) nounwind { ++; LA32-LABEL: atomicrmw_xor_i64_release: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $sp, $sp, -16 ++; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill ++; LA32-NEXT: ori $a3, $zero, 3 ++; LA32-NEXT: bl %plt(__atomic_fetch_xor_8) ++; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ++; LA32-NEXT: addi.w $sp, $sp, 16 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_xor_i64_release: ++; LA64: # %bb.0: ++; LA64-NEXT: amxor_db.d $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw xor ptr %a, i64 %b release ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_xchg_i8_acq_rel(ptr %a, i8 %b) nounwind { ++; LA32-LABEL: atomicrmw_xchg_i8_acq_rel: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a2, $zero, -4 ++; LA32-NEXT: and $a2, $a0, $a2 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: ori $a3, $zero, 255 ++; LA32-NEXT: sll.w $a3, $a3, $a0 ++; LA32-NEXT: andi $a1, $a1, 255 ++; LA32-NEXT: sll.w $a1, $a1, $a0 ++; LA32-NEXT: .LBB60_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a4, $a2, 0 ++; LA32-NEXT: addi.w $a5, $a1, 0 ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: and $a5, $a5, $a3 ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: sc.w $a5, $a2, 0 ++; LA32-NEXT: beqz $a5, .LBB60_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a4, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_xchg_i8_acq_rel: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; 
LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: ori $a3, $zero, 255 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: andi $a1, $a1, 255 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB60_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a2, 0 ++; LA64-NEXT: addi.w $a5, $a1, 0 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: and $a5, $a5, $a3 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: sc.w $a5, $a2, 0 ++; LA64-NEXT: beqz $a5, .LBB60_1 ++; LA64-NEXT: # %bb.2: ++; LA64-NEXT: srl.w $a0, $a4, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw xchg ptr %a, i8 %b acq_rel ++ ret i8 %1 ++} ++ ++define i8 @atomicrmw_xchg_0_i8_acq_rel(ptr %a) nounwind { ++; LA32-LABEL: atomicrmw_xchg_0_i8_acq_rel: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a1, $zero, -4 ++; LA32-NEXT: and $a1, $a0, $a1 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: ori $a2, $zero, 255 ++; LA32-NEXT: sll.w $a2, $a2, $a0 ++; LA32-NEXT: .LBB61_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a3, $a1, 0 ++; LA32-NEXT: addi.w $a4, $zero, 0 ++; LA32-NEXT: xor $a4, $a3, $a4 ++; LA32-NEXT: and $a4, $a4, $a2 ++; LA32-NEXT: xor $a4, $a3, $a4 ++; LA32-NEXT: sc.w $a4, $a1, 0 ++; LA32-NEXT: beqz $a4, .LBB61_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a3, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_xchg_0_i8_acq_rel: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a1, $zero, -4 ++; LA64-NEXT: and $a1, $a0, $a1 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: ori $a2, $zero, 255 ++; LA64-NEXT: sll.w $a2, $a2, $a0 ++; LA64-NEXT: addi.w $a2, $a2, 0 ++; LA64-NEXT: .LBB61_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a3, $a1, 0 ++; LA64-NEXT: addi.w $a4, $zero, 0 ++; LA64-NEXT: xor $a4, $a3, $a4 ++; LA64-NEXT: and $a4, $a4, $a2 ++; LA64-NEXT: xor $a4, $a3, $a4 ++; LA64-NEXT: sc.w $a4, $a1, 0 ++; LA64-NEXT: beqz $a4, .LBB61_1 ++; LA64-NEXT: # %bb.2: ++; LA64-NEXT: srl.w $a0, $a3, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw xchg ptr %a, i8 0 acq_rel ++ ret i8 %1 ++} ++ ++define i8 @atomicrmw_xchg_minus_1_i8_acq_rel(ptr %a) nounwind { ++; LA32-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a1, $zero, -4 ++; LA32-NEXT: and $a1, $a0, $a1 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: ori $a2, $zero, 255 ++; LA32-NEXT: sll.w $a2, $a2, $a0 ++; LA32-NEXT: .LBB62_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a3, $a1, 0 ++; LA32-NEXT: addi.w $a4, $a2, 0 ++; LA32-NEXT: xor $a4, $a3, $a4 ++; LA32-NEXT: and $a4, $a4, $a2 ++; LA32-NEXT: xor $a4, $a3, $a4 ++; LA32-NEXT: sc.w $a4, $a1, 0 ++; LA32-NEXT: beqz $a4, .LBB62_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a3, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a1, $zero, -4 ++; LA64-NEXT: and $a1, $a0, $a1 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: ori $a2, $zero, 255 ++; LA64-NEXT: sll.w $a2, $a2, $a0 ++; LA64-NEXT: addi.w $a2, $a2, 0 ++; LA64-NEXT: .LBB62_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a3, $a1, 0 ++; LA64-NEXT: addi.w $a4, $a2, 0 ++; LA64-NEXT: xor $a4, $a3, $a4 ++; LA64-NEXT: and $a4, $a4, $a2 ++; LA64-NEXT: xor $a4, $a3, $a4 ++; LA64-NEXT: sc.w $a4, $a1, 0 ++; LA64-NEXT: beqz $a4, .LBB62_1 ++; LA64-NEXT: # %bb.2: ++; LA64-NEXT: srl.w $a0, $a3, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw xchg ptr %a, i8 -1 acq_rel ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_xchg_i16_acq_rel(ptr %a, i16 %b) nounwind { 
++; LA32-LABEL: atomicrmw_xchg_i16_acq_rel: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a2, $zero, -4 ++; LA32-NEXT: and $a2, $a0, $a2 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: lu12i.w $a3, 15 ++; LA32-NEXT: ori $a3, $a3, 4095 ++; LA32-NEXT: sll.w $a3, $a3, $a0 ++; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 ++; LA32-NEXT: sll.w $a1, $a1, $a0 ++; LA32-NEXT: .LBB63_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a4, $a2, 0 ++; LA32-NEXT: addi.w $a5, $a1, 0 ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: and $a5, $a5, $a3 ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: sc.w $a5, $a2, 0 ++; LA32-NEXT: beqz $a5, .LBB63_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a4, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_xchg_i16_acq_rel: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: lu12i.w $a3, 15 ++; LA64-NEXT: ori $a3, $a3, 4095 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB63_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a2, 0 ++; LA64-NEXT: addi.w $a5, $a1, 0 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: and $a5, $a5, $a3 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: sc.w $a5, $a2, 0 ++; LA64-NEXT: beqz $a5, .LBB63_1 ++; LA64-NEXT: # %bb.2: ++; LA64-NEXT: srl.w $a0, $a4, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw xchg ptr %a, i16 %b acq_rel ++ ret i16 %1 ++} ++ ++define i16 @atomicrmw_xchg_0_i16_acq_rel(ptr %a) nounwind { ++; LA32-LABEL: atomicrmw_xchg_0_i16_acq_rel: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a1, $zero, -4 ++; LA32-NEXT: and $a1, $a0, $a1 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: lu12i.w $a2, 15 ++; LA32-NEXT: ori $a2, $a2, 4095 ++; LA32-NEXT: sll.w $a2, $a2, $a0 ++; LA32-NEXT: .LBB64_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a3, $a1, 0 ++; LA32-NEXT: addi.w $a4, $zero, 0 ++; LA32-NEXT: xor $a4, $a3, $a4 ++; LA32-NEXT: and $a4, $a4, $a2 ++; LA32-NEXT: xor $a4, $a3, $a4 ++; LA32-NEXT: sc.w $a4, $a1, 0 ++; LA32-NEXT: beqz $a4, .LBB64_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a3, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_xchg_0_i16_acq_rel: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a1, $zero, -4 ++; LA64-NEXT: and $a1, $a0, $a1 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: lu12i.w $a2, 15 ++; LA64-NEXT: ori $a2, $a2, 4095 ++; LA64-NEXT: sll.w $a2, $a2, $a0 ++; LA64-NEXT: addi.w $a2, $a2, 0 ++; LA64-NEXT: .LBB64_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a3, $a1, 0 ++; LA64-NEXT: addi.w $a4, $zero, 0 ++; LA64-NEXT: xor $a4, $a3, $a4 ++; LA64-NEXT: and $a4, $a4, $a2 ++; LA64-NEXT: xor $a4, $a3, $a4 ++; LA64-NEXT: sc.w $a4, $a1, 0 ++; LA64-NEXT: beqz $a4, .LBB64_1 ++; LA64-NEXT: # %bb.2: ++; LA64-NEXT: srl.w $a0, $a3, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw xchg ptr %a, i16 0 acq_rel ++ ret i16 %1 ++} ++ ++define i16 @atomicrmw_xchg_minus_1_i16_acq_rel(ptr %a) nounwind { ++; LA32-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a1, $zero, -4 ++; LA32-NEXT: and $a1, $a0, $a1 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: lu12i.w $a2, 15 ++; LA32-NEXT: ori $a2, $a2, 4095 ++; LA32-NEXT: sll.w $a2, $a2, $a0 ++; LA32-NEXT: .LBB65_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a3, $a1, 0 ++; LA32-NEXT: addi.w $a4, $a2, 0 ++; LA32-NEXT: xor $a4, $a3, $a4 ++; LA32-NEXT: and $a4, $a4, $a2 ++; 
LA32-NEXT: xor $a4, $a3, $a4 ++; LA32-NEXT: sc.w $a4, $a1, 0 ++; LA32-NEXT: beqz $a4, .LBB65_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a3, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a1, $zero, -4 ++; LA64-NEXT: and $a1, $a0, $a1 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: lu12i.w $a2, 15 ++; LA64-NEXT: ori $a2, $a2, 4095 ++; LA64-NEXT: sll.w $a2, $a2, $a0 ++; LA64-NEXT: addi.w $a2, $a2, 0 ++; LA64-NEXT: .LBB65_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a3, $a1, 0 ++; LA64-NEXT: addi.w $a4, $a2, 0 ++; LA64-NEXT: xor $a4, $a3, $a4 ++; LA64-NEXT: and $a4, $a4, $a2 ++; LA64-NEXT: xor $a4, $a3, $a4 ++; LA64-NEXT: sc.w $a4, $a1, 0 ++; LA64-NEXT: beqz $a4, .LBB65_1 ++; LA64-NEXT: # %bb.2: ++; LA64-NEXT: srl.w $a0, $a3, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw xchg ptr %a, i16 -1 acq_rel ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_xchg_i32_acq_rel(ptr %a, i32 %b) nounwind { ++; LA32-LABEL: atomicrmw_xchg_i32_acq_rel: ++; LA32: # %bb.0: ++; LA32-NEXT: .LBB66_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a2, $a0, 0 ++; LA32-NEXT: move $a3, $a1 ++; LA32-NEXT: sc.w $a3, $a0, 0 ++; LA32-NEXT: beqz $a3, .LBB66_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: move $a0, $a2 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_xchg_i32_acq_rel: ++; LA64: # %bb.0: ++; LA64-NEXT: amswap_db.w $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw xchg ptr %a, i32 %b acq_rel ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_xchg_i64_acq_rel(ptr %a, i64 %b) nounwind { ++; LA32-LABEL: atomicrmw_xchg_i64_acq_rel: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $sp, $sp, -16 ++; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill ++; LA32-NEXT: ori $a3, $zero, 4 ++; LA32-NEXT: bl %plt(__atomic_exchange_8) ++; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ++; LA32-NEXT: addi.w $sp, $sp, 16 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_xchg_i64_acq_rel: ++; LA64: # %bb.0: ++; LA64-NEXT: amswap_db.d $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw xchg ptr %a, i64 %b acq_rel ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_add_i8_acq_rel(ptr %a, i8 %b) nounwind { ++; LA32-LABEL: atomicrmw_add_i8_acq_rel: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a2, $zero, -4 ++; LA32-NEXT: and $a2, $a0, $a2 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: ori $a3, $zero, 255 ++; LA32-NEXT: sll.w $a3, $a3, $a0 ++; LA32-NEXT: andi $a1, $a1, 255 ++; LA32-NEXT: sll.w $a1, $a1, $a0 ++; LA32-NEXT: .LBB68_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a4, $a2, 0 ++; LA32-NEXT: add.w $a5, $a4, $a1 ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: and $a5, $a5, $a3 ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: sc.w $a5, $a2, 0 ++; LA32-NEXT: beqz $a5, .LBB68_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a4, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_add_i8_acq_rel: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: ori $a3, $zero, 255 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: andi $a1, $a1, 255 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB68_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a2, 0 ++; LA64-NEXT: add.w $a5, $a4, $a1 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: and $a5, $a5, $a3 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: sc.w $a5, $a2, 0 ++; 
LA64-NEXT: beqz $a5, .LBB68_1 ++; LA64-NEXT: # %bb.2: ++; LA64-NEXT: srl.w $a0, $a4, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw add ptr %a, i8 %b acq_rel ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_add_i16_acq_rel(ptr %a, i16 %b) nounwind { ++; LA32-LABEL: atomicrmw_add_i16_acq_rel: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a2, $zero, -4 ++; LA32-NEXT: and $a2, $a0, $a2 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: lu12i.w $a3, 15 ++; LA32-NEXT: ori $a3, $a3, 4095 ++; LA32-NEXT: sll.w $a3, $a3, $a0 ++; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 ++; LA32-NEXT: sll.w $a1, $a1, $a0 ++; LA32-NEXT: .LBB69_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a4, $a2, 0 ++; LA32-NEXT: add.w $a5, $a4, $a1 ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: and $a5, $a5, $a3 ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: sc.w $a5, $a2, 0 ++; LA32-NEXT: beqz $a5, .LBB69_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a4, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_add_i16_acq_rel: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: lu12i.w $a3, 15 ++; LA64-NEXT: ori $a3, $a3, 4095 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB69_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a2, 0 ++; LA64-NEXT: add.w $a5, $a4, $a1 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: and $a5, $a5, $a3 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: sc.w $a5, $a2, 0 ++; LA64-NEXT: beqz $a5, .LBB69_1 ++; LA64-NEXT: # %bb.2: ++; LA64-NEXT: srl.w $a0, $a4, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw add ptr %a, i16 %b acq_rel ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_add_i32_acq_rel(ptr %a, i32 %b) nounwind { ++; LA32-LABEL: atomicrmw_add_i32_acq_rel: ++; LA32: # %bb.0: ++; LA32-NEXT: .LBB70_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a2, $a0, 0 ++; LA32-NEXT: add.w $a3, $a2, $a1 ++; LA32-NEXT: sc.w $a3, $a0, 0 ++; LA32-NEXT: beqz $a3, .LBB70_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: move $a0, $a2 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_add_i32_acq_rel: ++; LA64: # %bb.0: ++; LA64-NEXT: amadd_db.w $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw add ptr %a, i32 %b acq_rel ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_add_i64_acq_rel(ptr %a, i64 %b) nounwind { ++; LA32-LABEL: atomicrmw_add_i64_acq_rel: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $sp, $sp, -16 ++; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill ++; LA32-NEXT: ori $a3, $zero, 4 ++; LA32-NEXT: bl %plt(__atomic_fetch_add_8) ++; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ++; LA32-NEXT: addi.w $sp, $sp, 16 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_add_i64_acq_rel: ++; LA64: # %bb.0: ++; LA64-NEXT: amadd_db.d $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw add ptr %a, i64 %b acq_rel ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_sub_i8_acq_rel(ptr %a, i8 %b) nounwind { ++; LA32-LABEL: atomicrmw_sub_i8_acq_rel: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a2, $zero, -4 ++; LA32-NEXT: and $a2, $a0, $a2 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: ori $a3, $zero, 255 ++; LA32-NEXT: sll.w $a3, $a3, $a0 ++; LA32-NEXT: andi $a1, $a1, 255 ++; LA32-NEXT: sll.w $a1, $a1, $a0 ++; LA32-NEXT: .LBB72_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a4, $a2, 0 ++; LA32-NEXT: sub.w $a5, $a4, $a1 ++; 
LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: and $a5, $a5, $a3 ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: sc.w $a5, $a2, 0 ++; LA32-NEXT: beqz $a5, .LBB72_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a4, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_sub_i8_acq_rel: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: ori $a3, $zero, 255 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: andi $a1, $a1, 255 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB72_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a2, 0 ++; LA64-NEXT: sub.w $a5, $a4, $a1 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: and $a5, $a5, $a3 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: sc.w $a5, $a2, 0 ++; LA64-NEXT: beqz $a5, .LBB72_1 ++; LA64-NEXT: # %bb.2: ++; LA64-NEXT: srl.w $a0, $a4, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw sub ptr %a, i8 %b acq_rel ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_sub_i16_acq_rel(ptr %a, i16 %b) nounwind { ++; LA32-LABEL: atomicrmw_sub_i16_acq_rel: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a2, $zero, -4 ++; LA32-NEXT: and $a2, $a0, $a2 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: lu12i.w $a3, 15 ++; LA32-NEXT: ori $a3, $a3, 4095 ++; LA32-NEXT: sll.w $a3, $a3, $a0 ++; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 ++; LA32-NEXT: sll.w $a1, $a1, $a0 ++; LA32-NEXT: .LBB73_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a4, $a2, 0 ++; LA32-NEXT: sub.w $a5, $a4, $a1 ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: and $a5, $a5, $a3 ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: sc.w $a5, $a2, 0 ++; LA32-NEXT: beqz $a5, .LBB73_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a4, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_sub_i16_acq_rel: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: lu12i.w $a3, 15 ++; LA64-NEXT: ori $a3, $a3, 4095 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB73_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a2, 0 ++; LA64-NEXT: sub.w $a5, $a4, $a1 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: and $a5, $a5, $a3 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: sc.w $a5, $a2, 0 ++; LA64-NEXT: beqz $a5, .LBB73_1 ++; LA64-NEXT: # %bb.2: ++; LA64-NEXT: srl.w $a0, $a4, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw sub ptr %a, i16 %b acq_rel ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_sub_i32_acq_rel(ptr %a, i32 %b) nounwind { ++; LA32-LABEL: atomicrmw_sub_i32_acq_rel: ++; LA32: # %bb.0: ++; LA32-NEXT: .LBB74_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a2, $a0, 0 ++; LA32-NEXT: sub.w $a3, $a2, $a1 ++; LA32-NEXT: sc.w $a3, $a0, 0 ++; LA32-NEXT: beqz $a3, .LBB74_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: move $a0, $a2 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_sub_i32_acq_rel: ++; LA64: # %bb.0: ++; LA64-NEXT: sub.w $a2, $zero, $a1 ++; LA64-NEXT: amadd_db.w $a1, $a2, $a0 ++; LA64-NEXT: move $a0, $a1 ++; LA64-NEXT: ret ++ %1 = atomicrmw sub ptr %a, i32 %b acq_rel ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_sub_i64_acq_rel(ptr %a, i64 %b) nounwind { ++; LA32-LABEL: atomicrmw_sub_i64_acq_rel: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $sp, $sp, -16 ++; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill 
++; LA32-NEXT: ori $a3, $zero, 4 ++; LA32-NEXT: bl %plt(__atomic_fetch_sub_8) ++; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ++; LA32-NEXT: addi.w $sp, $sp, 16 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_sub_i64_acq_rel: ++; LA64: # %bb.0: ++; LA64-NEXT: sub.d $a2, $zero, $a1 ++; LA64-NEXT: amadd_db.d $a1, $a2, $a0 ++; LA64-NEXT: move $a0, $a1 ++; LA64-NEXT: ret ++ %1 = atomicrmw sub ptr %a, i64 %b acq_rel ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_nand_i8_acq_rel(ptr %a, i8 %b) nounwind { ++; LA32-LABEL: atomicrmw_nand_i8_acq_rel: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a2, $zero, -4 ++; LA32-NEXT: and $a2, $a0, $a2 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: ori $a3, $zero, 255 ++; LA32-NEXT: sll.w $a3, $a3, $a0 ++; LA32-NEXT: andi $a1, $a1, 255 ++; LA32-NEXT: sll.w $a1, $a1, $a0 ++; LA32-NEXT: .LBB76_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a4, $a2, 0 ++; LA32-NEXT: and $a5, $a4, $a1 ++; LA32-NEXT: nor $a5, $a5, $zero ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: and $a5, $a5, $a3 ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: sc.w $a5, $a2, 0 ++; LA32-NEXT: beqz $a5, .LBB76_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a4, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_nand_i8_acq_rel: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: ori $a3, $zero, 255 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: andi $a1, $a1, 255 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB76_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a2, 0 ++; LA64-NEXT: and $a5, $a4, $a1 ++; LA64-NEXT: nor $a5, $a5, $zero ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: and $a5, $a5, $a3 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: sc.w $a5, $a2, 0 ++; LA64-NEXT: beqz $a5, .LBB76_1 ++; LA64-NEXT: # %bb.2: ++; LA64-NEXT: srl.w $a0, $a4, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw nand ptr %a, i8 %b acq_rel ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_nand_i16_acq_rel(ptr %a, i16 %b) nounwind { ++; LA32-LABEL: atomicrmw_nand_i16_acq_rel: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a2, $zero, -4 ++; LA32-NEXT: and $a2, $a0, $a2 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: lu12i.w $a3, 15 ++; LA32-NEXT: ori $a3, $a3, 4095 ++; LA32-NEXT: sll.w $a3, $a3, $a0 ++; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 ++; LA32-NEXT: sll.w $a1, $a1, $a0 ++; LA32-NEXT: .LBB77_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a4, $a2, 0 ++; LA32-NEXT: and $a5, $a4, $a1 ++; LA32-NEXT: nor $a5, $a5, $zero ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: and $a5, $a5, $a3 ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: sc.w $a5, $a2, 0 ++; LA32-NEXT: beqz $a5, .LBB77_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a4, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_nand_i16_acq_rel: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: lu12i.w $a3, 15 ++; LA64-NEXT: ori $a3, $a3, 4095 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB77_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a2, 0 ++; LA64-NEXT: and $a5, $a4, $a1 ++; LA64-NEXT: nor $a5, $a5, $zero ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: and $a5, $a5, $a3 ++; LA64-NEXT: xor $a5, $a4, 
$a5 ++; LA64-NEXT: sc.w $a5, $a2, 0 ++; LA64-NEXT: beqz $a5, .LBB77_1 ++; LA64-NEXT: # %bb.2: ++; LA64-NEXT: srl.w $a0, $a4, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw nand ptr %a, i16 %b acq_rel ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_nand_i32_acq_rel(ptr %a, i32 %b) nounwind { ++; LA32-LABEL: atomicrmw_nand_i32_acq_rel: ++; LA32: # %bb.0: ++; LA32-NEXT: .LBB78_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a2, $a0, 0 ++; LA32-NEXT: and $a3, $a2, $a1 ++; LA32-NEXT: nor $a3, $a3, $zero ++; LA32-NEXT: sc.w $a3, $a0, 0 ++; LA32-NEXT: beqz $a3, .LBB78_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: move $a0, $a2 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_nand_i32_acq_rel: ++; LA64: # %bb.0: ++; LA64-NEXT: .LBB78_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a2, $a0, 0 ++; LA64-NEXT: and $a3, $a2, $a1 ++; LA64-NEXT: nor $a3, $a3, $zero ++; LA64-NEXT: sc.w $a3, $a0, 0 ++; LA64-NEXT: beqz $a3, .LBB78_1 ++; LA64-NEXT: # %bb.2: ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw nand ptr %a, i32 %b acq_rel ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_nand_i64_acq_rel(ptr %a, i64 %b) nounwind { ++; LA32-LABEL: atomicrmw_nand_i64_acq_rel: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $sp, $sp, -16 ++; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill ++; LA32-NEXT: ori $a3, $zero, 4 ++; LA32-NEXT: bl %plt(__atomic_fetch_nand_8) ++; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ++; LA32-NEXT: addi.w $sp, $sp, 16 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_nand_i64_acq_rel: ++; LA64: # %bb.0: ++; LA64-NEXT: .LBB79_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.d $a2, $a0, 0 ++; LA64-NEXT: and $a3, $a2, $a1 ++; LA64-NEXT: nor $a3, $a3, $zero ++; LA64-NEXT: sc.d $a3, $a0, 0 ++; LA64-NEXT: beqz $a3, .LBB79_1 ++; LA64-NEXT: # %bb.2: ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw nand ptr %a, i64 %b acq_rel ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_and_i8_acq_rel(ptr %a, i8 %b) nounwind { ++; LA32-LABEL: atomicrmw_and_i8_acq_rel: ++; LA32: # %bb.0: ++; LA32-NEXT: slli.w $a2, $a0, 3 ++; LA32-NEXT: ori $a3, $zero, 255 ++; LA32-NEXT: sll.w $a3, $a3, $a2 ++; LA32-NEXT: andi $a1, $a1, 255 ++; LA32-NEXT: sll.w $a1, $a1, $a2 ++; LA32-NEXT: orn $a1, $a1, $a3 ++; LA32-NEXT: addi.w $a3, $zero, -4 ++; LA32-NEXT: and $a0, $a0, $a3 ++; LA32-NEXT: .LBB80_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a3, $a0, 0 ++; LA32-NEXT: and $a4, $a3, $a1 ++; LA32-NEXT: sc.w $a4, $a0, 0 ++; LA32-NEXT: beqz $a4, .LBB80_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a3, $a2 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_and_i8_acq_rel: ++; LA64: # %bb.0: ++; LA64-NEXT: slli.d $a2, $a0, 3 ++; LA64-NEXT: ori $a3, $zero, 255 ++; LA64-NEXT: sll.w $a3, $a3, $a2 ++; LA64-NEXT: andi $a1, $a1, 255 ++; LA64-NEXT: sll.w $a1, $a1, $a2 ++; LA64-NEXT: orn $a1, $a1, $a3 ++; LA64-NEXT: addi.w $a3, $zero, -4 ++; LA64-NEXT: and $a0, $a0, $a3 ++; LA64-NEXT: amand_db.w $a3, $a1, $a0 ++; LA64-NEXT: srl.w $a0, $a3, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw and ptr %a, i8 %b acq_rel ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_and_i16_acq_rel(ptr %a, i16 %b) nounwind { ++; LA32-LABEL: atomicrmw_and_i16_acq_rel: ++; LA32: # %bb.0: ++; LA32-NEXT: lu12i.w $a2, 15 ++; LA32-NEXT: ori $a2, $a2, 4095 ++; LA32-NEXT: slli.w $a3, $a0, 3 ++; LA32-NEXT: sll.w $a2, $a2, $a3 ++; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 ++; LA32-NEXT: sll.w $a1, $a1, $a3 ++; LA32-NEXT: orn $a1, $a1, $a2 ++; LA32-NEXT: addi.w $a2, $zero, -4 ++; LA32-NEXT: and $a0, $a0, $a2 ++; LA32-NEXT: 
.LBB81_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a2, $a0, 0 ++; LA32-NEXT: and $a4, $a2, $a1 ++; LA32-NEXT: sc.w $a4, $a0, 0 ++; LA32-NEXT: beqz $a4, .LBB81_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a2, $a3 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_and_i16_acq_rel: ++; LA64: # %bb.0: ++; LA64-NEXT: lu12i.w $a2, 15 ++; LA64-NEXT: ori $a2, $a2, 4095 ++; LA64-NEXT: slli.d $a3, $a0, 3 ++; LA64-NEXT: sll.w $a2, $a2, $a3 ++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 ++; LA64-NEXT: sll.w $a1, $a1, $a3 ++; LA64-NEXT: orn $a1, $a1, $a2 ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a0, $a0, $a2 ++; LA64-NEXT: amand_db.w $a2, $a1, $a0 ++; LA64-NEXT: srl.w $a0, $a2, $a3 ++; LA64-NEXT: ret ++ %1 = atomicrmw and ptr %a, i16 %b acq_rel ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_and_i32_acq_rel(ptr %a, i32 %b) nounwind { ++; LA32-LABEL: atomicrmw_and_i32_acq_rel: ++; LA32: # %bb.0: ++; LA32-NEXT: .LBB82_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a2, $a0, 0 ++; LA32-NEXT: and $a3, $a2, $a1 ++; LA32-NEXT: sc.w $a3, $a0, 0 ++; LA32-NEXT: beqz $a3, .LBB82_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: move $a0, $a2 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_and_i32_acq_rel: ++; LA64: # %bb.0: ++; LA64-NEXT: amand_db.w $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw and ptr %a, i32 %b acq_rel ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_and_i64_acq_rel(ptr %a, i64 %b) nounwind { ++; LA32-LABEL: atomicrmw_and_i64_acq_rel: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $sp, $sp, -16 ++; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill ++; LA32-NEXT: ori $a3, $zero, 4 ++; LA32-NEXT: bl %plt(__atomic_fetch_and_8) ++; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ++; LA32-NEXT: addi.w $sp, $sp, 16 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_and_i64_acq_rel: ++; LA64: # %bb.0: ++; LA64-NEXT: amand_db.d $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw and ptr %a, i64 %b acq_rel ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_or_i8_acq_rel(ptr %a, i8 %b) nounwind { ++; LA32-LABEL: atomicrmw_or_i8_acq_rel: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a2, $zero, -4 ++; LA32-NEXT: and $a2, $a0, $a2 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: andi $a1, $a1, 255 ++; LA32-NEXT: sll.w $a1, $a1, $a0 ++; LA32-NEXT: .LBB84_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a3, $a2, 0 ++; LA32-NEXT: or $a4, $a3, $a1 ++; LA32-NEXT: sc.w $a4, $a2, 0 ++; LA32-NEXT: beqz $a4, .LBB84_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a3, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_or_i8_acq_rel: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: andi $a1, $a1, 255 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: amor_db.w $a3, $a1, $a2 ++; LA64-NEXT: srl.w $a0, $a3, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw or ptr %a, i8 %b acq_rel ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_or_i16_acq_rel(ptr %a, i16 %b) nounwind { ++; LA32-LABEL: atomicrmw_or_i16_acq_rel: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a2, $zero, -4 ++; LA32-NEXT: and $a2, $a0, $a2 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 ++; LA32-NEXT: sll.w $a1, $a1, $a0 ++; LA32-NEXT: .LBB85_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a3, $a2, 0 ++; LA32-NEXT: or $a4, $a3, $a1 ++; LA32-NEXT: sc.w $a4, $a2, 0 ++; LA32-NEXT: beqz $a4, .LBB85_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a3, $a0 
++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_or_i16_acq_rel: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: amor_db.w $a3, $a1, $a2 ++; LA64-NEXT: srl.w $a0, $a3, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw or ptr %a, i16 %b acq_rel ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_or_i32_acq_rel(ptr %a, i32 %b) nounwind { ++; LA32-LABEL: atomicrmw_or_i32_acq_rel: ++; LA32: # %bb.0: ++; LA32-NEXT: .LBB86_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a2, $a0, 0 ++; LA32-NEXT: or $a3, $a2, $a1 ++; LA32-NEXT: sc.w $a3, $a0, 0 ++; LA32-NEXT: beqz $a3, .LBB86_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: move $a0, $a2 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_or_i32_acq_rel: ++; LA64: # %bb.0: ++; LA64-NEXT: amor_db.w $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw or ptr %a, i32 %b acq_rel ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_or_i64_acq_rel(ptr %a, i64 %b) nounwind { ++; LA32-LABEL: atomicrmw_or_i64_acq_rel: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $sp, $sp, -16 ++; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill ++; LA32-NEXT: ori $a3, $zero, 4 ++; LA32-NEXT: bl %plt(__atomic_fetch_or_8) ++; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ++; LA32-NEXT: addi.w $sp, $sp, 16 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_or_i64_acq_rel: ++; LA64: # %bb.0: ++; LA64-NEXT: amor_db.d $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw or ptr %a, i64 %b acq_rel ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_xor_i8_acq_rel(ptr %a, i8 %b) nounwind { ++; LA32-LABEL: atomicrmw_xor_i8_acq_rel: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a2, $zero, -4 ++; LA32-NEXT: and $a2, $a0, $a2 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: andi $a1, $a1, 255 ++; LA32-NEXT: sll.w $a1, $a1, $a0 ++; LA32-NEXT: .LBB88_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a3, $a2, 0 ++; LA32-NEXT: xor $a4, $a3, $a1 ++; LA32-NEXT: sc.w $a4, $a2, 0 ++; LA32-NEXT: beqz $a4, .LBB88_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a3, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_xor_i8_acq_rel: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: andi $a1, $a1, 255 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: amxor_db.w $a3, $a1, $a2 ++; LA64-NEXT: srl.w $a0, $a3, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw xor ptr %a, i8 %b acq_rel ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_xor_i16_acq_rel(ptr %a, i16 %b) nounwind { ++; LA32-LABEL: atomicrmw_xor_i16_acq_rel: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a2, $zero, -4 ++; LA32-NEXT: and $a2, $a0, $a2 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 ++; LA32-NEXT: sll.w $a1, $a1, $a0 ++; LA32-NEXT: .LBB89_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a3, $a2, 0 ++; LA32-NEXT: xor $a4, $a3, $a1 ++; LA32-NEXT: sc.w $a4, $a2, 0 ++; LA32-NEXT: beqz $a4, .LBB89_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a3, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_xor_i16_acq_rel: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: amxor_db.w $a3, $a1, $a2 ++; LA64-NEXT: srl.w $a0, $a3, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw 
xor ptr %a, i16 %b acq_rel ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_xor_i32_acq_rel(ptr %a, i32 %b) nounwind { ++; LA32-LABEL: atomicrmw_xor_i32_acq_rel: ++; LA32: # %bb.0: ++; LA32-NEXT: .LBB90_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a2, $a0, 0 ++; LA32-NEXT: xor $a3, $a2, $a1 ++; LA32-NEXT: sc.w $a3, $a0, 0 ++; LA32-NEXT: beqz $a3, .LBB90_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: move $a0, $a2 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_xor_i32_acq_rel: ++; LA64: # %bb.0: ++; LA64-NEXT: amxor_db.w $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw xor ptr %a, i32 %b acq_rel ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_xor_i64_acq_rel(ptr %a, i64 %b) nounwind { ++; LA32-LABEL: atomicrmw_xor_i64_acq_rel: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $sp, $sp, -16 ++; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill ++; LA32-NEXT: ori $a3, $zero, 4 ++; LA32-NEXT: bl %plt(__atomic_fetch_xor_8) ++; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ++; LA32-NEXT: addi.w $sp, $sp, 16 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_xor_i64_acq_rel: ++; LA64: # %bb.0: ++; LA64-NEXT: amxor_db.d $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw xor ptr %a, i64 %b acq_rel ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_xchg_i8_seq_cst(ptr %a, i8 %b) nounwind { ++; LA32-LABEL: atomicrmw_xchg_i8_seq_cst: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a2, $zero, -4 ++; LA32-NEXT: and $a2, $a0, $a2 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: ori $a3, $zero, 255 ++; LA32-NEXT: sll.w $a3, $a3, $a0 ++; LA32-NEXT: andi $a1, $a1, 255 ++; LA32-NEXT: sll.w $a1, $a1, $a0 ++; LA32-NEXT: .LBB92_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a4, $a2, 0 ++; LA32-NEXT: addi.w $a5, $a1, 0 ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: and $a5, $a5, $a3 ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: sc.w $a5, $a2, 0 ++; LA32-NEXT: beqz $a5, .LBB92_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a4, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_xchg_i8_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: ori $a3, $zero, 255 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: andi $a1, $a1, 255 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB92_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a2, 0 ++; LA64-NEXT: addi.w $a5, $a1, 0 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: and $a5, $a5, $a3 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: sc.w $a5, $a2, 0 ++; LA64-NEXT: beqz $a5, .LBB92_1 ++; LA64-NEXT: # %bb.2: ++; LA64-NEXT: srl.w $a0, $a4, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw xchg ptr %a, i8 %b seq_cst ++ ret i8 %1 ++} ++ ++define i8 @atomicrmw_xchg_0_i8_seq_cst(ptr %a) nounwind { ++; LA32-LABEL: atomicrmw_xchg_0_i8_seq_cst: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a1, $zero, -4 ++; LA32-NEXT: and $a1, $a0, $a1 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: ori $a2, $zero, 255 ++; LA32-NEXT: sll.w $a2, $a2, $a0 ++; LA32-NEXT: .LBB93_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a3, $a1, 0 ++; LA32-NEXT: addi.w $a4, $zero, 0 ++; LA32-NEXT: xor $a4, $a3, $a4 ++; LA32-NEXT: and $a4, $a4, $a2 ++; LA32-NEXT: xor $a4, $a3, $a4 ++; LA32-NEXT: sc.w $a4, $a1, 0 ++; LA32-NEXT: beqz $a4, .LBB93_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a3, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: 
atomicrmw_xchg_0_i8_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a1, $zero, -4 ++; LA64-NEXT: and $a1, $a0, $a1 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: ori $a2, $zero, 255 ++; LA64-NEXT: sll.w $a2, $a2, $a0 ++; LA64-NEXT: addi.w $a2, $a2, 0 ++; LA64-NEXT: .LBB93_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a3, $a1, 0 ++; LA64-NEXT: addi.w $a4, $zero, 0 ++; LA64-NEXT: xor $a4, $a3, $a4 ++; LA64-NEXT: and $a4, $a4, $a2 ++; LA64-NEXT: xor $a4, $a3, $a4 ++; LA64-NEXT: sc.w $a4, $a1, 0 ++; LA64-NEXT: beqz $a4, .LBB93_1 ++; LA64-NEXT: # %bb.2: ++; LA64-NEXT: srl.w $a0, $a3, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw xchg ptr %a, i8 0 seq_cst ++ ret i8 %1 ++} ++ ++define i8 @atomicrmw_xchg_minus_1_i8_seq_cst(ptr %a) nounwind { ++; LA32-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a1, $zero, -4 ++; LA32-NEXT: and $a1, $a0, $a1 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: ori $a2, $zero, 255 ++; LA32-NEXT: sll.w $a2, $a2, $a0 ++; LA32-NEXT: .LBB94_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a3, $a1, 0 ++; LA32-NEXT: addi.w $a4, $a2, 0 ++; LA32-NEXT: xor $a4, $a3, $a4 ++; LA32-NEXT: and $a4, $a4, $a2 ++; LA32-NEXT: xor $a4, $a3, $a4 ++; LA32-NEXT: sc.w $a4, $a1, 0 ++; LA32-NEXT: beqz $a4, .LBB94_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a3, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a1, $zero, -4 ++; LA64-NEXT: and $a1, $a0, $a1 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: ori $a2, $zero, 255 ++; LA64-NEXT: sll.w $a2, $a2, $a0 ++; LA64-NEXT: addi.w $a2, $a2, 0 ++; LA64-NEXT: .LBB94_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a3, $a1, 0 ++; LA64-NEXT: addi.w $a4, $a2, 0 ++; LA64-NEXT: xor $a4, $a3, $a4 ++; LA64-NEXT: and $a4, $a4, $a2 ++; LA64-NEXT: xor $a4, $a3, $a4 ++; LA64-NEXT: sc.w $a4, $a1, 0 ++; LA64-NEXT: beqz $a4, .LBB94_1 ++; LA64-NEXT: # %bb.2: ++; LA64-NEXT: srl.w $a0, $a3, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw xchg ptr %a, i8 -1 seq_cst ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_xchg_i16_seq_cst(ptr %a, i16 %b) nounwind { ++; LA32-LABEL: atomicrmw_xchg_i16_seq_cst: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a2, $zero, -4 ++; LA32-NEXT: and $a2, $a0, $a2 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: lu12i.w $a3, 15 ++; LA32-NEXT: ori $a3, $a3, 4095 ++; LA32-NEXT: sll.w $a3, $a3, $a0 ++; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 ++; LA32-NEXT: sll.w $a1, $a1, $a0 ++; LA32-NEXT: .LBB95_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a4, $a2, 0 ++; LA32-NEXT: addi.w $a5, $a1, 0 ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: and $a5, $a5, $a3 ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: sc.w $a5, $a2, 0 ++; LA32-NEXT: beqz $a5, .LBB95_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a4, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_xchg_i16_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: lu12i.w $a3, 15 ++; LA64-NEXT: ori $a3, $a3, 4095 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB95_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a2, 0 ++; LA64-NEXT: addi.w $a5, $a1, 0 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: and $a5, $a5, $a3 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: sc.w $a5, $a2, 0 ++; 
LA64-NEXT: beqz $a5, .LBB95_1 ++; LA64-NEXT: # %bb.2: ++; LA64-NEXT: srl.w $a0, $a4, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw xchg ptr %a, i16 %b seq_cst ++ ret i16 %1 ++} ++ ++define i16 @atomicrmw_xchg_0_i16_seq_cst(ptr %a) nounwind { ++; LA32-LABEL: atomicrmw_xchg_0_i16_seq_cst: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a1, $zero, -4 ++; LA32-NEXT: and $a1, $a0, $a1 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: lu12i.w $a2, 15 ++; LA32-NEXT: ori $a2, $a2, 4095 ++; LA32-NEXT: sll.w $a2, $a2, $a0 ++; LA32-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a3, $a1, 0 ++; LA32-NEXT: addi.w $a4, $zero, 0 ++; LA32-NEXT: xor $a4, $a3, $a4 ++; LA32-NEXT: and $a4, $a4, $a2 ++; LA32-NEXT: xor $a4, $a3, $a4 ++; LA32-NEXT: sc.w $a4, $a1, 0 ++; LA32-NEXT: beqz $a4, .LBB96_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a3, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_xchg_0_i16_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a1, $zero, -4 ++; LA64-NEXT: and $a1, $a0, $a1 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: lu12i.w $a2, 15 ++; LA64-NEXT: ori $a2, $a2, 4095 ++; LA64-NEXT: sll.w $a2, $a2, $a0 ++; LA64-NEXT: addi.w $a2, $a2, 0 ++; LA64-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a3, $a1, 0 ++; LA64-NEXT: addi.w $a4, $zero, 0 ++; LA64-NEXT: xor $a4, $a3, $a4 ++; LA64-NEXT: and $a4, $a4, $a2 ++; LA64-NEXT: xor $a4, $a3, $a4 ++; LA64-NEXT: sc.w $a4, $a1, 0 ++; LA64-NEXT: beqz $a4, .LBB96_1 ++; LA64-NEXT: # %bb.2: ++; LA64-NEXT: srl.w $a0, $a3, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw xchg ptr %a, i16 0 seq_cst ++ ret i16 %1 ++} ++ ++define i16 @atomicrmw_xchg_minus_1_i16_seq_cst(ptr %a) nounwind { ++; LA32-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a1, $zero, -4 ++; LA32-NEXT: and $a1, $a0, $a1 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: lu12i.w $a2, 15 ++; LA32-NEXT: ori $a2, $a2, 4095 ++; LA32-NEXT: sll.w $a2, $a2, $a0 ++; LA32-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a3, $a1, 0 ++; LA32-NEXT: addi.w $a4, $a2, 0 ++; LA32-NEXT: xor $a4, $a3, $a4 ++; LA32-NEXT: and $a4, $a4, $a2 ++; LA32-NEXT: xor $a4, $a3, $a4 ++; LA32-NEXT: sc.w $a4, $a1, 0 ++; LA32-NEXT: beqz $a4, .LBB97_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a3, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a1, $zero, -4 ++; LA64-NEXT: and $a1, $a0, $a1 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: lu12i.w $a2, 15 ++; LA64-NEXT: ori $a2, $a2, 4095 ++; LA64-NEXT: sll.w $a2, $a2, $a0 ++; LA64-NEXT: addi.w $a2, $a2, 0 ++; LA64-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a3, $a1, 0 ++; LA64-NEXT: addi.w $a4, $a2, 0 ++; LA64-NEXT: xor $a4, $a3, $a4 ++; LA64-NEXT: and $a4, $a4, $a2 ++; LA64-NEXT: xor $a4, $a3, $a4 ++; LA64-NEXT: sc.w $a4, $a1, 0 ++; LA64-NEXT: beqz $a4, .LBB97_1 ++; LA64-NEXT: # %bb.2: ++; LA64-NEXT: srl.w $a0, $a3, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw xchg ptr %a, i16 -1 seq_cst ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_xchg_i32_seq_cst(ptr %a, i32 %b) nounwind { ++; LA32-LABEL: atomicrmw_xchg_i32_seq_cst: ++; LA32: # %bb.0: ++; LA32-NEXT: .LBB98_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a2, $a0, 0 ++; LA32-NEXT: move $a3, $a1 ++; LA32-NEXT: sc.w $a3, $a0, 0 ++; LA32-NEXT: beqz $a3, .LBB98_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: move $a0, $a2 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_xchg_i32_seq_cst: ++; LA64: # %bb.0: ++; 
LA64-NEXT: amswap_db.w $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw xchg ptr %a, i32 %b seq_cst ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_xchg_i64_seq_cst(ptr %a, i64 %b) nounwind { ++; LA32-LABEL: atomicrmw_xchg_i64_seq_cst: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $sp, $sp, -16 ++; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill ++; LA32-NEXT: ori $a3, $zero, 5 ++; LA32-NEXT: bl %plt(__atomic_exchange_8) ++; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ++; LA32-NEXT: addi.w $sp, $sp, 16 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_xchg_i64_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: amswap_db.d $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw xchg ptr %a, i64 %b seq_cst ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_add_i8_seq_cst(ptr %a, i8 %b) nounwind { ++; LA32-LABEL: atomicrmw_add_i8_seq_cst: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a2, $zero, -4 ++; LA32-NEXT: and $a2, $a0, $a2 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: ori $a3, $zero, 255 ++; LA32-NEXT: sll.w $a3, $a3, $a0 ++; LA32-NEXT: andi $a1, $a1, 255 ++; LA32-NEXT: sll.w $a1, $a1, $a0 ++; LA32-NEXT: .LBB100_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a4, $a2, 0 ++; LA32-NEXT: add.w $a5, $a4, $a1 ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: and $a5, $a5, $a3 ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: sc.w $a5, $a2, 0 ++; LA32-NEXT: beqz $a5, .LBB100_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a4, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_add_i8_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: ori $a3, $zero, 255 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: andi $a1, $a1, 255 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB100_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a2, 0 ++; LA64-NEXT: add.w $a5, $a4, $a1 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: and $a5, $a5, $a3 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: sc.w $a5, $a2, 0 ++; LA64-NEXT: beqz $a5, .LBB100_1 ++; LA64-NEXT: # %bb.2: ++; LA64-NEXT: srl.w $a0, $a4, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw add ptr %a, i8 %b seq_cst ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_add_i16_seq_cst(ptr %a, i16 %b) nounwind { ++; LA32-LABEL: atomicrmw_add_i16_seq_cst: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a2, $zero, -4 ++; LA32-NEXT: and $a2, $a0, $a2 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: lu12i.w $a3, 15 ++; LA32-NEXT: ori $a3, $a3, 4095 ++; LA32-NEXT: sll.w $a3, $a3, $a0 ++; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 ++; LA32-NEXT: sll.w $a1, $a1, $a0 ++; LA32-NEXT: .LBB101_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a4, $a2, 0 ++; LA32-NEXT: add.w $a5, $a4, $a1 ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: and $a5, $a5, $a3 ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: sc.w $a5, $a2, 0 ++; LA32-NEXT: beqz $a5, .LBB101_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a4, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_add_i16_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: lu12i.w $a3, 15 ++; LA64-NEXT: ori $a3, $a3, 4095 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: 
.LBB101_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a2, 0 ++; LA64-NEXT: add.w $a5, $a4, $a1 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: and $a5, $a5, $a3 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: sc.w $a5, $a2, 0 ++; LA64-NEXT: beqz $a5, .LBB101_1 ++; LA64-NEXT: # %bb.2: ++; LA64-NEXT: srl.w $a0, $a4, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw add ptr %a, i16 %b seq_cst ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_add_i32_seq_cst(ptr %a, i32 %b) nounwind { ++; LA32-LABEL: atomicrmw_add_i32_seq_cst: ++; LA32: # %bb.0: ++; LA32-NEXT: .LBB102_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a2, $a0, 0 ++; LA32-NEXT: add.w $a3, $a2, $a1 ++; LA32-NEXT: sc.w $a3, $a0, 0 ++; LA32-NEXT: beqz $a3, .LBB102_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: move $a0, $a2 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_add_i32_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: amadd_db.w $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw add ptr %a, i32 %b seq_cst ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_add_i64_seq_cst(ptr %a, i64 %b) nounwind { ++; LA32-LABEL: atomicrmw_add_i64_seq_cst: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $sp, $sp, -16 ++; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill ++; LA32-NEXT: ori $a3, $zero, 5 ++; LA32-NEXT: bl %plt(__atomic_fetch_add_8) ++; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ++; LA32-NEXT: addi.w $sp, $sp, 16 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_add_i64_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: amadd_db.d $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw add ptr %a, i64 %b seq_cst ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_sub_i8_seq_cst(ptr %a, i8 %b) nounwind { ++; LA32-LABEL: atomicrmw_sub_i8_seq_cst: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a2, $zero, -4 ++; LA32-NEXT: and $a2, $a0, $a2 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: ori $a3, $zero, 255 ++; LA32-NEXT: sll.w $a3, $a3, $a0 ++; LA32-NEXT: andi $a1, $a1, 255 ++; LA32-NEXT: sll.w $a1, $a1, $a0 ++; LA32-NEXT: .LBB104_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a4, $a2, 0 ++; LA32-NEXT: sub.w $a5, $a4, $a1 ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: and $a5, $a5, $a3 ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: sc.w $a5, $a2, 0 ++; LA32-NEXT: beqz $a5, .LBB104_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a4, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_sub_i8_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: ori $a3, $zero, 255 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: andi $a1, $a1, 255 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB104_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a2, 0 ++; LA64-NEXT: sub.w $a5, $a4, $a1 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: and $a5, $a5, $a3 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: sc.w $a5, $a2, 0 ++; LA64-NEXT: beqz $a5, .LBB104_1 ++; LA64-NEXT: # %bb.2: ++; LA64-NEXT: srl.w $a0, $a4, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw sub ptr %a, i8 %b seq_cst ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_sub_i16_seq_cst(ptr %a, i16 %b) nounwind { ++; LA32-LABEL: atomicrmw_sub_i16_seq_cst: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a2, $zero, -4 ++; LA32-NEXT: and $a2, $a0, $a2 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: lu12i.w $a3, 15 ++; LA32-NEXT: ori $a3, $a3, 4095 ++; LA32-NEXT: sll.w $a3, $a3, 
$a0 ++; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 ++; LA32-NEXT: sll.w $a1, $a1, $a0 ++; LA32-NEXT: .LBB105_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a4, $a2, 0 ++; LA32-NEXT: sub.w $a5, $a4, $a1 ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: and $a5, $a5, $a3 ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: sc.w $a5, $a2, 0 ++; LA32-NEXT: beqz $a5, .LBB105_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a4, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_sub_i16_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: lu12i.w $a3, 15 ++; LA64-NEXT: ori $a3, $a3, 4095 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB105_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a2, 0 ++; LA64-NEXT: sub.w $a5, $a4, $a1 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: and $a5, $a5, $a3 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: sc.w $a5, $a2, 0 ++; LA64-NEXT: beqz $a5, .LBB105_1 ++; LA64-NEXT: # %bb.2: ++; LA64-NEXT: srl.w $a0, $a4, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw sub ptr %a, i16 %b seq_cst ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_sub_i32_seq_cst(ptr %a, i32 %b) nounwind { ++; LA32-LABEL: atomicrmw_sub_i32_seq_cst: ++; LA32: # %bb.0: ++; LA32-NEXT: .LBB106_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a2, $a0, 0 ++; LA32-NEXT: sub.w $a3, $a2, $a1 ++; LA32-NEXT: sc.w $a3, $a0, 0 ++; LA32-NEXT: beqz $a3, .LBB106_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: move $a0, $a2 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_sub_i32_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: sub.w $a2, $zero, $a1 ++; LA64-NEXT: amadd_db.w $a1, $a2, $a0 ++; LA64-NEXT: move $a0, $a1 ++; LA64-NEXT: ret ++ %1 = atomicrmw sub ptr %a, i32 %b seq_cst ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_sub_i64_seq_cst(ptr %a, i64 %b) nounwind { ++; LA32-LABEL: atomicrmw_sub_i64_seq_cst: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $sp, $sp, -16 ++; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill ++; LA32-NEXT: ori $a3, $zero, 5 ++; LA32-NEXT: bl %plt(__atomic_fetch_sub_8) ++; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ++; LA32-NEXT: addi.w $sp, $sp, 16 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_sub_i64_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: sub.d $a2, $zero, $a1 ++; LA64-NEXT: amadd_db.d $a1, $a2, $a0 ++; LA64-NEXT: move $a0, $a1 ++; LA64-NEXT: ret ++ %1 = atomicrmw sub ptr %a, i64 %b seq_cst ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_nand_i8_seq_cst(ptr %a, i8 %b) nounwind { ++; LA32-LABEL: atomicrmw_nand_i8_seq_cst: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a2, $zero, -4 ++; LA32-NEXT: and $a2, $a0, $a2 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: ori $a3, $zero, 255 ++; LA32-NEXT: sll.w $a3, $a3, $a0 ++; LA32-NEXT: andi $a1, $a1, 255 ++; LA32-NEXT: sll.w $a1, $a1, $a0 ++; LA32-NEXT: .LBB108_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a4, $a2, 0 ++; LA32-NEXT: and $a5, $a4, $a1 ++; LA32-NEXT: nor $a5, $a5, $zero ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: and $a5, $a5, $a3 ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: sc.w $a5, $a2, 0 ++; LA32-NEXT: beqz $a5, .LBB108_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a4, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_nand_i8_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: 
slli.d $a0, $a0, 3 ++; LA64-NEXT: ori $a3, $zero, 255 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: andi $a1, $a1, 255 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB108_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a2, 0 ++; LA64-NEXT: and $a5, $a4, $a1 ++; LA64-NEXT: nor $a5, $a5, $zero ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: and $a5, $a5, $a3 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: sc.w $a5, $a2, 0 ++; LA64-NEXT: beqz $a5, .LBB108_1 ++; LA64-NEXT: # %bb.2: ++; LA64-NEXT: srl.w $a0, $a4, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw nand ptr %a, i8 %b seq_cst ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_nand_i16_seq_cst(ptr %a, i16 %b) nounwind { ++; LA32-LABEL: atomicrmw_nand_i16_seq_cst: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a2, $zero, -4 ++; LA32-NEXT: and $a2, $a0, $a2 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: lu12i.w $a3, 15 ++; LA32-NEXT: ori $a3, $a3, 4095 ++; LA32-NEXT: sll.w $a3, $a3, $a0 ++; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 ++; LA32-NEXT: sll.w $a1, $a1, $a0 ++; LA32-NEXT: .LBB109_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a4, $a2, 0 ++; LA32-NEXT: and $a5, $a4, $a1 ++; LA32-NEXT: nor $a5, $a5, $zero ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: and $a5, $a5, $a3 ++; LA32-NEXT: xor $a5, $a4, $a5 ++; LA32-NEXT: sc.w $a5, $a2, 0 ++; LA32-NEXT: beqz $a5, .LBB109_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a4, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_nand_i16_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: lu12i.w $a3, 15 ++; LA64-NEXT: ori $a3, $a3, 4095 ++; LA64-NEXT: sll.w $a3, $a3, $a0 ++; LA64-NEXT: addi.w $a3, $a3, 0 ++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB109_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a2, 0 ++; LA64-NEXT: and $a5, $a4, $a1 ++; LA64-NEXT: nor $a5, $a5, $zero ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: and $a5, $a5, $a3 ++; LA64-NEXT: xor $a5, $a4, $a5 ++; LA64-NEXT: sc.w $a5, $a2, 0 ++; LA64-NEXT: beqz $a5, .LBB109_1 ++; LA64-NEXT: # %bb.2: ++; LA64-NEXT: srl.w $a0, $a4, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw nand ptr %a, i16 %b seq_cst ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_nand_i32_seq_cst(ptr %a, i32 %b) nounwind { ++; LA32-LABEL: atomicrmw_nand_i32_seq_cst: ++; LA32: # %bb.0: ++; LA32-NEXT: .LBB110_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a2, $a0, 0 ++; LA32-NEXT: and $a3, $a2, $a1 ++; LA32-NEXT: nor $a3, $a3, $zero ++; LA32-NEXT: sc.w $a3, $a0, 0 ++; LA32-NEXT: beqz $a3, .LBB110_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: move $a0, $a2 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_nand_i32_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: .LBB110_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a2, $a0, 0 ++; LA64-NEXT: and $a3, $a2, $a1 ++; LA64-NEXT: nor $a3, $a3, $zero ++; LA64-NEXT: sc.w $a3, $a0, 0 ++; LA64-NEXT: beqz $a3, .LBB110_1 ++; LA64-NEXT: # %bb.2: ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw nand ptr %a, i32 %b seq_cst ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_nand_i64_seq_cst(ptr %a, i64 %b) nounwind { ++; LA32-LABEL: atomicrmw_nand_i64_seq_cst: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $sp, $sp, -16 ++; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill ++; LA32-NEXT: ori $a3, $zero, 5 ++; LA32-NEXT: bl 
%plt(__atomic_fetch_nand_8) ++; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ++; LA32-NEXT: addi.w $sp, $sp, 16 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_nand_i64_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: .LBB111_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.d $a2, $a0, 0 ++; LA64-NEXT: and $a3, $a2, $a1 ++; LA64-NEXT: nor $a3, $a3, $zero ++; LA64-NEXT: sc.d $a3, $a0, 0 ++; LA64-NEXT: beqz $a3, .LBB111_1 ++; LA64-NEXT: # %bb.2: ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw nand ptr %a, i64 %b seq_cst ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_and_i8_seq_cst(ptr %a, i8 %b) nounwind { ++; LA32-LABEL: atomicrmw_and_i8_seq_cst: ++; LA32: # %bb.0: ++; LA32-NEXT: slli.w $a2, $a0, 3 ++; LA32-NEXT: ori $a3, $zero, 255 ++; LA32-NEXT: sll.w $a3, $a3, $a2 ++; LA32-NEXT: andi $a1, $a1, 255 ++; LA32-NEXT: sll.w $a1, $a1, $a2 ++; LA32-NEXT: orn $a1, $a1, $a3 ++; LA32-NEXT: addi.w $a3, $zero, -4 ++; LA32-NEXT: and $a0, $a0, $a3 ++; LA32-NEXT: .LBB112_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a3, $a0, 0 ++; LA32-NEXT: and $a4, $a3, $a1 ++; LA32-NEXT: sc.w $a4, $a0, 0 ++; LA32-NEXT: beqz $a4, .LBB112_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a3, $a2 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_and_i8_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: slli.d $a2, $a0, 3 ++; LA64-NEXT: ori $a3, $zero, 255 ++; LA64-NEXT: sll.w $a3, $a3, $a2 ++; LA64-NEXT: andi $a1, $a1, 255 ++; LA64-NEXT: sll.w $a1, $a1, $a2 ++; LA64-NEXT: orn $a1, $a1, $a3 ++; LA64-NEXT: addi.w $a3, $zero, -4 ++; LA64-NEXT: and $a0, $a0, $a3 ++; LA64-NEXT: amand_db.w $a3, $a1, $a0 ++; LA64-NEXT: srl.w $a0, $a3, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw and ptr %a, i8 %b seq_cst ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_and_i16_seq_cst(ptr %a, i16 %b) nounwind { ++; LA32-LABEL: atomicrmw_and_i16_seq_cst: ++; LA32: # %bb.0: ++; LA32-NEXT: lu12i.w $a2, 15 ++; LA32-NEXT: ori $a2, $a2, 4095 ++; LA32-NEXT: slli.w $a3, $a0, 3 ++; LA32-NEXT: sll.w $a2, $a2, $a3 ++; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 ++; LA32-NEXT: sll.w $a1, $a1, $a3 ++; LA32-NEXT: orn $a1, $a1, $a2 ++; LA32-NEXT: addi.w $a2, $zero, -4 ++; LA32-NEXT: and $a0, $a0, $a2 ++; LA32-NEXT: .LBB113_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a2, $a0, 0 ++; LA32-NEXT: and $a4, $a2, $a1 ++; LA32-NEXT: sc.w $a4, $a0, 0 ++; LA32-NEXT: beqz $a4, .LBB113_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a2, $a3 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_and_i16_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: lu12i.w $a2, 15 ++; LA64-NEXT: ori $a2, $a2, 4095 ++; LA64-NEXT: slli.d $a3, $a0, 3 ++; LA64-NEXT: sll.w $a2, $a2, $a3 ++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 ++; LA64-NEXT: sll.w $a1, $a1, $a3 ++; LA64-NEXT: orn $a1, $a1, $a2 ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a0, $a0, $a2 ++; LA64-NEXT: amand_db.w $a2, $a1, $a0 ++; LA64-NEXT: srl.w $a0, $a2, $a3 ++; LA64-NEXT: ret ++ %1 = atomicrmw and ptr %a, i16 %b seq_cst ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_and_i32_seq_cst(ptr %a, i32 %b) nounwind { ++; LA32-LABEL: atomicrmw_and_i32_seq_cst: ++; LA32: # %bb.0: ++; LA32-NEXT: .LBB114_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a2, $a0, 0 ++; LA32-NEXT: and $a3, $a2, $a1 ++; LA32-NEXT: sc.w $a3, $a0, 0 ++; LA32-NEXT: beqz $a3, .LBB114_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: move $a0, $a2 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_and_i32_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: amand_db.w $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 
= atomicrmw and ptr %a, i32 %b seq_cst ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_and_i64_seq_cst(ptr %a, i64 %b) nounwind { ++; LA32-LABEL: atomicrmw_and_i64_seq_cst: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $sp, $sp, -16 ++; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill ++; LA32-NEXT: ori $a3, $zero, 5 ++; LA32-NEXT: bl %plt(__atomic_fetch_and_8) ++; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ++; LA32-NEXT: addi.w $sp, $sp, 16 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_and_i64_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: amand_db.d $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw and ptr %a, i64 %b seq_cst ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_or_i8_seq_cst(ptr %a, i8 %b) nounwind { ++; LA32-LABEL: atomicrmw_or_i8_seq_cst: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a2, $zero, -4 ++; LA32-NEXT: and $a2, $a0, $a2 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: andi $a1, $a1, 255 ++; LA32-NEXT: sll.w $a1, $a1, $a0 ++; LA32-NEXT: .LBB116_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a3, $a2, 0 ++; LA32-NEXT: or $a4, $a3, $a1 ++; LA32-NEXT: sc.w $a4, $a2, 0 ++; LA32-NEXT: beqz $a4, .LBB116_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a3, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_or_i8_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: andi $a1, $a1, 255 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: amor_db.w $a3, $a1, $a2 ++; LA64-NEXT: srl.w $a0, $a3, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw or ptr %a, i8 %b seq_cst ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_or_i16_seq_cst(ptr %a, i16 %b) nounwind { ++; LA32-LABEL: atomicrmw_or_i16_seq_cst: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a2, $zero, -4 ++; LA32-NEXT: and $a2, $a0, $a2 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 ++; LA32-NEXT: sll.w $a1, $a1, $a0 ++; LA32-NEXT: .LBB117_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a3, $a2, 0 ++; LA32-NEXT: or $a4, $a3, $a1 ++; LA32-NEXT: sc.w $a4, $a2, 0 ++; LA32-NEXT: beqz $a4, .LBB117_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a3, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_or_i16_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: amor_db.w $a3, $a1, $a2 ++; LA64-NEXT: srl.w $a0, $a3, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw or ptr %a, i16 %b seq_cst ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_or_i32_seq_cst(ptr %a, i32 %b) nounwind { ++; LA32-LABEL: atomicrmw_or_i32_seq_cst: ++; LA32: # %bb.0: ++; LA32-NEXT: .LBB118_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a2, $a0, 0 ++; LA32-NEXT: or $a3, $a2, $a1 ++; LA32-NEXT: sc.w $a3, $a0, 0 ++; LA32-NEXT: beqz $a3, .LBB118_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: move $a0, $a2 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_or_i32_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: amor_db.w $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw or ptr %a, i32 %b seq_cst ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_or_i64_seq_cst(ptr %a, i64 %b) nounwind { ++; LA32-LABEL: atomicrmw_or_i64_seq_cst: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $sp, $sp, -16 ++; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill ++; LA32-NEXT: ori $a3, $zero, 5 ++; LA32-NEXT: bl %plt(__atomic_fetch_or_8) ++; LA32-NEXT: ld.w 
$ra, $sp, 12 # 4-byte Folded Reload ++; LA32-NEXT: addi.w $sp, $sp, 16 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_or_i64_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: amor_db.d $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw or ptr %a, i64 %b seq_cst ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_xor_i8_seq_cst(ptr %a, i8 %b) nounwind { ++; LA32-LABEL: atomicrmw_xor_i8_seq_cst: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a2, $zero, -4 ++; LA32-NEXT: and $a2, $a0, $a2 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: andi $a1, $a1, 255 ++; LA32-NEXT: sll.w $a1, $a1, $a0 ++; LA32-NEXT: .LBB120_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a3, $a2, 0 ++; LA32-NEXT: xor $a4, $a3, $a1 ++; LA32-NEXT: sc.w $a4, $a2, 0 ++; LA32-NEXT: beqz $a4, .LBB120_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a3, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_xor_i8_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: andi $a1, $a1, 255 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: amxor_db.w $a3, $a1, $a2 ++; LA64-NEXT: srl.w $a0, $a3, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw xor ptr %a, i8 %b seq_cst ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_xor_i16_seq_cst(ptr %a, i16 %b) nounwind { ++; LA32-LABEL: atomicrmw_xor_i16_seq_cst: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $a2, $zero, -4 ++; LA32-NEXT: and $a2, $a0, $a2 ++; LA32-NEXT: slli.w $a0, $a0, 3 ++; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 ++; LA32-NEXT: sll.w $a1, $a1, $a0 ++; LA32-NEXT: .LBB121_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a3, $a2, 0 ++; LA32-NEXT: xor $a4, $a3, $a1 ++; LA32-NEXT: sc.w $a4, $a2, 0 ++; LA32-NEXT: beqz $a4, .LBB121_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: srl.w $a0, $a3, $a0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_xor_i16_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a2, $zero, -4 ++; LA64-NEXT: and $a2, $a0, $a2 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: amxor_db.w $a3, $a1, $a2 ++; LA64-NEXT: srl.w $a0, $a3, $a0 ++; LA64-NEXT: ret ++ %1 = atomicrmw xor ptr %a, i16 %b seq_cst ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_xor_i32_seq_cst(ptr %a, i32 %b) nounwind { ++; LA32-LABEL: atomicrmw_xor_i32_seq_cst: ++; LA32: # %bb.0: ++; LA32-NEXT: .LBB122_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: ll.w $a2, $a0, 0 ++; LA32-NEXT: xor $a3, $a2, $a1 ++; LA32-NEXT: sc.w $a3, $a0, 0 ++; LA32-NEXT: beqz $a3, .LBB122_1 ++; LA32-NEXT: # %bb.2: ++; LA32-NEXT: move $a0, $a2 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_xor_i32_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: amxor_db.w $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw xor ptr %a, i32 %b seq_cst ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_xor_i64_seq_cst(ptr %a, i64 %b) nounwind { ++; LA32-LABEL: atomicrmw_xor_i64_seq_cst: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $sp, $sp, -16 ++; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill ++; LA32-NEXT: ori $a3, $zero, 5 ++; LA32-NEXT: bl %plt(__atomic_fetch_xor_8) ++; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ++; LA32-NEXT: addi.w $sp, $sp, 16 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: atomicrmw_xor_i64_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: amxor_db.d $a2, $a1, $a0 ++; LA64-NEXT: move $a0, $a2 ++; LA64-NEXT: ret ++ %1 = atomicrmw xor ptr %a, i64 %b seq_cst ++ ret i64 %1 ++} ++ + define i8 @atomicrmw_xchg_i8_monotonic(ptr %a, 
i8 %b) nounwind { + ; LA32-LABEL: atomicrmw_xchg_i8_monotonic: + ; LA32: # %bb.0: +@@ -910,14 +4132,14 @@ define i8 @atomicrmw_xchg_i8_monotonic(ptr %a, i8 %b) nounwind { + ; LA32-NEXT: sll.w $a3, $a3, $a0 + ; LA32-NEXT: andi $a1, $a1, 255 + ; LA32-NEXT: sll.w $a1, $a1, $a0 +-; LA32-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: .LBB124_1: # =>This Inner Loop Header: Depth=1 + ; LA32-NEXT: ll.w $a4, $a2, 0 + ; LA32-NEXT: addi.w $a5, $a1, 0 + ; LA32-NEXT: xor $a5, $a4, $a5 + ; LA32-NEXT: and $a5, $a5, $a3 + ; LA32-NEXT: xor $a5, $a4, $a5 + ; LA32-NEXT: sc.w $a5, $a2, 0 +-; LA32-NEXT: beqz $a5, .LBB28_1 ++; LA32-NEXT: beqz $a5, .LBB124_1 + ; LA32-NEXT: # %bb.2: + ; LA32-NEXT: srl.w $a0, $a4, $a0 + ; LA32-NEXT: ret +@@ -933,14 +4155,14 @@ define i8 @atomicrmw_xchg_i8_monotonic(ptr %a, i8 %b) nounwind { + ; LA64-NEXT: andi $a1, $a1, 255 + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 +-; LA64-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB124_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a4, $a2, 0 + ; LA64-NEXT: addi.w $a5, $a1, 0 + ; LA64-NEXT: xor $a5, $a4, $a5 + ; LA64-NEXT: and $a5, $a5, $a3 + ; LA64-NEXT: xor $a5, $a4, $a5 + ; LA64-NEXT: sc.w $a5, $a2, 0 +-; LA64-NEXT: beqz $a5, .LBB28_1 ++; LA64-NEXT: beqz $a5, .LBB124_1 + ; LA64-NEXT: # %bb.2: + ; LA64-NEXT: srl.w $a0, $a4, $a0 + ; LA64-NEXT: ret +@@ -959,14 +4181,14 @@ define i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind { + ; LA32-NEXT: sll.w $a3, $a3, $a0 + ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 + ; LA32-NEXT: sll.w $a1, $a1, $a0 +-; LA32-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: .LBB125_1: # =>This Inner Loop Header: Depth=1 + ; LA32-NEXT: ll.w $a4, $a2, 0 + ; LA32-NEXT: addi.w $a5, $a1, 0 + ; LA32-NEXT: xor $a5, $a4, $a5 + ; LA32-NEXT: and $a5, $a5, $a3 + ; LA32-NEXT: xor $a5, $a4, $a5 + ; LA32-NEXT: sc.w $a5, $a2, 0 +-; LA32-NEXT: beqz $a5, .LBB29_1 ++; LA32-NEXT: beqz $a5, .LBB125_1 + ; LA32-NEXT: # %bb.2: + ; LA32-NEXT: srl.w $a0, $a4, $a0 + ; LA32-NEXT: ret +@@ -983,14 +4205,14 @@ define i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind { + ; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 +-; LA64-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB125_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a4, $a2, 0 + ; LA64-NEXT: addi.w $a5, $a1, 0 + ; LA64-NEXT: xor $a5, $a4, $a5 + ; LA64-NEXT: and $a5, $a5, $a3 + ; LA64-NEXT: xor $a5, $a4, $a5 + ; LA64-NEXT: sc.w $a5, $a2, 0 +-; LA64-NEXT: beqz $a5, .LBB29_1 ++; LA64-NEXT: beqz $a5, .LBB125_1 + ; LA64-NEXT: # %bb.2: + ; LA64-NEXT: srl.w $a0, $a4, $a0 + ; LA64-NEXT: ret +@@ -1001,11 +4223,11 @@ define i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind { + define i32 @atomicrmw_xchg_i32_monotonic(ptr %a, i32 %b) nounwind { + ; LA32-LABEL: atomicrmw_xchg_i32_monotonic: + ; LA32: # %bb.0: +-; LA32-NEXT: .LBB30_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: .LBB126_1: # =>This Inner Loop Header: Depth=1 + ; LA32-NEXT: ll.w $a2, $a0, 0 + ; LA32-NEXT: move $a3, $a1 + ; LA32-NEXT: sc.w $a3, $a0, 0 +-; LA32-NEXT: beqz $a3, .LBB30_1 ++; LA32-NEXT: beqz $a3, .LBB126_1 + ; LA32-NEXT: # %bb.2: + ; LA32-NEXT: move $a0, $a2 + ; LA32-NEXT: ret +@@ -1049,14 +4271,14 @@ define i8 @atomicrmw_add_i8_monotonic(ptr %a, i8 %b) nounwind { + ; LA32-NEXT: sll.w $a3, $a3, $a0 + ; LA32-NEXT: andi $a1, $a1, 255 + ; LA32-NEXT: sll.w $a1, $a1, $a0 +-; LA32-NEXT: .LBB32_1: # =>This 
Inner Loop Header: Depth=1 ++; LA32-NEXT: .LBB128_1: # =>This Inner Loop Header: Depth=1 + ; LA32-NEXT: ll.w $a4, $a2, 0 + ; LA32-NEXT: add.w $a5, $a4, $a1 + ; LA32-NEXT: xor $a5, $a4, $a5 + ; LA32-NEXT: and $a5, $a5, $a3 + ; LA32-NEXT: xor $a5, $a4, $a5 + ; LA32-NEXT: sc.w $a5, $a2, 0 +-; LA32-NEXT: beqz $a5, .LBB32_1 ++; LA32-NEXT: beqz $a5, .LBB128_1 + ; LA32-NEXT: # %bb.2: + ; LA32-NEXT: srl.w $a0, $a4, $a0 + ; LA32-NEXT: ret +@@ -1072,14 +4294,14 @@ define i8 @atomicrmw_add_i8_monotonic(ptr %a, i8 %b) nounwind { + ; LA64-NEXT: andi $a1, $a1, 255 + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 +-; LA64-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB128_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a4, $a2, 0 + ; LA64-NEXT: add.w $a5, $a4, $a1 + ; LA64-NEXT: xor $a5, $a4, $a5 + ; LA64-NEXT: and $a5, $a5, $a3 + ; LA64-NEXT: xor $a5, $a4, $a5 + ; LA64-NEXT: sc.w $a5, $a2, 0 +-; LA64-NEXT: beqz $a5, .LBB32_1 ++; LA64-NEXT: beqz $a5, .LBB128_1 + ; LA64-NEXT: # %bb.2: + ; LA64-NEXT: srl.w $a0, $a4, $a0 + ; LA64-NEXT: ret +@@ -1098,14 +4320,14 @@ define i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind { + ; LA32-NEXT: sll.w $a3, $a3, $a0 + ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 + ; LA32-NEXT: sll.w $a1, $a1, $a0 +-; LA32-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: .LBB129_1: # =>This Inner Loop Header: Depth=1 + ; LA32-NEXT: ll.w $a4, $a2, 0 + ; LA32-NEXT: add.w $a5, $a4, $a1 + ; LA32-NEXT: xor $a5, $a4, $a5 + ; LA32-NEXT: and $a5, $a5, $a3 + ; LA32-NEXT: xor $a5, $a4, $a5 + ; LA32-NEXT: sc.w $a5, $a2, 0 +-; LA32-NEXT: beqz $a5, .LBB33_1 ++; LA32-NEXT: beqz $a5, .LBB129_1 + ; LA32-NEXT: # %bb.2: + ; LA32-NEXT: srl.w $a0, $a4, $a0 + ; LA32-NEXT: ret +@@ -1122,14 +4344,14 @@ define i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind { + ; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 +-; LA64-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB129_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a4, $a2, 0 + ; LA64-NEXT: add.w $a5, $a4, $a1 + ; LA64-NEXT: xor $a5, $a4, $a5 + ; LA64-NEXT: and $a5, $a5, $a3 + ; LA64-NEXT: xor $a5, $a4, $a5 + ; LA64-NEXT: sc.w $a5, $a2, 0 +-; LA64-NEXT: beqz $a5, .LBB33_1 ++; LA64-NEXT: beqz $a5, .LBB129_1 + ; LA64-NEXT: # %bb.2: + ; LA64-NEXT: srl.w $a0, $a4, $a0 + ; LA64-NEXT: ret +@@ -1140,11 +4362,11 @@ define i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind { + define i32 @atomicrmw_add_i32_monotonic(ptr %a, i32 %b) nounwind { + ; LA32-LABEL: atomicrmw_add_i32_monotonic: + ; LA32: # %bb.0: +-; LA32-NEXT: .LBB34_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: .LBB130_1: # =>This Inner Loop Header: Depth=1 + ; LA32-NEXT: ll.w $a2, $a0, 0 + ; LA32-NEXT: add.w $a3, $a2, $a1 + ; LA32-NEXT: sc.w $a3, $a0, 0 +-; LA32-NEXT: beqz $a3, .LBB34_1 ++; LA32-NEXT: beqz $a3, .LBB130_1 + ; LA32-NEXT: # %bb.2: + ; LA32-NEXT: move $a0, $a2 + ; LA32-NEXT: ret +@@ -1188,14 +4410,14 @@ define i8 @atomicrmw_sub_i8_monotonic(ptr %a, i8 %b) nounwind { + ; LA32-NEXT: sll.w $a3, $a3, $a0 + ; LA32-NEXT: andi $a1, $a1, 255 + ; LA32-NEXT: sll.w $a1, $a1, $a0 +-; LA32-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: .LBB132_1: # =>This Inner Loop Header: Depth=1 + ; LA32-NEXT: ll.w $a4, $a2, 0 + ; LA32-NEXT: sub.w $a5, $a4, $a1 + ; LA32-NEXT: xor $a5, $a4, $a5 + ; LA32-NEXT: and $a5, $a5, $a3 + ; LA32-NEXT: xor $a5, $a4, $a5 + ; LA32-NEXT: sc.w $a5, $a2, 0 +-; LA32-NEXT: 
beqz $a5, .LBB36_1 ++; LA32-NEXT: beqz $a5, .LBB132_1 + ; LA32-NEXT: # %bb.2: + ; LA32-NEXT: srl.w $a0, $a4, $a0 + ; LA32-NEXT: ret +@@ -1211,14 +4433,14 @@ define i8 @atomicrmw_sub_i8_monotonic(ptr %a, i8 %b) nounwind { + ; LA64-NEXT: andi $a1, $a1, 255 + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 +-; LA64-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB132_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a4, $a2, 0 + ; LA64-NEXT: sub.w $a5, $a4, $a1 + ; LA64-NEXT: xor $a5, $a4, $a5 + ; LA64-NEXT: and $a5, $a5, $a3 + ; LA64-NEXT: xor $a5, $a4, $a5 + ; LA64-NEXT: sc.w $a5, $a2, 0 +-; LA64-NEXT: beqz $a5, .LBB36_1 ++; LA64-NEXT: beqz $a5, .LBB132_1 + ; LA64-NEXT: # %bb.2: + ; LA64-NEXT: srl.w $a0, $a4, $a0 + ; LA64-NEXT: ret +@@ -1237,14 +4459,14 @@ define i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind { + ; LA32-NEXT: sll.w $a3, $a3, $a0 + ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 + ; LA32-NEXT: sll.w $a1, $a1, $a0 +-; LA32-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: .LBB133_1: # =>This Inner Loop Header: Depth=1 + ; LA32-NEXT: ll.w $a4, $a2, 0 + ; LA32-NEXT: sub.w $a5, $a4, $a1 + ; LA32-NEXT: xor $a5, $a4, $a5 + ; LA32-NEXT: and $a5, $a5, $a3 + ; LA32-NEXT: xor $a5, $a4, $a5 + ; LA32-NEXT: sc.w $a5, $a2, 0 +-; LA32-NEXT: beqz $a5, .LBB37_1 ++; LA32-NEXT: beqz $a5, .LBB133_1 + ; LA32-NEXT: # %bb.2: + ; LA32-NEXT: srl.w $a0, $a4, $a0 + ; LA32-NEXT: ret +@@ -1261,14 +4483,14 @@ define i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind { + ; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 +-; LA64-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB133_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a4, $a2, 0 + ; LA64-NEXT: sub.w $a5, $a4, $a1 + ; LA64-NEXT: xor $a5, $a4, $a5 + ; LA64-NEXT: and $a5, $a5, $a3 + ; LA64-NEXT: xor $a5, $a4, $a5 + ; LA64-NEXT: sc.w $a5, $a2, 0 +-; LA64-NEXT: beqz $a5, .LBB37_1 ++; LA64-NEXT: beqz $a5, .LBB133_1 + ; LA64-NEXT: # %bb.2: + ; LA64-NEXT: srl.w $a0, $a4, $a0 + ; LA64-NEXT: ret +@@ -1279,11 +4501,11 @@ define i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind { + define i32 @atomicrmw_sub_i32_monotonic(ptr %a, i32 %b) nounwind { + ; LA32-LABEL: atomicrmw_sub_i32_monotonic: + ; LA32: # %bb.0: +-; LA32-NEXT: .LBB38_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: .LBB134_1: # =>This Inner Loop Header: Depth=1 + ; LA32-NEXT: ll.w $a2, $a0, 0 + ; LA32-NEXT: sub.w $a3, $a2, $a1 + ; LA32-NEXT: sc.w $a3, $a0, 0 +-; LA32-NEXT: beqz $a3, .LBB38_1 ++; LA32-NEXT: beqz $a3, .LBB134_1 + ; LA32-NEXT: # %bb.2: + ; LA32-NEXT: move $a0, $a2 + ; LA32-NEXT: ret +@@ -1329,7 +4551,7 @@ define i8 @atomicrmw_nand_i8_monotonic(ptr %a, i8 %b) nounwind { + ; LA32-NEXT: sll.w $a3, $a3, $a0 + ; LA32-NEXT: andi $a1, $a1, 255 + ; LA32-NEXT: sll.w $a1, $a1, $a0 +-; LA32-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: .LBB136_1: # =>This Inner Loop Header: Depth=1 + ; LA32-NEXT: ll.w $a4, $a2, 0 + ; LA32-NEXT: and $a5, $a4, $a1 + ; LA32-NEXT: nor $a5, $a5, $zero +@@ -1337,7 +4559,7 @@ define i8 @atomicrmw_nand_i8_monotonic(ptr %a, i8 %b) nounwind { + ; LA32-NEXT: and $a5, $a5, $a3 + ; LA32-NEXT: xor $a5, $a4, $a5 + ; LA32-NEXT: sc.w $a5, $a2, 0 +-; LA32-NEXT: beqz $a5, .LBB40_1 ++; LA32-NEXT: beqz $a5, .LBB136_1 + ; LA32-NEXT: # %bb.2: + ; LA32-NEXT: srl.w $a0, $a4, $a0 + ; LA32-NEXT: ret +@@ -1353,7 +4575,7 @@ define i8 @atomicrmw_nand_i8_monotonic(ptr %a, i8 %b) 
nounwind { + ; LA64-NEXT: andi $a1, $a1, 255 + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 +-; LA64-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB136_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a4, $a2, 0 + ; LA64-NEXT: and $a5, $a4, $a1 + ; LA64-NEXT: nor $a5, $a5, $zero +@@ -1361,7 +4583,7 @@ define i8 @atomicrmw_nand_i8_monotonic(ptr %a, i8 %b) nounwind { + ; LA64-NEXT: and $a5, $a5, $a3 + ; LA64-NEXT: xor $a5, $a4, $a5 + ; LA64-NEXT: sc.w $a5, $a2, 0 +-; LA64-NEXT: beqz $a5, .LBB40_1 ++; LA64-NEXT: beqz $a5, .LBB136_1 + ; LA64-NEXT: # %bb.2: + ; LA64-NEXT: srl.w $a0, $a4, $a0 + ; LA64-NEXT: ret +@@ -1380,7 +4602,7 @@ define i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind { + ; LA32-NEXT: sll.w $a3, $a3, $a0 + ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 + ; LA32-NEXT: sll.w $a1, $a1, $a0 +-; LA32-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: .LBB137_1: # =>This Inner Loop Header: Depth=1 + ; LA32-NEXT: ll.w $a4, $a2, 0 + ; LA32-NEXT: and $a5, $a4, $a1 + ; LA32-NEXT: nor $a5, $a5, $zero +@@ -1388,7 +4610,7 @@ define i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind { + ; LA32-NEXT: and $a5, $a5, $a3 + ; LA32-NEXT: xor $a5, $a4, $a5 + ; LA32-NEXT: sc.w $a5, $a2, 0 +-; LA32-NEXT: beqz $a5, .LBB41_1 ++; LA32-NEXT: beqz $a5, .LBB137_1 + ; LA32-NEXT: # %bb.2: + ; LA32-NEXT: srl.w $a0, $a4, $a0 + ; LA32-NEXT: ret +@@ -1405,7 +4627,7 @@ define i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind { + ; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 +-; LA64-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB137_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a4, $a2, 0 + ; LA64-NEXT: and $a5, $a4, $a1 + ; LA64-NEXT: nor $a5, $a5, $zero +@@ -1413,7 +4635,7 @@ define i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind { + ; LA64-NEXT: and $a5, $a5, $a3 + ; LA64-NEXT: xor $a5, $a4, $a5 + ; LA64-NEXT: sc.w $a5, $a2, 0 +-; LA64-NEXT: beqz $a5, .LBB41_1 ++; LA64-NEXT: beqz $a5, .LBB137_1 + ; LA64-NEXT: # %bb.2: + ; LA64-NEXT: srl.w $a0, $a4, $a0 + ; LA64-NEXT: ret +@@ -1424,24 +4646,24 @@ define i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind { + define i32 @atomicrmw_nand_i32_monotonic(ptr %a, i32 %b) nounwind { + ; LA32-LABEL: atomicrmw_nand_i32_monotonic: + ; LA32: # %bb.0: +-; LA32-NEXT: .LBB42_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: .LBB138_1: # =>This Inner Loop Header: Depth=1 + ; LA32-NEXT: ll.w $a2, $a0, 0 + ; LA32-NEXT: and $a3, $a2, $a1 + ; LA32-NEXT: nor $a3, $a3, $zero + ; LA32-NEXT: sc.w $a3, $a0, 0 +-; LA32-NEXT: beqz $a3, .LBB42_1 ++; LA32-NEXT: beqz $a3, .LBB138_1 + ; LA32-NEXT: # %bb.2: + ; LA32-NEXT: move $a0, $a2 + ; LA32-NEXT: ret + ; + ; LA64-LABEL: atomicrmw_nand_i32_monotonic: + ; LA64: # %bb.0: +-; LA64-NEXT: .LBB42_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB138_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a2, $a0, 0 + ; LA64-NEXT: and $a3, $a2, $a1 + ; LA64-NEXT: nor $a3, $a3, $zero + ; LA64-NEXT: sc.w $a3, $a0, 0 +-; LA64-NEXT: beqz $a3, .LBB42_1 ++; LA64-NEXT: beqz $a3, .LBB138_1 + ; LA64-NEXT: # %bb.2: + ; LA64-NEXT: move $a0, $a2 + ; LA64-NEXT: ret +@@ -1462,12 +4684,12 @@ define i64 @atomicrmw_nand_i64_monotonic(ptr %a, i64 %b) nounwind { + ; + ; LA64-LABEL: atomicrmw_nand_i64_monotonic: + ; LA64: # %bb.0: +-; LA64-NEXT: .LBB43_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB139_1: # =>This Inner Loop 
Header: Depth=1 + ; LA64-NEXT: ll.d $a2, $a0, 0 + ; LA64-NEXT: and $a3, $a2, $a1 + ; LA64-NEXT: nor $a3, $a3, $zero + ; LA64-NEXT: sc.d $a3, $a0, 0 +-; LA64-NEXT: beqz $a3, .LBB43_1 ++; LA64-NEXT: beqz $a3, .LBB139_1 + ; LA64-NEXT: # %bb.2: + ; LA64-NEXT: move $a0, $a2 + ; LA64-NEXT: ret +@@ -1486,11 +4708,11 @@ define i8 @atomicrmw_and_i8_monotonic(ptr %a, i8 %b) nounwind { + ; LA32-NEXT: orn $a1, $a1, $a3 + ; LA32-NEXT: addi.w $a3, $zero, -4 + ; LA32-NEXT: and $a0, $a0, $a3 +-; LA32-NEXT: .LBB44_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: .LBB140_1: # =>This Inner Loop Header: Depth=1 + ; LA32-NEXT: ll.w $a3, $a0, 0 + ; LA32-NEXT: and $a4, $a3, $a1 + ; LA32-NEXT: sc.w $a4, $a0, 0 +-; LA32-NEXT: beqz $a4, .LBB44_1 ++; LA32-NEXT: beqz $a4, .LBB140_1 + ; LA32-NEXT: # %bb.2: + ; LA32-NEXT: srl.w $a0, $a3, $a2 + ; LA32-NEXT: ret +@@ -1524,11 +4746,11 @@ define i16 @atomicrmw_and_i16_monotonic(ptr %a, i16 %b) nounwind { + ; LA32-NEXT: orn $a1, $a1, $a2 + ; LA32-NEXT: addi.w $a2, $zero, -4 + ; LA32-NEXT: and $a0, $a0, $a2 +-; LA32-NEXT: .LBB45_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: .LBB141_1: # =>This Inner Loop Header: Depth=1 + ; LA32-NEXT: ll.w $a2, $a0, 0 + ; LA32-NEXT: and $a4, $a2, $a1 + ; LA32-NEXT: sc.w $a4, $a0, 0 +-; LA32-NEXT: beqz $a4, .LBB45_1 ++; LA32-NEXT: beqz $a4, .LBB141_1 + ; LA32-NEXT: # %bb.2: + ; LA32-NEXT: srl.w $a0, $a2, $a3 + ; LA32-NEXT: ret +@@ -1554,11 +4776,11 @@ define i16 @atomicrmw_and_i16_monotonic(ptr %a, i16 %b) nounwind { + define i32 @atomicrmw_and_i32_monotonic(ptr %a, i32 %b) nounwind { + ; LA32-LABEL: atomicrmw_and_i32_monotonic: + ; LA32: # %bb.0: +-; LA32-NEXT: .LBB46_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: .LBB142_1: # =>This Inner Loop Header: Depth=1 + ; LA32-NEXT: ll.w $a2, $a0, 0 + ; LA32-NEXT: and $a3, $a2, $a1 + ; LA32-NEXT: sc.w $a3, $a0, 0 +-; LA32-NEXT: beqz $a3, .LBB46_1 ++; LA32-NEXT: beqz $a3, .LBB142_1 + ; LA32-NEXT: # %bb.2: + ; LA32-NEXT: move $a0, $a2 + ; LA32-NEXT: ret +@@ -1600,11 +4822,11 @@ define i8 @atomicrmw_or_i8_monotonic(ptr %a, i8 %b) nounwind { + ; LA32-NEXT: slli.w $a0, $a0, 3 + ; LA32-NEXT: andi $a1, $a1, 255 + ; LA32-NEXT: sll.w $a1, $a1, $a0 +-; LA32-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: .LBB144_1: # =>This Inner Loop Header: Depth=1 + ; LA32-NEXT: ll.w $a3, $a2, 0 + ; LA32-NEXT: or $a4, $a3, $a1 + ; LA32-NEXT: sc.w $a4, $a2, 0 +-; LA32-NEXT: beqz $a4, .LBB48_1 ++; LA32-NEXT: beqz $a4, .LBB144_1 + ; LA32-NEXT: # %bb.2: + ; LA32-NEXT: srl.w $a0, $a3, $a0 + ; LA32-NEXT: ret +@@ -1631,11 +4853,11 @@ define i16 @atomicrmw_or_i16_monotonic(ptr %a, i16 %b) nounwind { + ; LA32-NEXT: slli.w $a0, $a0, 3 + ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 + ; LA32-NEXT: sll.w $a1, $a1, $a0 +-; LA32-NEXT: .LBB49_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: .LBB145_1: # =>This Inner Loop Header: Depth=1 + ; LA32-NEXT: ll.w $a3, $a2, 0 + ; LA32-NEXT: or $a4, $a3, $a1 + ; LA32-NEXT: sc.w $a4, $a2, 0 +-; LA32-NEXT: beqz $a4, .LBB49_1 ++; LA32-NEXT: beqz $a4, .LBB145_1 + ; LA32-NEXT: # %bb.2: + ; LA32-NEXT: srl.w $a0, $a3, $a0 + ; LA32-NEXT: ret +@@ -1657,11 +4879,11 @@ define i16 @atomicrmw_or_i16_monotonic(ptr %a, i16 %b) nounwind { + define i32 @atomicrmw_or_i32_monotonic(ptr %a, i32 %b) nounwind { + ; LA32-LABEL: atomicrmw_or_i32_monotonic: + ; LA32: # %bb.0: +-; LA32-NEXT: .LBB50_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: .LBB146_1: # =>This Inner Loop Header: Depth=1 + ; LA32-NEXT: ll.w $a2, $a0, 0 + ; LA32-NEXT: or $a3, $a2, $a1 + ; LA32-NEXT: sc.w $a3, 
$a0, 0 +-; LA32-NEXT: beqz $a3, .LBB50_1 ++; LA32-NEXT: beqz $a3, .LBB146_1 + ; LA32-NEXT: # %bb.2: + ; LA32-NEXT: move $a0, $a2 + ; LA32-NEXT: ret +@@ -1703,11 +4925,11 @@ define i8 @atomicrmw_xor_i8_monotonic(ptr %a, i8 %b) nounwind { + ; LA32-NEXT: slli.w $a0, $a0, 3 + ; LA32-NEXT: andi $a1, $a1, 255 + ; LA32-NEXT: sll.w $a1, $a1, $a0 +-; LA32-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: .LBB148_1: # =>This Inner Loop Header: Depth=1 + ; LA32-NEXT: ll.w $a3, $a2, 0 + ; LA32-NEXT: xor $a4, $a3, $a1 + ; LA32-NEXT: sc.w $a4, $a2, 0 +-; LA32-NEXT: beqz $a4, .LBB52_1 ++; LA32-NEXT: beqz $a4, .LBB148_1 + ; LA32-NEXT: # %bb.2: + ; LA32-NEXT: srl.w $a0, $a3, $a0 + ; LA32-NEXT: ret +@@ -1734,11 +4956,11 @@ define i16 @atomicrmw_xor_i16_monotonic(ptr %a, i16 %b) nounwind { + ; LA32-NEXT: slli.w $a0, $a0, 3 + ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 + ; LA32-NEXT: sll.w $a1, $a1, $a0 +-; LA32-NEXT: .LBB53_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: .LBB149_1: # =>This Inner Loop Header: Depth=1 + ; LA32-NEXT: ll.w $a3, $a2, 0 + ; LA32-NEXT: xor $a4, $a3, $a1 + ; LA32-NEXT: sc.w $a4, $a2, 0 +-; LA32-NEXT: beqz $a4, .LBB53_1 ++; LA32-NEXT: beqz $a4, .LBB149_1 + ; LA32-NEXT: # %bb.2: + ; LA32-NEXT: srl.w $a0, $a3, $a0 + ; LA32-NEXT: ret +@@ -1760,11 +4982,11 @@ define i16 @atomicrmw_xor_i16_monotonic(ptr %a, i16 %b) nounwind { + define i32 @atomicrmw_xor_i32_monotonic(ptr %a, i32 %b) nounwind { + ; LA32-LABEL: atomicrmw_xor_i32_monotonic: + ; LA32: # %bb.0: +-; LA32-NEXT: .LBB54_1: # =>This Inner Loop Header: Depth=1 ++; LA32-NEXT: .LBB150_1: # =>This Inner Loop Header: Depth=1 + ; LA32-NEXT: ll.w $a2, $a0, 0 + ; LA32-NEXT: xor $a3, $a2, $a1 + ; LA32-NEXT: sc.w $a3, $a0, 0 +-; LA32-NEXT: beqz $a3, .LBB54_1 ++; LA32-NEXT: beqz $a3, .LBB150_1 + ; LA32-NEXT: # %bb.2: + ; LA32-NEXT: move $a0, $a2 + ; LA32-NEXT: ret +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/fence-singlethread.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/fence-singlethread.ll +new file mode 100644 +index 000000000000..8d6056bc7677 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/ir-instruction/fence-singlethread.ll +@@ -0,0 +1,17 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 ++; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 ++ ++define void @fence_singlethread() { ++; LA32-LABEL: fence_singlethread: ++; LA32: # %bb.0: ++; LA32-NEXT: dbar 0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: fence_singlethread: ++; LA64: # %bb.0: ++; LA64-NEXT: dbar 0 ++; LA64-NEXT: ret ++ fence syncscope("singlethread") seq_cst ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll +index e91d0c145eab..deff11723d27 100644 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll ++++ b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll +@@ -72,6 +72,202 @@ define i64 @load_acquire_i64(ptr %ptr) { + ret i64 %val + } + ++define i8 @load_unordered_i8(ptr %ptr) { ++; LA32-LABEL: load_unordered_i8: ++; LA32: # %bb.0: ++; LA32-NEXT: ld.b $a0, $a0, 0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: load_unordered_i8: ++; LA64: # %bb.0: ++; LA64-NEXT: ld.b $a0, $a0, 0 ++; LA64-NEXT: ret ++ %val = load atomic i8, ptr %ptr unordered, align 1 ++ ret i8 %val ++} ++ ++define i16 @load_unordered_i16(ptr %ptr) { ++; LA32-LABEL: load_unordered_i16: ++; LA32: # %bb.0: ++; LA32-NEXT: 
ld.h $a0, $a0, 0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: load_unordered_i16: ++; LA64: # %bb.0: ++; LA64-NEXT: ld.h $a0, $a0, 0 ++; LA64-NEXT: ret ++ %val = load atomic i16, ptr %ptr unordered, align 2 ++ ret i16 %val ++} ++ ++define i32 @load_unordered_i32(ptr %ptr) { ++; LA32-LABEL: load_unordered_i32: ++; LA32: # %bb.0: ++; LA32-NEXT: ld.w $a0, $a0, 0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: load_unordered_i32: ++; LA64: # %bb.0: ++; LA64-NEXT: ld.w $a0, $a0, 0 ++; LA64-NEXT: ret ++ %val = load atomic i32, ptr %ptr unordered, align 4 ++ ret i32 %val ++} ++ ++define i64 @load_unordered_i64(ptr %ptr) { ++; LA32-LABEL: load_unordered_i64: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $sp, $sp, -16 ++; LA32-NEXT: .cfi_def_cfa_offset 16 ++; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill ++; LA32-NEXT: .cfi_offset 1, -4 ++; LA32-NEXT: move $a1, $zero ++; LA32-NEXT: bl %plt(__atomic_load_8) ++; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ++; LA32-NEXT: addi.w $sp, $sp, 16 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: load_unordered_i64: ++; LA64: # %bb.0: ++; LA64-NEXT: ld.d $a0, $a0, 0 ++; LA64-NEXT: ret ++ %val = load atomic i64, ptr %ptr unordered, align 8 ++ ret i64 %val ++} ++ ++define i8 @load_monotonic_i8(ptr %ptr) { ++; LA32-LABEL: load_monotonic_i8: ++; LA32: # %bb.0: ++; LA32-NEXT: ld.b $a0, $a0, 0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: load_monotonic_i8: ++; LA64: # %bb.0: ++; LA64-NEXT: ld.b $a0, $a0, 0 ++; LA64-NEXT: ret ++ %val = load atomic i8, ptr %ptr monotonic, align 1 ++ ret i8 %val ++} ++ ++define i16 @load_monotonic_i16(ptr %ptr) { ++; LA32-LABEL: load_monotonic_i16: ++; LA32: # %bb.0: ++; LA32-NEXT: ld.h $a0, $a0, 0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: load_monotonic_i16: ++; LA64: # %bb.0: ++; LA64-NEXT: ld.h $a0, $a0, 0 ++; LA64-NEXT: ret ++ %val = load atomic i16, ptr %ptr monotonic, align 2 ++ ret i16 %val ++} ++ ++define i32 @load_monotonic_i32(ptr %ptr) { ++; LA32-LABEL: load_monotonic_i32: ++; LA32: # %bb.0: ++; LA32-NEXT: ld.w $a0, $a0, 0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: load_monotonic_i32: ++; LA64: # %bb.0: ++; LA64-NEXT: ld.w $a0, $a0, 0 ++; LA64-NEXT: ret ++ %val = load atomic i32, ptr %ptr monotonic, align 4 ++ ret i32 %val ++} ++ ++define i64 @load_monotonic_i64(ptr %ptr) { ++; LA32-LABEL: load_monotonic_i64: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $sp, $sp, -16 ++; LA32-NEXT: .cfi_def_cfa_offset 16 ++; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill ++; LA32-NEXT: .cfi_offset 1, -4 ++; LA32-NEXT: move $a1, $zero ++; LA32-NEXT: bl %plt(__atomic_load_8) ++; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ++; LA32-NEXT: addi.w $sp, $sp, 16 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: load_monotonic_i64: ++; LA64: # %bb.0: ++; LA64-NEXT: ld.d $a0, $a0, 0 ++; LA64-NEXT: ret ++ %val = load atomic i64, ptr %ptr monotonic, align 8 ++ ret i64 %val ++} ++ ++define i8 @load_seq_cst_i8(ptr %ptr) { ++; LA32-LABEL: load_seq_cst_i8: ++; LA32: # %bb.0: ++; LA32-NEXT: ld.b $a0, $a0, 0 ++; LA32-NEXT: dbar 0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: load_seq_cst_i8: ++; LA64: # %bb.0: ++; LA64-NEXT: ld.b $a0, $a0, 0 ++; LA64-NEXT: dbar 0 ++; LA64-NEXT: ret ++ %val = load atomic i8, ptr %ptr seq_cst, align 1 ++ ret i8 %val ++} ++ ++define i16 @load_seq_cst_i16(ptr %ptr) { ++; LA32-LABEL: load_seq_cst_i16: ++; LA32: # %bb.0: ++; LA32-NEXT: ld.h $a0, $a0, 0 ++; LA32-NEXT: dbar 0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: load_seq_cst_i16: ++; LA64: # %bb.0: ++; LA64-NEXT: ld.h $a0, $a0, 0 ++; LA64-NEXT: dbar 0 ++; LA64-NEXT: ret ++ %val = load atomic i16, ptr %ptr seq_cst, align 
2 ++ ret i16 %val ++} ++ ++define i32 @load_seq_cst_i32(ptr %ptr) { ++; LA32-LABEL: load_seq_cst_i32: ++; LA32: # %bb.0: ++; LA32-NEXT: ld.w $a0, $a0, 0 ++; LA32-NEXT: dbar 0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: load_seq_cst_i32: ++; LA64: # %bb.0: ++; LA64-NEXT: ld.w $a0, $a0, 0 ++; LA64-NEXT: dbar 0 ++; LA64-NEXT: ret ++ %val = load atomic i32, ptr %ptr seq_cst, align 4 ++ ret i32 %val ++} ++ ++define i64 @load_seq_cst_i64(ptr %ptr) { ++; LA32-LABEL: load_seq_cst_i64: ++; LA32: # %bb.0: ++; LA32-NEXT: addi.w $sp, $sp, -16 ++; LA32-NEXT: .cfi_def_cfa_offset 16 ++; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill ++; LA32-NEXT: .cfi_offset 1, -4 ++; LA32-NEXT: ori $a1, $zero, 5 ++; LA32-NEXT: bl %plt(__atomic_load_8) ++; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ++; LA32-NEXT: addi.w $sp, $sp, 16 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: load_seq_cst_i64: ++; LA64: # %bb.0: ++; LA64-NEXT: ld.d $a0, $a0, 0 ++; LA64-NEXT: dbar 0 ++; LA64-NEXT: ret ++ %val = load atomic i64, ptr %ptr seq_cst, align 8 ++ ret i64 %val ++} ++ + define void @store_release_i8(ptr %ptr, i8 signext %v) { + ; LA32-LABEL: store_release_i8: + ; LA32: # %bb.0: +-- +2.20.1 + diff --git a/0062-LoongArch-Support-finer-grained-DBAR-hints-for-LA664.patch b/0062-LoongArch-Support-finer-grained-DBAR-hints-for-LA664.patch new file mode 100644 index 0000000000000000000000000000000000000000..6016d851f39eff51070c7642811de107b44658a8 --- /dev/null +++ b/0062-LoongArch-Support-finer-grained-DBAR-hints-for-LA664.patch @@ -0,0 +1,840 @@ +From 90a416b90dca40241fcf6429bd14b88e40128038 Mon Sep 17 00:00:00 2001 +From: WANG Xuerui +Date: Wed, 11 Oct 2023 10:39:13 +0800 +Subject: [PATCH 62/66] [LoongArch] Support finer-grained DBAR hints for LA664+ + (#68787) + +These are treated as DBAR 0 on older uarchs, so we can start to +unconditionally emit the new hints right away. 
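+
+As a quick illustration (editor's note, mirroring the fence.ll test updates in
+this patch rather than any upstream wording), an IR fence such as
+
+  define void @acq() {
+    fence acquire   ; now lowers to "dbar 20" (0b10100) instead of "dbar 0"
+    ret void
+  }
+
+picks up the finer-grained hint, while release, acq_rel and seq_cst fences now
+use "dbar 18", "dbar 16" and "dbar 16" respectively.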
+ +Co-authored-by: WANG Rui +(cherry picked from commit 956482de13107b640cffedd08610fcccd98f708f) + +Change-Id: I98ec3538aba6658b47521917847d1a3632bc1567 +--- + .../LoongArchExpandAtomicPseudoInsts.cpp | 4 +- + .../LoongArch/LoongArchISelLowering.cpp | 20 +++++++ + .../Target/LoongArch/LoongArchISelLowering.h | 1 + + .../Target/LoongArch/LoongArchInstrInfo.td | 24 +++++++- + .../LoongArch/atomicrmw-uinc-udec-wrap.ll | 16 ++--- + .../ir-instruction/atomic-cmpxchg.ll | 24 ++++---- + .../LoongArch/ir-instruction/atomicrmw-fp.ll | 48 +++++++-------- + .../ir-instruction/fence-singlethread.ll | 4 +- + .../CodeGen/LoongArch/ir-instruction/fence.ll | 16 ++--- + .../ir-instruction/load-store-atomic.ll | 58 +++++++++---------- + 10 files changed, 129 insertions(+), 86 deletions(-) + +diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp +index eb78ef065b21..b348cb56c136 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp +@@ -579,8 +579,8 @@ bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg( + case AtomicOrdering::Acquire: + case AtomicOrdering::AcquireRelease: + case AtomicOrdering::SequentiallyConsistent: +- // TODO: acquire +- hint = 0; ++ // acquire ++ hint = 0b10100; + break; + default: + hint = 0x700; +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +index 5affaf37ad5a..33a3197013cc 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +@@ -159,6 +159,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, + // The MULO libcall is not part of libgcc, only compiler-rt. + setLibcallName(RTLIB::MULO_I128, nullptr); + ++ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); ++ + static const ISD::CondCode FPCCToExpand[] = { + ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE, + ISD::SETGE, ISD::SETNE, ISD::SETGT}; +@@ -366,6 +368,8 @@ bool LoongArchTargetLowering::isOffsetFoldingLegal( + SDValue LoongArchTargetLowering::LowerOperation(SDValue Op, + SelectionDAG &DAG) const { + switch (Op.getOpcode()) { ++ case ISD::ATOMIC_FENCE: ++ return lowerATOMIC_FENCE(Op, DAG); + case ISD::EH_DWARF_CFA: + return lowerEH_DWARF_CFA(Op, DAG); + case ISD::GlobalAddress: +@@ -542,6 +546,22 @@ LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, + return SDValue(); + } + ++SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op, ++ SelectionDAG &DAG) const { ++ SDLoc DL(Op); ++ SyncScope::ID FenceSSID = ++ static_cast(Op.getConstantOperandVal(2)); ++ ++ // singlethread fences only synchronize with signal handlers on the same ++ // thread and thus only need to preserve instruction order, not actually ++ // enforce memory ordering. ++ if (FenceSSID == SyncScope::SingleThread) ++ // MEMBARRIER is a compiler barrier; it codegens to a no-op. 
++ return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0)); ++ ++ return Op; ++} ++ + SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op, + SelectionDAG &DAG) const { + +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +index 6b5a851ec55d..23b90640a690 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +@@ -266,6 +266,7 @@ private: + MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr &MI, + MachineBasicBlock *BB) const override; ++ SDValue lowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerEH_DWARF_CFA(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const; +diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +index a9b0db30c2f6..fcbd314507a5 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +@@ -1590,7 +1590,29 @@ def : RegRegStPat; + + /// Atomic loads and stores + +-def : Pat<(atomic_fence timm, timm), (DBAR 0)>; ++// DBAR hint encoding for LA664 and later micro-architectures, paraphrased from ++// the Linux patch revealing it [1]: ++// ++// - Bit 4: kind of constraint (0: completion, 1: ordering) ++// - Bit 3: barrier for previous read (0: true, 1: false) ++// - Bit 2: barrier for previous write (0: true, 1: false) ++// - Bit 1: barrier for succeeding read (0: true, 1: false) ++// - Bit 0: barrier for succeeding write (0: true, 1: false) ++// ++// Hint 0x700: barrier for "read after read" from the same address, which is ++// e.g. needed by LL-SC loops on older models. (DBAR 0x700 behaves the same as ++// nop if such reordering is disabled on supporting newer models.) ++// ++// [1]: https://lore.kernel.org/loongarch/20230516124536.535343-1-chenhuacai@loongson.cn/ ++// ++// Implementations without support for the finer-granularity hints simply treat ++// all as the full barrier (DBAR 0), so we can unconditionally start emiting the ++// more precise hints right away. 
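++//
++// Worked example (editor's note, derived from the bit semantics listed above
++// rather than the upstream comment): the acquire hint 0b10100 (dbar 20) keeps
++// the barriers for "previous read", "succeeding read" and "succeeding write"
++// (bits 3, 1 and 0 are 0) and drops only the "previous write" barrier (bit 2
++// is 1); the release hint 0b10010 (dbar 18) instead drops only the
++// "succeeding read" barrier (bit 1 is 1); acqrel and seqcst use 0b10000
++// (dbar 16), i.e. an ordering barrier across all four access combinations.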
++ ++def : Pat<(atomic_fence 4, timm), (DBAR 0b10100)>; // acquire ++def : Pat<(atomic_fence 5, timm), (DBAR 0b10010)>; // release ++def : Pat<(atomic_fence 6, timm), (DBAR 0b10000)>; // acqrel ++def : Pat<(atomic_fence 7, timm), (DBAR 0b10000)>; // seqcst + + defm : LdPat; + defm : LdPat; +diff --git a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll +index 32106886c783..d8908acbc945 100644 +--- a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll ++++ b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll +@@ -40,7 +40,7 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) { + ; LA64-NEXT: b .LBB0_6 + ; LA64-NEXT: .LBB0_5: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB0_1 Depth=1 +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB0_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB0_1 Depth=1 + ; LA64-NEXT: addi.w $a6, $a3, 0 +@@ -93,7 +93,7 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { + ; LA64-NEXT: b .LBB1_6 + ; LA64-NEXT: .LBB1_5: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB1_1 Depth=1 +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB1_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB1_1 Depth=1 + ; LA64-NEXT: addi.w $a6, $a3, 0 +@@ -133,7 +133,7 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) { + ; LA64-NEXT: b .LBB2_6 + ; LA64-NEXT: .LBB2_5: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB2_1 Depth=1 +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB2_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB2_1 Depth=1 + ; LA64-NEXT: move $a3, $a1 +@@ -171,7 +171,7 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) { + ; LA64-NEXT: b .LBB3_6 + ; LA64-NEXT: .LBB3_5: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB3_1 Depth=1 +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB3_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB3_1 Depth=1 + ; LA64-NEXT: bne $a2, $a3, .LBB3_1 +@@ -226,7 +226,7 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) { + ; LA64-NEXT: b .LBB4_6 + ; LA64-NEXT: .LBB4_5: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB4_1 Depth=1 +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB4_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB4_1 Depth=1 + ; LA64-NEXT: addi.w $a7, $a3, 0 +@@ -284,7 +284,7 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) { + ; LA64-NEXT: b .LBB5_6 + ; LA64-NEXT: .LBB5_5: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB5_1 Depth=1 +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB5_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB5_1 Depth=1 + ; LA64-NEXT: addi.w $a7, $a3, 0 +@@ -329,7 +329,7 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) { + ; LA64-NEXT: b .LBB6_6 + ; LA64-NEXT: .LBB6_5: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB6_1 Depth=1 +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB6_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB6_1 Depth=1 + ; LA64-NEXT: move $a4, $a2 +@@ -372,7 +372,7 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) { + ; LA64-NEXT: b .LBB7_6 + ; LA64-NEXT: .LBB7_5: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB7_1 Depth=1 +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB7_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB7_1 Depth=1 + ; LA64-NEXT: bne $a2, $a3, .LBB7_1 +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll 
b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll +index 1ac20d10e587..4f25a1d69af1 100644 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll ++++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll +@@ -27,7 +27,7 @@ define void @cmpxchg_i8_acquire_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { + ; LA64-NEXT: beqz $a5, .LBB0_1 + ; LA64-NEXT: b .LBB0_4 + ; LA64-NEXT: .LBB0_3: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB0_4: + ; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire acquire +@@ -61,7 +61,7 @@ define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind + ; LA64-NEXT: beqz $a5, .LBB1_1 + ; LA64-NEXT: b .LBB1_4 + ; LA64-NEXT: .LBB1_3: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB1_4: + ; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire acquire +@@ -80,7 +80,7 @@ define void @cmpxchg_i32_acquire_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind + ; LA64-NEXT: beqz $a4, .LBB2_1 + ; LA64-NEXT: b .LBB2_4 + ; LA64-NEXT: .LBB2_3: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB2_4: + ; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire +@@ -99,7 +99,7 @@ define void @cmpxchg_i64_acquire_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind + ; LA64-NEXT: beqz $a4, .LBB3_1 + ; LA64-NEXT: b .LBB3_4 + ; LA64-NEXT: .LBB3_3: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB3_4: + ; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire acquire +@@ -132,7 +132,7 @@ define i8 @cmpxchg_i8_acquire_acquire_reti8(ptr %ptr, i8 %cmp, i8 %val) nounwind + ; LA64-NEXT: beqz $a6, .LBB4_1 + ; LA64-NEXT: b .LBB4_4 + ; LA64-NEXT: .LBB4_3: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB4_4: + ; LA64-NEXT: srl.w $a0, $a5, $a0 + ; LA64-NEXT: ret +@@ -168,7 +168,7 @@ define i16 @cmpxchg_i16_acquire_acquire_reti16(ptr %ptr, i16 %cmp, i16 %val) nou + ; LA64-NEXT: beqz $a6, .LBB5_1 + ; LA64-NEXT: b .LBB5_4 + ; LA64-NEXT: .LBB5_3: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB5_4: + ; LA64-NEXT: srl.w $a0, $a5, $a0 + ; LA64-NEXT: ret +@@ -189,7 +189,7 @@ define i32 @cmpxchg_i32_acquire_acquire_reti32(ptr %ptr, i32 %cmp, i32 %val) nou + ; LA64-NEXT: beqz $a4, .LBB6_1 + ; LA64-NEXT: b .LBB6_4 + ; LA64-NEXT: .LBB6_3: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB6_4: + ; LA64-NEXT: move $a0, $a3 + ; LA64-NEXT: ret +@@ -210,7 +210,7 @@ define i64 @cmpxchg_i64_acquire_acquire_reti64(ptr %ptr, i64 %cmp, i64 %val) nou + ; LA64-NEXT: beqz $a4, .LBB7_1 + ; LA64-NEXT: b .LBB7_4 + ; LA64-NEXT: .LBB7_3: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB7_4: + ; LA64-NEXT: move $a0, $a3 + ; LA64-NEXT: ret +@@ -245,7 +245,7 @@ define i1 @cmpxchg_i8_acquire_acquire_reti1(ptr %ptr, i8 %cmp, i8 %val) nounwind + ; LA64-NEXT: beqz $a6, .LBB8_1 + ; LA64-NEXT: b .LBB8_4 + ; LA64-NEXT: .LBB8_3: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB8_4: + ; LA64-NEXT: and $a0, $a5, $a4 + ; LA64-NEXT: addi.w $a0, $a0, 0 +@@ -284,7 +284,7 @@ define i1 @cmpxchg_i16_acquire_acquire_reti1(ptr %ptr, i16 %cmp, i16 %val) nounw + ; LA64-NEXT: beqz $a6, .LBB9_1 + ; LA64-NEXT: b .LBB9_4 + ; LA64-NEXT: .LBB9_3: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB9_4: + ; LA64-NEXT: and $a0, $a5, $a4 + ; LA64-NEXT: addi.w $a0, $a0, 0 +@@ -308,7 +308,7 @@ define i1 @cmpxchg_i32_acquire_acquire_reti1(ptr %ptr, i32 %cmp, i32 %val) nounw + ; LA64-NEXT: beqz $a4, .LBB10_1 + 
; LA64-NEXT: b .LBB10_4 + ; LA64-NEXT: .LBB10_3: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB10_4: + ; LA64-NEXT: addi.w $a0, $a1, 0 + ; LA64-NEXT: xor $a0, $a3, $a0 +@@ -331,7 +331,7 @@ define i1 @cmpxchg_i64_acquire_acquire_reti1(ptr %ptr, i64 %cmp, i64 %val) nounw + ; LA64-NEXT: beqz $a4, .LBB11_1 + ; LA64-NEXT: b .LBB11_4 + ; LA64-NEXT: .LBB11_3: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB11_4: + ; LA64-NEXT: xor $a0, $a3, $a1 + ; LA64-NEXT: sltui $a0, $a0, 1 +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll +index 02d481cb3865..589360823b14 100644 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll ++++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll +@@ -29,7 +29,7 @@ define float @float_fadd_acquire(ptr %p) nounwind { + ; LA64F-NEXT: b .LBB0_6 + ; LA64F-NEXT: .LBB0_5: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB0_1 Depth=1 +-; LA64F-NEXT: dbar 0 ++; LA64F-NEXT: dbar 20 + ; LA64F-NEXT: .LBB0_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB0_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +@@ -64,7 +64,7 @@ define float @float_fadd_acquire(ptr %p) nounwind { + ; LA64D-NEXT: b .LBB0_6 + ; LA64D-NEXT: .LBB0_5: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB0_1 Depth=1 +-; LA64D-NEXT: dbar 0 ++; LA64D-NEXT: dbar 20 + ; LA64D-NEXT: .LBB0_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB0_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +@@ -103,7 +103,7 @@ define float @float_fsub_acquire(ptr %p) nounwind { + ; LA64F-NEXT: b .LBB1_6 + ; LA64F-NEXT: .LBB1_5: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB1_1 Depth=1 +-; LA64F-NEXT: dbar 0 ++; LA64F-NEXT: dbar 20 + ; LA64F-NEXT: .LBB1_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB1_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +@@ -138,7 +138,7 @@ define float @float_fsub_acquire(ptr %p) nounwind { + ; LA64D-NEXT: b .LBB1_6 + ; LA64D-NEXT: .LBB1_5: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB1_1 Depth=1 +-; LA64D-NEXT: dbar 0 ++; LA64D-NEXT: dbar 20 + ; LA64D-NEXT: .LBB1_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB1_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +@@ -178,7 +178,7 @@ define float @float_fmin_acquire(ptr %p) nounwind { + ; LA64F-NEXT: b .LBB2_6 + ; LA64F-NEXT: .LBB2_5: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB2_1 Depth=1 +-; LA64F-NEXT: dbar 0 ++; LA64F-NEXT: dbar 20 + ; LA64F-NEXT: .LBB2_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB2_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +@@ -214,7 +214,7 @@ define float @float_fmin_acquire(ptr %p) nounwind { + ; LA64D-NEXT: b .LBB2_6 + ; LA64D-NEXT: .LBB2_5: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB2_1 Depth=1 +-; LA64D-NEXT: dbar 0 ++; LA64D-NEXT: dbar 20 + ; LA64D-NEXT: .LBB2_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB2_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +@@ -254,7 +254,7 @@ define float @float_fmax_acquire(ptr %p) nounwind { + ; LA64F-NEXT: b .LBB3_6 + ; LA64F-NEXT: .LBB3_5: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB3_1 Depth=1 +-; LA64F-NEXT: dbar 0 ++; LA64F-NEXT: dbar 20 + ; LA64F-NEXT: .LBB3_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB3_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +@@ -290,7 +290,7 @@ define float @float_fmax_acquire(ptr %p) nounwind { + ; LA64D-NEXT: b .LBB3_6 + ; LA64D-NEXT: .LBB3_5: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: 
Header=BB3_1 Depth=1 +-; LA64D-NEXT: dbar 0 ++; LA64D-NEXT: dbar 20 + ; LA64D-NEXT: .LBB3_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB3_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +@@ -1385,7 +1385,7 @@ define float @float_fadd_acq_rel(ptr %p) nounwind { + ; LA64F-NEXT: b .LBB16_6 + ; LA64F-NEXT: .LBB16_5: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB16_1 Depth=1 +-; LA64F-NEXT: dbar 0 ++; LA64F-NEXT: dbar 20 + ; LA64F-NEXT: .LBB16_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB16_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +@@ -1420,7 +1420,7 @@ define float @float_fadd_acq_rel(ptr %p) nounwind { + ; LA64D-NEXT: b .LBB16_6 + ; LA64D-NEXT: .LBB16_5: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB16_1 Depth=1 +-; LA64D-NEXT: dbar 0 ++; LA64D-NEXT: dbar 20 + ; LA64D-NEXT: .LBB16_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB16_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +@@ -1459,7 +1459,7 @@ define float @float_fsub_acq_rel(ptr %p) nounwind { + ; LA64F-NEXT: b .LBB17_6 + ; LA64F-NEXT: .LBB17_5: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB17_1 Depth=1 +-; LA64F-NEXT: dbar 0 ++; LA64F-NEXT: dbar 20 + ; LA64F-NEXT: .LBB17_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB17_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +@@ -1494,7 +1494,7 @@ define float @float_fsub_acq_rel(ptr %p) nounwind { + ; LA64D-NEXT: b .LBB17_6 + ; LA64D-NEXT: .LBB17_5: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB17_1 Depth=1 +-; LA64D-NEXT: dbar 0 ++; LA64D-NEXT: dbar 20 + ; LA64D-NEXT: .LBB17_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB17_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +@@ -1534,7 +1534,7 @@ define float @float_fmin_acq_rel(ptr %p) nounwind { + ; LA64F-NEXT: b .LBB18_6 + ; LA64F-NEXT: .LBB18_5: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB18_1 Depth=1 +-; LA64F-NEXT: dbar 0 ++; LA64F-NEXT: dbar 20 + ; LA64F-NEXT: .LBB18_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB18_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +@@ -1570,7 +1570,7 @@ define float @float_fmin_acq_rel(ptr %p) nounwind { + ; LA64D-NEXT: b .LBB18_6 + ; LA64D-NEXT: .LBB18_5: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB18_1 Depth=1 +-; LA64D-NEXT: dbar 0 ++; LA64D-NEXT: dbar 20 + ; LA64D-NEXT: .LBB18_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB18_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +@@ -1610,7 +1610,7 @@ define float @float_fmax_acq_rel(ptr %p) nounwind { + ; LA64F-NEXT: b .LBB19_6 + ; LA64F-NEXT: .LBB19_5: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB19_1 Depth=1 +-; LA64F-NEXT: dbar 0 ++; LA64F-NEXT: dbar 20 + ; LA64F-NEXT: .LBB19_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB19_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +@@ -1646,7 +1646,7 @@ define float @float_fmax_acq_rel(ptr %p) nounwind { + ; LA64D-NEXT: b .LBB19_6 + ; LA64D-NEXT: .LBB19_5: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB19_1 Depth=1 +-; LA64D-NEXT: dbar 0 ++; LA64D-NEXT: dbar 20 + ; LA64D-NEXT: .LBB19_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB19_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +@@ -2087,7 +2087,7 @@ define float @float_fadd_seq_cst(ptr %p) nounwind { + ; LA64F-NEXT: b .LBB24_6 + ; LA64F-NEXT: .LBB24_5: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB24_1 Depth=1 +-; LA64F-NEXT: dbar 0 ++; LA64F-NEXT: dbar 20 + ; LA64F-NEXT: .LBB24_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB24_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w 
$fa0, $a3 +@@ -2122,7 +2122,7 @@ define float @float_fadd_seq_cst(ptr %p) nounwind { + ; LA64D-NEXT: b .LBB24_6 + ; LA64D-NEXT: .LBB24_5: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB24_1 Depth=1 +-; LA64D-NEXT: dbar 0 ++; LA64D-NEXT: dbar 20 + ; LA64D-NEXT: .LBB24_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB24_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +@@ -2161,7 +2161,7 @@ define float @float_fsub_seq_cst(ptr %p) nounwind { + ; LA64F-NEXT: b .LBB25_6 + ; LA64F-NEXT: .LBB25_5: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB25_1 Depth=1 +-; LA64F-NEXT: dbar 0 ++; LA64F-NEXT: dbar 20 + ; LA64F-NEXT: .LBB25_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB25_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +@@ -2196,7 +2196,7 @@ define float @float_fsub_seq_cst(ptr %p) nounwind { + ; LA64D-NEXT: b .LBB25_6 + ; LA64D-NEXT: .LBB25_5: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB25_1 Depth=1 +-; LA64D-NEXT: dbar 0 ++; LA64D-NEXT: dbar 20 + ; LA64D-NEXT: .LBB25_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB25_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +@@ -2236,7 +2236,7 @@ define float @float_fmin_seq_cst(ptr %p) nounwind { + ; LA64F-NEXT: b .LBB26_6 + ; LA64F-NEXT: .LBB26_5: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB26_1 Depth=1 +-; LA64F-NEXT: dbar 0 ++; LA64F-NEXT: dbar 20 + ; LA64F-NEXT: .LBB26_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB26_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +@@ -2272,7 +2272,7 @@ define float @float_fmin_seq_cst(ptr %p) nounwind { + ; LA64D-NEXT: b .LBB26_6 + ; LA64D-NEXT: .LBB26_5: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB26_1 Depth=1 +-; LA64D-NEXT: dbar 0 ++; LA64D-NEXT: dbar 20 + ; LA64D-NEXT: .LBB26_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB26_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +@@ -2312,7 +2312,7 @@ define float @float_fmax_seq_cst(ptr %p) nounwind { + ; LA64F-NEXT: b .LBB27_6 + ; LA64F-NEXT: .LBB27_5: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB27_1 Depth=1 +-; LA64F-NEXT: dbar 0 ++; LA64F-NEXT: dbar 20 + ; LA64F-NEXT: .LBB27_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB27_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +@@ -2348,7 +2348,7 @@ define float @float_fmax_seq_cst(ptr %p) nounwind { + ; LA64D-NEXT: b .LBB27_6 + ; LA64D-NEXT: .LBB27_5: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB27_1 Depth=1 +-; LA64D-NEXT: dbar 0 ++; LA64D-NEXT: dbar 20 + ; LA64D-NEXT: .LBB27_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB27_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/fence-singlethread.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/fence-singlethread.ll +index 8d6056bc7677..a8b164a4cd3c 100644 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/fence-singlethread.ll ++++ b/llvm/test/CodeGen/LoongArch/ir-instruction/fence-singlethread.ll +@@ -5,12 +5,12 @@ + define void @fence_singlethread() { + ; LA32-LABEL: fence_singlethread: + ; LA32: # %bb.0: +-; LA32-NEXT: dbar 0 ++; LA32-NEXT: #MEMBARRIER + ; LA32-NEXT: ret + ; + ; LA64-LABEL: fence_singlethread: + ; LA64: # %bb.0: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: #MEMBARRIER + ; LA64-NEXT: ret + fence syncscope("singlethread") seq_cst + ret void +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/fence.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/fence.ll +index 724639f3c6fb..c5b2232f9b80 100644 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/fence.ll ++++ 
b/llvm/test/CodeGen/LoongArch/ir-instruction/fence.ll +@@ -5,12 +5,12 @@ + define void @fence_acquire() nounwind { + ; LA32-LABEL: fence_acquire: + ; LA32: # %bb.0: +-; LA32-NEXT: dbar 0 ++; LA32-NEXT: dbar 20 + ; LA32-NEXT: ret + ; + ; LA64-LABEL: fence_acquire: + ; LA64: # %bb.0: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: ret + fence acquire + ret void +@@ -19,12 +19,12 @@ define void @fence_acquire() nounwind { + define void @fence_release() nounwind { + ; LA32-LABEL: fence_release: + ; LA32: # %bb.0: +-; LA32-NEXT: dbar 0 ++; LA32-NEXT: dbar 18 + ; LA32-NEXT: ret + ; + ; LA64-LABEL: fence_release: + ; LA64: # %bb.0: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 18 + ; LA64-NEXT: ret + fence release + ret void +@@ -33,12 +33,12 @@ define void @fence_release() nounwind { + define void @fence_acq_rel() nounwind { + ; LA32-LABEL: fence_acq_rel: + ; LA32: # %bb.0: +-; LA32-NEXT: dbar 0 ++; LA32-NEXT: dbar 16 + ; LA32-NEXT: ret + ; + ; LA64-LABEL: fence_acq_rel: + ; LA64: # %bb.0: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 16 + ; LA64-NEXT: ret + fence acq_rel + ret void +@@ -47,12 +47,12 @@ define void @fence_acq_rel() nounwind { + define void @fence_seq_cst() nounwind { + ; LA32-LABEL: fence_seq_cst: + ; LA32: # %bb.0: +-; LA32-NEXT: dbar 0 ++; LA32-NEXT: dbar 16 + ; LA32-NEXT: ret + ; + ; LA64-LABEL: fence_seq_cst: + ; LA64: # %bb.0: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 16 + ; LA64-NEXT: ret + fence seq_cst + ret void +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll +index deff11723d27..8b170c479eed 100644 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll ++++ b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll +@@ -6,13 +6,13 @@ define i8 @load_acquire_i8(ptr %ptr) { + ; LA32-LABEL: load_acquire_i8: + ; LA32: # %bb.0: + ; LA32-NEXT: ld.b $a0, $a0, 0 +-; LA32-NEXT: dbar 0 ++; LA32-NEXT: dbar 20 + ; LA32-NEXT: ret + ; + ; LA64-LABEL: load_acquire_i8: + ; LA64: # %bb.0: + ; LA64-NEXT: ld.b $a0, $a0, 0 +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: ret + %val = load atomic i8, ptr %ptr acquire, align 1 + ret i8 %val +@@ -22,13 +22,13 @@ define i16 @load_acquire_i16(ptr %ptr) { + ; LA32-LABEL: load_acquire_i16: + ; LA32: # %bb.0: + ; LA32-NEXT: ld.h $a0, $a0, 0 +-; LA32-NEXT: dbar 0 ++; LA32-NEXT: dbar 20 + ; LA32-NEXT: ret + ; + ; LA64-LABEL: load_acquire_i16: + ; LA64: # %bb.0: + ; LA64-NEXT: ld.h $a0, $a0, 0 +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: ret + %val = load atomic i16, ptr %ptr acquire, align 2 + ret i16 %val +@@ -38,13 +38,13 @@ define i32 @load_acquire_i32(ptr %ptr) { + ; LA32-LABEL: load_acquire_i32: + ; LA32: # %bb.0: + ; LA32-NEXT: ld.w $a0, $a0, 0 +-; LA32-NEXT: dbar 0 ++; LA32-NEXT: dbar 20 + ; LA32-NEXT: ret + ; + ; LA64-LABEL: load_acquire_i32: + ; LA64: # %bb.0: + ; LA64-NEXT: ld.w $a0, $a0, 0 +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: ret + %val = load atomic i32, ptr %ptr acquire, align 4 + ret i32 %val +@@ -66,7 +66,7 @@ define i64 @load_acquire_i64(ptr %ptr) { + ; LA64-LABEL: load_acquire_i64: + ; LA64: # %bb.0: + ; LA64-NEXT: ld.d $a0, $a0, 0 +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 20 + ; LA64-NEXT: ret + %val = load atomic i64, ptr %ptr acquire, align 8 + ret i64 %val +@@ -202,13 +202,13 @@ define i8 @load_seq_cst_i8(ptr %ptr) { + ; LA32-LABEL: load_seq_cst_i8: + ; LA32: # %bb.0: + ; LA32-NEXT: ld.b $a0, $a0, 0 +-; LA32-NEXT: dbar 0 ++; LA32-NEXT: dbar 16 + ; LA32-NEXT: 
ret + ; + ; LA64-LABEL: load_seq_cst_i8: + ; LA64: # %bb.0: + ; LA64-NEXT: ld.b $a0, $a0, 0 +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 16 + ; LA64-NEXT: ret + %val = load atomic i8, ptr %ptr seq_cst, align 1 + ret i8 %val +@@ -218,13 +218,13 @@ define i16 @load_seq_cst_i16(ptr %ptr) { + ; LA32-LABEL: load_seq_cst_i16: + ; LA32: # %bb.0: + ; LA32-NEXT: ld.h $a0, $a0, 0 +-; LA32-NEXT: dbar 0 ++; LA32-NEXT: dbar 16 + ; LA32-NEXT: ret + ; + ; LA64-LABEL: load_seq_cst_i16: + ; LA64: # %bb.0: + ; LA64-NEXT: ld.h $a0, $a0, 0 +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 16 + ; LA64-NEXT: ret + %val = load atomic i16, ptr %ptr seq_cst, align 2 + ret i16 %val +@@ -234,13 +234,13 @@ define i32 @load_seq_cst_i32(ptr %ptr) { + ; LA32-LABEL: load_seq_cst_i32: + ; LA32: # %bb.0: + ; LA32-NEXT: ld.w $a0, $a0, 0 +-; LA32-NEXT: dbar 0 ++; LA32-NEXT: dbar 16 + ; LA32-NEXT: ret + ; + ; LA64-LABEL: load_seq_cst_i32: + ; LA64: # %bb.0: + ; LA64-NEXT: ld.w $a0, $a0, 0 +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 16 + ; LA64-NEXT: ret + %val = load atomic i32, ptr %ptr seq_cst, align 4 + ret i32 %val +@@ -262,7 +262,7 @@ define i64 @load_seq_cst_i64(ptr %ptr) { + ; LA64-LABEL: load_seq_cst_i64: + ; LA64: # %bb.0: + ; LA64-NEXT: ld.d $a0, $a0, 0 +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 16 + ; LA64-NEXT: ret + %val = load atomic i64, ptr %ptr seq_cst, align 8 + ret i64 %val +@@ -271,13 +271,13 @@ define i64 @load_seq_cst_i64(ptr %ptr) { + define void @store_release_i8(ptr %ptr, i8 signext %v) { + ; LA32-LABEL: store_release_i8: + ; LA32: # %bb.0: +-; LA32-NEXT: dbar 0 ++; LA32-NEXT: dbar 18 + ; LA32-NEXT: st.b $a1, $a0, 0 + ; LA32-NEXT: ret + ; + ; LA64-LABEL: store_release_i8: + ; LA64: # %bb.0: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 18 + ; LA64-NEXT: st.b $a1, $a0, 0 + ; LA64-NEXT: ret + store atomic i8 %v, ptr %ptr release, align 1 +@@ -287,13 +287,13 @@ define void @store_release_i8(ptr %ptr, i8 signext %v) { + define void @store_release_i16(ptr %ptr, i16 signext %v) { + ; LA32-LABEL: store_release_i16: + ; LA32: # %bb.0: +-; LA32-NEXT: dbar 0 ++; LA32-NEXT: dbar 18 + ; LA32-NEXT: st.h $a1, $a0, 0 + ; LA32-NEXT: ret + ; + ; LA64-LABEL: store_release_i16: + ; LA64: # %bb.0: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 18 + ; LA64-NEXT: st.h $a1, $a0, 0 + ; LA64-NEXT: ret + store atomic i16 %v, ptr %ptr release, align 2 +@@ -303,7 +303,7 @@ define void @store_release_i16(ptr %ptr, i16 signext %v) { + define void @store_release_i32(ptr %ptr, i32 signext %v) { + ; LA32-LABEL: store_release_i32: + ; LA32: # %bb.0: +-; LA32-NEXT: dbar 0 ++; LA32-NEXT: dbar 18 + ; LA32-NEXT: st.w $a1, $a0, 0 + ; LA32-NEXT: ret + ; +@@ -465,16 +465,16 @@ define void @store_monotonic_i64(ptr %ptr, i64 %v) { + define void @store_seq_cst_i8(ptr %ptr, i8 signext %v) { + ; LA32-LABEL: store_seq_cst_i8: + ; LA32: # %bb.0: +-; LA32-NEXT: dbar 0 ++; LA32-NEXT: dbar 16 + ; LA32-NEXT: st.b $a1, $a0, 0 +-; LA32-NEXT: dbar 0 ++; LA32-NEXT: dbar 16 + ; LA32-NEXT: ret + ; + ; LA64-LABEL: store_seq_cst_i8: + ; LA64: # %bb.0: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 16 + ; LA64-NEXT: st.b $a1, $a0, 0 +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 16 + ; LA64-NEXT: ret + store atomic i8 %v, ptr %ptr seq_cst, align 1 + ret void +@@ -483,16 +483,16 @@ define void @store_seq_cst_i8(ptr %ptr, i8 signext %v) { + define void @store_seq_cst_i16(ptr %ptr, i16 signext %v) { + ; LA32-LABEL: store_seq_cst_i16: + ; LA32: # %bb.0: +-; LA32-NEXT: dbar 0 ++; LA32-NEXT: dbar 16 + ; LA32-NEXT: st.h $a1, $a0, 0 +-; LA32-NEXT: dbar 0 ++; LA32-NEXT: dbar 16 + ; LA32-NEXT: ret + ; + 
; LA64-LABEL: store_seq_cst_i16: + ; LA64: # %bb.0: +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 16 + ; LA64-NEXT: st.h $a1, $a0, 0 +-; LA64-NEXT: dbar 0 ++; LA64-NEXT: dbar 16 + ; LA64-NEXT: ret + store atomic i16 %v, ptr %ptr seq_cst, align 2 + ret void +@@ -501,9 +501,9 @@ define void @store_seq_cst_i16(ptr %ptr, i16 signext %v) { + define void @store_seq_cst_i32(ptr %ptr, i32 signext %v) { + ; LA32-LABEL: store_seq_cst_i32: + ; LA32: # %bb.0: +-; LA32-NEXT: dbar 0 ++; LA32-NEXT: dbar 16 + ; LA32-NEXT: st.w $a1, $a0, 0 +-; LA32-NEXT: dbar 0 ++; LA32-NEXT: dbar 16 + ; LA32-NEXT: ret + ; + ; LA64-LABEL: store_seq_cst_i32: +-- +2.20.1 + diff --git a/0063-LoongArch-Precommit-a-test-for-atomic-cmpxchg-optmiz.patch b/0063-LoongArch-Precommit-a-test-for-atomic-cmpxchg-optmiz.patch new file mode 100644 index 0000000000000000000000000000000000000000..338aeaca387469b781d5aeef1a955c364f7b849c --- /dev/null +++ b/0063-LoongArch-Precommit-a-test-for-atomic-cmpxchg-optmiz.patch @@ -0,0 +1,652 @@ +From 6722b1cd4fa76390264167efd95871ef8cbc9750 Mon Sep 17 00:00:00 2001 +From: Weining Lu +Date: Tue, 17 Oct 2023 17:41:32 +0800 +Subject: [PATCH 63/66] [LoongArch] Precommit a test for atomic cmpxchg + optmization + +(cherry picked from commit b2773d170cb4bdb4b19ba801b5eb55395024b3ae) + +Change-Id: Iebb6cd2344f059238e3b40142300f0eee3f509b6 +--- + .../ir-instruction/atomic-cmpxchg.ll | 385 +++++++++++------- + 1 file changed, 245 insertions(+), 140 deletions(-) + +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll +index 4f25a1d69af1..174bb9d0ff7d 100644 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll ++++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll +@@ -106,6 +106,111 @@ define void @cmpxchg_i64_acquire_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind + ret void + } + ++define void @cmpxchg_i8_acquire_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ++; LA64-LABEL: cmpxchg_i8_acquire_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a3, $zero, -4 ++; LA64-NEXT: and $a3, $a0, $a3 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: andi $a1, $a1, 255 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: andi $a2, $a2, 255 ++; LA64-NEXT: sll.w $a2, $a2, $a0 ++; LA64-NEXT: ori $a4, $zero, 255 ++; LA64-NEXT: sll.w $a0, $a4, $a0 ++; LA64-NEXT: addi.w $a0, $a0, 0 ++; LA64-NEXT: addi.w $a2, $a2, 0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a3, 0 ++; LA64-NEXT: and $a5, $a4, $a0 ++; LA64-NEXT: bne $a5, $a1, .LBB4_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 ++; LA64-NEXT: andn $a5, $a4, $a0 ++; LA64-NEXT: or $a5, $a5, $a2 ++; LA64-NEXT: sc.w $a5, $a3, 0 ++; LA64-NEXT: beqz $a5, .LBB4_1 ++; LA64-NEXT: b .LBB4_4 ++; LA64-NEXT: .LBB4_3: ++; LA64-NEXT: dbar 20 ++; LA64-NEXT: .LBB4_4: ++; LA64-NEXT: ret ++ %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire monotonic ++ ret void ++} ++ ++define void @cmpxchg_i16_acquire_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwind { ++; LA64-LABEL: cmpxchg_i16_acquire_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a3, $zero, -4 ++; LA64-NEXT: and $a3, $a0, $a3 ++; LA64-NEXT: slli.d $a0, $a0, 3 ++; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 ++; LA64-NEXT: sll.w $a1, $a1, $a0 ++; LA64-NEXT: bstrpick.d $a2, $a2, 15, 0 ++; LA64-NEXT: sll.w $a2, $a2, $a0 ++; LA64-NEXT: lu12i.w $a4, 15 ++; LA64-NEXT: ori $a4, $a4, 4095 ++; LA64-NEXT: sll.w $a0, $a4, $a0 ++; LA64-NEXT: addi.w $a0, $a0, 0 
++; LA64-NEXT: addi.w $a2, $a2, 0 ++; LA64-NEXT: addi.w $a1, $a1, 0 ++; LA64-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a4, $a3, 0 ++; LA64-NEXT: and $a5, $a4, $a0 ++; LA64-NEXT: bne $a5, $a1, .LBB5_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 ++; LA64-NEXT: andn $a5, $a4, $a0 ++; LA64-NEXT: or $a5, $a5, $a2 ++; LA64-NEXT: sc.w $a5, $a3, 0 ++; LA64-NEXT: beqz $a5, .LBB5_1 ++; LA64-NEXT: b .LBB5_4 ++; LA64-NEXT: .LBB5_3: ++; LA64-NEXT: dbar 20 ++; LA64-NEXT: .LBB5_4: ++; LA64-NEXT: ret ++ %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire monotonic ++ ret void ++} ++ ++define void @cmpxchg_i32_acquire_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwind { ++; LA64-LABEL: cmpxchg_i32_acquire_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $a3, $a0, 0 ++; LA64-NEXT: bne $a3, $a1, .LBB6_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 ++; LA64-NEXT: move $a4, $a2 ++; LA64-NEXT: sc.w $a4, $a0, 0 ++; LA64-NEXT: beqz $a4, .LBB6_1 ++; LA64-NEXT: b .LBB6_4 ++; LA64-NEXT: .LBB6_3: ++; LA64-NEXT: dbar 20 ++; LA64-NEXT: .LBB6_4: ++; LA64-NEXT: ret ++ %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire monotonic ++ ret void ++} ++ ++define void @cmpxchg_i64_acquire_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwind { ++; LA64-LABEL: cmpxchg_i64_acquire_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.d $a3, $a0, 0 ++; LA64-NEXT: bne $a3, $a1, .LBB7_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB7_1 Depth=1 ++; LA64-NEXT: move $a4, $a2 ++; LA64-NEXT: sc.d $a4, $a0, 0 ++; LA64-NEXT: beqz $a4, .LBB7_1 ++; LA64-NEXT: b .LBB7_4 ++; LA64-NEXT: .LBB7_3: ++; LA64-NEXT: dbar 20 ++; LA64-NEXT: .LBB7_4: ++; LA64-NEXT: ret ++ %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire monotonic ++ ret void ++} ++ + define i8 @cmpxchg_i8_acquire_acquire_reti8(ptr %ptr, i8 %cmp, i8 %val) nounwind { + ; LA64-LABEL: cmpxchg_i8_acquire_acquire_reti8: + ; LA64: # %bb.0: +@@ -121,19 +226,19 @@ define i8 @cmpxchg_i8_acquire_acquire_reti8(ptr %ptr, i8 %cmp, i8 %val) nounwind + ; LA64-NEXT: andi $a1, $a1, 255 + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 +-; LA64-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a5, $a3, 0 + ; LA64-NEXT: and $a6, $a5, $a4 +-; LA64-NEXT: bne $a6, $a1, .LBB4_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 ++; LA64-NEXT: bne $a6, $a1, .LBB8_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB8_1 Depth=1 + ; LA64-NEXT: andn $a6, $a5, $a4 + ; LA64-NEXT: or $a6, $a6, $a2 + ; LA64-NEXT: sc.w $a6, $a3, 0 +-; LA64-NEXT: beqz $a6, .LBB4_1 +-; LA64-NEXT: b .LBB4_4 +-; LA64-NEXT: .LBB4_3: ++; LA64-NEXT: beqz $a6, .LBB8_1 ++; LA64-NEXT: b .LBB8_4 ++; LA64-NEXT: .LBB8_3: + ; LA64-NEXT: dbar 20 +-; LA64-NEXT: .LBB4_4: ++; LA64-NEXT: .LBB8_4: + ; LA64-NEXT: srl.w $a0, $a5, $a0 + ; LA64-NEXT: ret + %tmp = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire acquire +@@ -157,19 +262,19 @@ define i16 @cmpxchg_i16_acquire_acquire_reti16(ptr %ptr, i16 %cmp, i16 %val) nou + ; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 +-; LA64-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a5, $a3, 0 + ; LA64-NEXT: and $a6, $a5, $a4 +-; LA64-NEXT: bne $a6, $a1, .LBB5_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 ++; 
LA64-NEXT: bne $a6, $a1, .LBB9_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB9_1 Depth=1 + ; LA64-NEXT: andn $a6, $a5, $a4 + ; LA64-NEXT: or $a6, $a6, $a2 + ; LA64-NEXT: sc.w $a6, $a3, 0 +-; LA64-NEXT: beqz $a6, .LBB5_1 +-; LA64-NEXT: b .LBB5_4 +-; LA64-NEXT: .LBB5_3: ++; LA64-NEXT: beqz $a6, .LBB9_1 ++; LA64-NEXT: b .LBB9_4 ++; LA64-NEXT: .LBB9_3: + ; LA64-NEXT: dbar 20 +-; LA64-NEXT: .LBB5_4: ++; LA64-NEXT: .LBB9_4: + ; LA64-NEXT: srl.w $a0, $a5, $a0 + ; LA64-NEXT: ret + %tmp = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire acquire +@@ -180,17 +285,17 @@ define i16 @cmpxchg_i16_acquire_acquire_reti16(ptr %ptr, i16 %cmp, i16 %val) nou + define i32 @cmpxchg_i32_acquire_acquire_reti32(ptr %ptr, i32 %cmp, i32 %val) nounwind { + ; LA64-LABEL: cmpxchg_i32_acquire_acquire_reti32: + ; LA64: # %bb.0: +-; LA64-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a3, $a0, 0 +-; LA64-NEXT: bne $a3, $a1, .LBB6_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 ++; LA64-NEXT: bne $a3, $a1, .LBB10_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1 + ; LA64-NEXT: move $a4, $a2 + ; LA64-NEXT: sc.w $a4, $a0, 0 +-; LA64-NEXT: beqz $a4, .LBB6_1 +-; LA64-NEXT: b .LBB6_4 +-; LA64-NEXT: .LBB6_3: ++; LA64-NEXT: beqz $a4, .LBB10_1 ++; LA64-NEXT: b .LBB10_4 ++; LA64-NEXT: .LBB10_3: + ; LA64-NEXT: dbar 20 +-; LA64-NEXT: .LBB6_4: ++; LA64-NEXT: .LBB10_4: + ; LA64-NEXT: move $a0, $a3 + ; LA64-NEXT: ret + %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire +@@ -201,17 +306,17 @@ define i32 @cmpxchg_i32_acquire_acquire_reti32(ptr %ptr, i32 %cmp, i32 %val) nou + define i64 @cmpxchg_i64_acquire_acquire_reti64(ptr %ptr, i64 %cmp, i64 %val) nounwind { + ; LA64-LABEL: cmpxchg_i64_acquire_acquire_reti64: + ; LA64: # %bb.0: +-; LA64-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.d $a3, $a0, 0 +-; LA64-NEXT: bne $a3, $a1, .LBB7_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB7_1 Depth=1 ++; LA64-NEXT: bne $a3, $a1, .LBB11_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 + ; LA64-NEXT: move $a4, $a2 + ; LA64-NEXT: sc.d $a4, $a0, 0 +-; LA64-NEXT: beqz $a4, .LBB7_1 +-; LA64-NEXT: b .LBB7_4 +-; LA64-NEXT: .LBB7_3: ++; LA64-NEXT: beqz $a4, .LBB11_1 ++; LA64-NEXT: b .LBB11_4 ++; LA64-NEXT: .LBB11_3: + ; LA64-NEXT: dbar 20 +-; LA64-NEXT: .LBB7_4: ++; LA64-NEXT: .LBB11_4: + ; LA64-NEXT: move $a0, $a3 + ; LA64-NEXT: ret + %tmp = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire acquire +@@ -234,19 +339,19 @@ define i1 @cmpxchg_i8_acquire_acquire_reti1(ptr %ptr, i8 %cmp, i8 %val) nounwind + ; LA64-NEXT: addi.w $a0, $a0, 0 + ; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: addi.w $a2, $a4, 0 +-; LA64-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a5, $a3, 0 + ; LA64-NEXT: and $a6, $a5, $a2 +-; LA64-NEXT: bne $a6, $a1, .LBB8_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB8_1 Depth=1 ++; LA64-NEXT: bne $a6, $a1, .LBB12_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 + ; LA64-NEXT: andn $a6, $a5, $a2 + ; LA64-NEXT: or $a6, $a6, $a0 + ; LA64-NEXT: sc.w $a6, $a3, 0 +-; LA64-NEXT: beqz $a6, .LBB8_1 +-; LA64-NEXT: b .LBB8_4 +-; LA64-NEXT: .LBB8_3: ++; LA64-NEXT: beqz $a6, .LBB12_1 ++; LA64-NEXT: b .LBB12_4 ++; LA64-NEXT: .LBB12_3: + ; LA64-NEXT: dbar 20 +-; LA64-NEXT: .LBB8_4: ++; LA64-NEXT: .LBB12_4: + ; LA64-NEXT: and $a0, $a5, $a4 + ; LA64-NEXT: addi.w $a0, $a0, 0 
+ ; LA64-NEXT: xor $a0, $a1, $a0 +@@ -273,19 +378,19 @@ define i1 @cmpxchg_i16_acquire_acquire_reti1(ptr %ptr, i16 %cmp, i16 %val) nounw + ; LA64-NEXT: addi.w $a0, $a0, 0 + ; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: addi.w $a2, $a4, 0 +-; LA64-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a5, $a3, 0 + ; LA64-NEXT: and $a6, $a5, $a2 +-; LA64-NEXT: bne $a6, $a1, .LBB9_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB9_1 Depth=1 ++; LA64-NEXT: bne $a6, $a1, .LBB13_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 + ; LA64-NEXT: andn $a6, $a5, $a2 + ; LA64-NEXT: or $a6, $a6, $a0 + ; LA64-NEXT: sc.w $a6, $a3, 0 +-; LA64-NEXT: beqz $a6, .LBB9_1 +-; LA64-NEXT: b .LBB9_4 +-; LA64-NEXT: .LBB9_3: ++; LA64-NEXT: beqz $a6, .LBB13_1 ++; LA64-NEXT: b .LBB13_4 ++; LA64-NEXT: .LBB13_3: + ; LA64-NEXT: dbar 20 +-; LA64-NEXT: .LBB9_4: ++; LA64-NEXT: .LBB13_4: + ; LA64-NEXT: and $a0, $a5, $a4 + ; LA64-NEXT: addi.w $a0, $a0, 0 + ; LA64-NEXT: xor $a0, $a1, $a0 +@@ -299,17 +404,17 @@ define i1 @cmpxchg_i16_acquire_acquire_reti1(ptr %ptr, i16 %cmp, i16 %val) nounw + define i1 @cmpxchg_i32_acquire_acquire_reti1(ptr %ptr, i32 %cmp, i32 %val) nounwind { + ; LA64-LABEL: cmpxchg_i32_acquire_acquire_reti1: + ; LA64: # %bb.0: +-; LA64-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a3, $a0, 0 +-; LA64-NEXT: bne $a3, $a1, .LBB10_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1 ++; LA64-NEXT: bne $a3, $a1, .LBB14_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 + ; LA64-NEXT: move $a4, $a2 + ; LA64-NEXT: sc.w $a4, $a0, 0 +-; LA64-NEXT: beqz $a4, .LBB10_1 +-; LA64-NEXT: b .LBB10_4 +-; LA64-NEXT: .LBB10_3: ++; LA64-NEXT: beqz $a4, .LBB14_1 ++; LA64-NEXT: b .LBB14_4 ++; LA64-NEXT: .LBB14_3: + ; LA64-NEXT: dbar 20 +-; LA64-NEXT: .LBB10_4: ++; LA64-NEXT: .LBB14_4: + ; LA64-NEXT: addi.w $a0, $a1, 0 + ; LA64-NEXT: xor $a0, $a3, $a0 + ; LA64-NEXT: sltui $a0, $a0, 1 +@@ -322,17 +427,17 @@ define i1 @cmpxchg_i32_acquire_acquire_reti1(ptr %ptr, i32 %cmp, i32 %val) nounw + define i1 @cmpxchg_i64_acquire_acquire_reti1(ptr %ptr, i64 %cmp, i64 %val) nounwind { + ; LA64-LABEL: cmpxchg_i64_acquire_acquire_reti1: + ; LA64: # %bb.0: +-; LA64-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.d $a3, $a0, 0 +-; LA64-NEXT: bne $a3, $a1, .LBB11_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 ++; LA64-NEXT: bne $a3, $a1, .LBB15_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 + ; LA64-NEXT: move $a4, $a2 + ; LA64-NEXT: sc.d $a4, $a0, 0 +-; LA64-NEXT: beqz $a4, .LBB11_1 +-; LA64-NEXT: b .LBB11_4 +-; LA64-NEXT: .LBB11_3: ++; LA64-NEXT: beqz $a4, .LBB15_1 ++; LA64-NEXT: b .LBB15_4 ++; LA64-NEXT: .LBB15_3: + ; LA64-NEXT: dbar 20 +-; LA64-NEXT: .LBB11_4: ++; LA64-NEXT: .LBB15_4: + ; LA64-NEXT: xor $a0, $a3, $a1 + ; LA64-NEXT: sltui $a0, $a0, 1 + ; LA64-NEXT: ret +@@ -356,19 +461,19 @@ define void @cmpxchg_i8_monotonic_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind + ; LA64-NEXT: addi.w $a0, $a0, 0 + ; LA64-NEXT: addi.w $a2, $a2, 0 + ; LA64-NEXT: addi.w $a1, $a1, 0 +-; LA64-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a4, $a3, 0 + ; LA64-NEXT: and $a5, $a4, $a0 +-; LA64-NEXT: bne $a5, $a1, .LBB12_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 ++; 
LA64-NEXT: bne $a5, $a1, .LBB16_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 + ; LA64-NEXT: andn $a5, $a4, $a0 + ; LA64-NEXT: or $a5, $a5, $a2 + ; LA64-NEXT: sc.w $a5, $a3, 0 +-; LA64-NEXT: beqz $a5, .LBB12_1 +-; LA64-NEXT: b .LBB12_4 +-; LA64-NEXT: .LBB12_3: ++; LA64-NEXT: beqz $a5, .LBB16_1 ++; LA64-NEXT: b .LBB16_4 ++; LA64-NEXT: .LBB16_3: + ; LA64-NEXT: dbar 1792 +-; LA64-NEXT: .LBB12_4: ++; LA64-NEXT: .LBB16_4: + ; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val monotonic monotonic + ret void +@@ -390,19 +495,19 @@ define void @cmpxchg_i16_monotonic_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounw + ; LA64-NEXT: addi.w $a0, $a0, 0 + ; LA64-NEXT: addi.w $a2, $a2, 0 + ; LA64-NEXT: addi.w $a1, $a1, 0 +-; LA64-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a4, $a3, 0 + ; LA64-NEXT: and $a5, $a4, $a0 +-; LA64-NEXT: bne $a5, $a1, .LBB13_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 ++; LA64-NEXT: bne $a5, $a1, .LBB17_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB17_1 Depth=1 + ; LA64-NEXT: andn $a5, $a4, $a0 + ; LA64-NEXT: or $a5, $a5, $a2 + ; LA64-NEXT: sc.w $a5, $a3, 0 +-; LA64-NEXT: beqz $a5, .LBB13_1 +-; LA64-NEXT: b .LBB13_4 +-; LA64-NEXT: .LBB13_3: ++; LA64-NEXT: beqz $a5, .LBB17_1 ++; LA64-NEXT: b .LBB17_4 ++; LA64-NEXT: .LBB17_3: + ; LA64-NEXT: dbar 1792 +-; LA64-NEXT: .LBB13_4: ++; LA64-NEXT: .LBB17_4: + ; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val monotonic monotonic + ret void +@@ -411,17 +516,17 @@ define void @cmpxchg_i16_monotonic_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounw + define void @cmpxchg_i32_monotonic_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwind { + ; LA64-LABEL: cmpxchg_i32_monotonic_monotonic: + ; LA64: # %bb.0: +-; LA64-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a3, $a0, 0 +-; LA64-NEXT: bne $a3, $a1, .LBB14_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 ++; LA64-NEXT: bne $a3, $a1, .LBB18_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB18_1 Depth=1 + ; LA64-NEXT: move $a4, $a2 + ; LA64-NEXT: sc.w $a4, $a0, 0 +-; LA64-NEXT: beqz $a4, .LBB14_1 +-; LA64-NEXT: b .LBB14_4 +-; LA64-NEXT: .LBB14_3: ++; LA64-NEXT: beqz $a4, .LBB18_1 ++; LA64-NEXT: b .LBB18_4 ++; LA64-NEXT: .LBB18_3: + ; LA64-NEXT: dbar 1792 +-; LA64-NEXT: .LBB14_4: ++; LA64-NEXT: .LBB18_4: + ; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic + ret void +@@ -430,17 +535,17 @@ define void @cmpxchg_i32_monotonic_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounw + define void @cmpxchg_i64_monotonic_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwind { + ; LA64-LABEL: cmpxchg_i64_monotonic_monotonic: + ; LA64: # %bb.0: +-; LA64-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.d $a3, $a0, 0 +-; LA64-NEXT: bne $a3, $a1, .LBB15_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 ++; LA64-NEXT: bne $a3, $a1, .LBB19_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB19_1 Depth=1 + ; LA64-NEXT: move $a4, $a2 + ; LA64-NEXT: sc.d $a4, $a0, 0 +-; LA64-NEXT: beqz $a4, .LBB15_1 +-; LA64-NEXT: b .LBB15_4 +-; LA64-NEXT: .LBB15_3: ++; LA64-NEXT: beqz $a4, .LBB19_1 ++; LA64-NEXT: b .LBB19_4 ++; LA64-NEXT: .LBB19_3: + ; LA64-NEXT: dbar 1792 +-; LA64-NEXT: .LBB15_4: ++; LA64-NEXT: .LBB19_4: + ; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val monotonic monotonic + 
ret void +@@ -461,19 +566,19 @@ define i8 @cmpxchg_i8_monotonic_monotonic_reti8(ptr %ptr, i8 %cmp, i8 %val) noun + ; LA64-NEXT: andi $a1, $a1, 255 + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 +-; LA64-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a5, $a3, 0 + ; LA64-NEXT: and $a6, $a5, $a4 +-; LA64-NEXT: bne $a6, $a1, .LBB16_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 ++; LA64-NEXT: bne $a6, $a1, .LBB20_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB20_1 Depth=1 + ; LA64-NEXT: andn $a6, $a5, $a4 + ; LA64-NEXT: or $a6, $a6, $a2 + ; LA64-NEXT: sc.w $a6, $a3, 0 +-; LA64-NEXT: beqz $a6, .LBB16_1 +-; LA64-NEXT: b .LBB16_4 +-; LA64-NEXT: .LBB16_3: ++; LA64-NEXT: beqz $a6, .LBB20_1 ++; LA64-NEXT: b .LBB20_4 ++; LA64-NEXT: .LBB20_3: + ; LA64-NEXT: dbar 1792 +-; LA64-NEXT: .LBB16_4: ++; LA64-NEXT: .LBB20_4: + ; LA64-NEXT: srl.w $a0, $a5, $a0 + ; LA64-NEXT: ret + %tmp = cmpxchg ptr %ptr, i8 %cmp, i8 %val monotonic monotonic +@@ -497,19 +602,19 @@ define i16 @cmpxchg_i16_monotonic_monotonic_reti16(ptr %ptr, i16 %cmp, i16 %val) + ; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 + ; LA64-NEXT: sll.w $a1, $a1, $a0 + ; LA64-NEXT: addi.w $a1, $a1, 0 +-; LA64-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a5, $a3, 0 + ; LA64-NEXT: and $a6, $a5, $a4 +-; LA64-NEXT: bne $a6, $a1, .LBB17_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB17_1 Depth=1 ++; LA64-NEXT: bne $a6, $a1, .LBB21_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB21_1 Depth=1 + ; LA64-NEXT: andn $a6, $a5, $a4 + ; LA64-NEXT: or $a6, $a6, $a2 + ; LA64-NEXT: sc.w $a6, $a3, 0 +-; LA64-NEXT: beqz $a6, .LBB17_1 +-; LA64-NEXT: b .LBB17_4 +-; LA64-NEXT: .LBB17_3: ++; LA64-NEXT: beqz $a6, .LBB21_1 ++; LA64-NEXT: b .LBB21_4 ++; LA64-NEXT: .LBB21_3: + ; LA64-NEXT: dbar 1792 +-; LA64-NEXT: .LBB17_4: ++; LA64-NEXT: .LBB21_4: + ; LA64-NEXT: srl.w $a0, $a5, $a0 + ; LA64-NEXT: ret + %tmp = cmpxchg ptr %ptr, i16 %cmp, i16 %val monotonic monotonic +@@ -520,17 +625,17 @@ define i16 @cmpxchg_i16_monotonic_monotonic_reti16(ptr %ptr, i16 %cmp, i16 %val) + define i32 @cmpxchg_i32_monotonic_monotonic_reti32(ptr %ptr, i32 %cmp, i32 %val) nounwind { + ; LA64-LABEL: cmpxchg_i32_monotonic_monotonic_reti32: + ; LA64: # %bb.0: +-; LA64-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a3, $a0, 0 +-; LA64-NEXT: bne $a3, $a1, .LBB18_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB18_1 Depth=1 ++; LA64-NEXT: bne $a3, $a1, .LBB22_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1 + ; LA64-NEXT: move $a4, $a2 + ; LA64-NEXT: sc.w $a4, $a0, 0 +-; LA64-NEXT: beqz $a4, .LBB18_1 +-; LA64-NEXT: b .LBB18_4 +-; LA64-NEXT: .LBB18_3: ++; LA64-NEXT: beqz $a4, .LBB22_1 ++; LA64-NEXT: b .LBB22_4 ++; LA64-NEXT: .LBB22_3: + ; LA64-NEXT: dbar 1792 +-; LA64-NEXT: .LBB18_4: ++; LA64-NEXT: .LBB22_4: + ; LA64-NEXT: move $a0, $a3 + ; LA64-NEXT: ret + %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic +@@ -541,17 +646,17 @@ define i32 @cmpxchg_i32_monotonic_monotonic_reti32(ptr %ptr, i32 %cmp, i32 %val) + define i64 @cmpxchg_i64_monotonic_monotonic_reti64(ptr %ptr, i64 %cmp, i64 %val) nounwind { + ; LA64-LABEL: cmpxchg_i64_monotonic_monotonic_reti64: + ; LA64: # %bb.0: +-; LA64-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 + ; 
LA64-NEXT: ll.d $a3, $a0, 0 +-; LA64-NEXT: bne $a3, $a1, .LBB19_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB19_1 Depth=1 ++; LA64-NEXT: bne $a3, $a1, .LBB23_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB23_1 Depth=1 + ; LA64-NEXT: move $a4, $a2 + ; LA64-NEXT: sc.d $a4, $a0, 0 +-; LA64-NEXT: beqz $a4, .LBB19_1 +-; LA64-NEXT: b .LBB19_4 +-; LA64-NEXT: .LBB19_3: ++; LA64-NEXT: beqz $a4, .LBB23_1 ++; LA64-NEXT: b .LBB23_4 ++; LA64-NEXT: .LBB23_3: + ; LA64-NEXT: dbar 1792 +-; LA64-NEXT: .LBB19_4: ++; LA64-NEXT: .LBB23_4: + ; LA64-NEXT: move $a0, $a3 + ; LA64-NEXT: ret + %tmp = cmpxchg ptr %ptr, i64 %cmp, i64 %val monotonic monotonic +@@ -574,19 +679,19 @@ define i1 @cmpxchg_i8_monotonic_monotonic_reti1(ptr %ptr, i8 %cmp, i8 %val) noun + ; LA64-NEXT: addi.w $a0, $a0, 0 + ; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: addi.w $a2, $a4, 0 +-; LA64-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a5, $a3, 0 + ; LA64-NEXT: and $a6, $a5, $a2 +-; LA64-NEXT: bne $a6, $a1, .LBB20_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB20_1 Depth=1 ++; LA64-NEXT: bne $a6, $a1, .LBB24_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB24_1 Depth=1 + ; LA64-NEXT: andn $a6, $a5, $a2 + ; LA64-NEXT: or $a6, $a6, $a0 + ; LA64-NEXT: sc.w $a6, $a3, 0 +-; LA64-NEXT: beqz $a6, .LBB20_1 +-; LA64-NEXT: b .LBB20_4 +-; LA64-NEXT: .LBB20_3: ++; LA64-NEXT: beqz $a6, .LBB24_1 ++; LA64-NEXT: b .LBB24_4 ++; LA64-NEXT: .LBB24_3: + ; LA64-NEXT: dbar 1792 +-; LA64-NEXT: .LBB20_4: ++; LA64-NEXT: .LBB24_4: + ; LA64-NEXT: and $a0, $a5, $a4 + ; LA64-NEXT: addi.w $a0, $a0, 0 + ; LA64-NEXT: xor $a0, $a1, $a0 +@@ -613,19 +718,19 @@ define i1 @cmpxchg_i16_monotonic_monotonic_reti1(ptr %ptr, i16 %cmp, i16 %val) n + ; LA64-NEXT: addi.w $a0, $a0, 0 + ; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: addi.w $a2, $a4, 0 +-; LA64-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a5, $a3, 0 + ; LA64-NEXT: and $a6, $a5, $a2 +-; LA64-NEXT: bne $a6, $a1, .LBB21_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB21_1 Depth=1 ++; LA64-NEXT: bne $a6, $a1, .LBB25_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB25_1 Depth=1 + ; LA64-NEXT: andn $a6, $a5, $a2 + ; LA64-NEXT: or $a6, $a6, $a0 + ; LA64-NEXT: sc.w $a6, $a3, 0 +-; LA64-NEXT: beqz $a6, .LBB21_1 +-; LA64-NEXT: b .LBB21_4 +-; LA64-NEXT: .LBB21_3: ++; LA64-NEXT: beqz $a6, .LBB25_1 ++; LA64-NEXT: b .LBB25_4 ++; LA64-NEXT: .LBB25_3: + ; LA64-NEXT: dbar 1792 +-; LA64-NEXT: .LBB21_4: ++; LA64-NEXT: .LBB25_4: + ; LA64-NEXT: and $a0, $a5, $a4 + ; LA64-NEXT: addi.w $a0, $a0, 0 + ; LA64-NEXT: xor $a0, $a1, $a0 +@@ -639,17 +744,17 @@ define i1 @cmpxchg_i16_monotonic_monotonic_reti1(ptr %ptr, i16 %cmp, i16 %val) n + define i1 @cmpxchg_i32_monotonic_monotonic_reti1(ptr %ptr, i32 %cmp, i32 %val) nounwind { + ; LA64-LABEL: cmpxchg_i32_monotonic_monotonic_reti1: + ; LA64: # %bb.0: +-; LA64-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a3, $a0, 0 +-; LA64-NEXT: bne $a3, $a1, .LBB22_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1 ++; LA64-NEXT: bne $a3, $a1, .LBB26_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB26_1 Depth=1 + ; LA64-NEXT: move $a4, $a2 + ; LA64-NEXT: sc.w $a4, $a0, 0 +-; LA64-NEXT: beqz $a4, .LBB22_1 +-; LA64-NEXT: b .LBB22_4 +-; LA64-NEXT: .LBB22_3: ++; LA64-NEXT: beqz $a4, .LBB26_1 ++; LA64-NEXT: b .LBB26_4 ++; LA64-NEXT: .LBB26_3: + ; LA64-NEXT: dbar 
1792 +-; LA64-NEXT: .LBB22_4: ++; LA64-NEXT: .LBB26_4: + ; LA64-NEXT: addi.w $a0, $a1, 0 + ; LA64-NEXT: xor $a0, $a3, $a0 + ; LA64-NEXT: sltui $a0, $a0, 1 +@@ -662,17 +767,17 @@ define i1 @cmpxchg_i32_monotonic_monotonic_reti1(ptr %ptr, i32 %cmp, i32 %val) n + define i1 @cmpxchg_i64_monotonic_monotonic_reti1(ptr %ptr, i64 %cmp, i64 %val) nounwind { + ; LA64-LABEL: cmpxchg_i64_monotonic_monotonic_reti1: + ; LA64: # %bb.0: +-; LA64-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.d $a3, $a0, 0 +-; LA64-NEXT: bne $a3, $a1, .LBB23_3 +-; LA64-NEXT: # %bb.2: # in Loop: Header=BB23_1 Depth=1 ++; LA64-NEXT: bne $a3, $a1, .LBB27_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB27_1 Depth=1 + ; LA64-NEXT: move $a4, $a2 + ; LA64-NEXT: sc.d $a4, $a0, 0 +-; LA64-NEXT: beqz $a4, .LBB23_1 +-; LA64-NEXT: b .LBB23_4 +-; LA64-NEXT: .LBB23_3: ++; LA64-NEXT: beqz $a4, .LBB27_1 ++; LA64-NEXT: b .LBB27_4 ++; LA64-NEXT: .LBB27_3: + ; LA64-NEXT: dbar 1792 +-; LA64-NEXT: .LBB23_4: ++; LA64-NEXT: .LBB27_4: + ; LA64-NEXT: xor $a0, $a3, $a1 + ; LA64-NEXT: sltui $a0, $a0, 1 + ; LA64-NEXT: ret +-- +2.20.1 + diff --git a/0064-LoongArch-Improve-codegen-for-atomic-cmpxchg-ops-693.patch b/0064-LoongArch-Improve-codegen-for-atomic-cmpxchg-ops-693.patch new file mode 100644 index 0000000000000000000000000000000000000000..77cf772f34401df009c575c3fe62525a5e6a6418 --- /dev/null +++ b/0064-LoongArch-Improve-codegen-for-atomic-cmpxchg-ops-693.patch @@ -0,0 +1,205 @@ +From 0604fa00628899bf420fe2d0621b3fffbe52547c Mon Sep 17 00:00:00 2001 +From: Lu Weining <90239436+SixWeining@users.noreply.github.com> +Date: Thu, 19 Oct 2023 09:21:51 +0800 +Subject: [PATCH 64/66] [LoongArch] Improve codegen for atomic cmpxchg ops + (#69339) + +PR #67391 improved atomic codegen by handling memory ordering specified +by the `cmpxchg` instruction. An acquire barrier needs to be generated +when memory ordering includes an acquire operation. This PR improves the +codegen further by only handling the failure ordering. + +(cherry picked from commit 78abc45c44cdadf76b30e1f3dc24936bb5627d68) + +Change-Id: I00391ad1aaf5c64ae95cc0f4f84a0b480a2fb5b3 +--- + .../LoongArchExpandAtomicPseudoInsts.cpp | 4 +- + .../LoongArch/LoongArchISelLowering.cpp | 7 ++- + .../Target/LoongArch/LoongArchInstrInfo.td | 55 ++++++++++++++++--- + .../ir-instruction/atomic-cmpxchg.ll | 8 +-- + 4 files changed, 56 insertions(+), 18 deletions(-) + +diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp +index b348cb56c136..18a532b55ee5 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp +@@ -571,11 +571,11 @@ bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg( + BuildMI(LoopTailMBB, DL, TII->get(LoongArch::B)).addMBB(DoneMBB); + } + +- AtomicOrdering Ordering = ++ AtomicOrdering FailureOrdering = + static_cast(MI.getOperand(IsMasked ? 
6 : 5).getImm()); + int hint; + +- switch (Ordering) { ++ switch (FailureOrdering) { + case AtomicOrdering::Acquire: + case AtomicOrdering::AcquireRelease: + case AtomicOrdering::SequentiallyConsistent: +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +index 33a3197013cc..99328f09921f 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +@@ -4492,8 +4492,9 @@ LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR( + Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic( + IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, + Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const { +- Value *Ordering = +- Builder.getIntN(Subtarget.getGRLen(), static_cast(Ord)); ++ AtomicOrdering FailOrd = CI->getFailureOrdering(); ++ Value *FailureOrdering = ++ Builder.getIntN(Subtarget.getGRLen(), static_cast(FailOrd)); + + // TODO: Support cmpxchg on LA32. + Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64; +@@ -4504,7 +4505,7 @@ Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic( + Function *MaskedCmpXchg = + Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys); + Value *Result = Builder.CreateCall( +- MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering}); ++ MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering}); + Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); + return Result; + } +diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +index fcbd314507a5..ab1890556814 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +@@ -1753,7 +1753,7 @@ def PseudoMaskedAtomicLoadMin32 : PseudoMaskedAMMinMax; + + class PseudoCmpXchg + : Pseudo<(outs GPR:$res, GPR:$scratch), +- (ins GPR:$addr, GPR:$cmpval, GPR:$newval, grlenimm:$ordering)> { ++ (ins GPR:$addr, GPR:$cmpval, GPR:$newval, grlenimm:$fail_order)> { + let Constraints = "@earlyclobber $res,@earlyclobber $scratch"; + let mayLoad = 1; + let mayStore = 1; +@@ -1767,7 +1767,7 @@ def PseudoCmpXchg64 : PseudoCmpXchg; + def PseudoMaskedCmpXchg32 + : Pseudo<(outs GPR:$res, GPR:$scratch), + (ins GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, +- grlenimm:$ordering)> { ++ grlenimm:$fail_order)> { + let Constraints = "@earlyclobber $res,@earlyclobber $scratch"; + let mayLoad = 1; + let mayStore = 1; +@@ -1785,6 +1785,43 @@ class AtomicPat + : Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering), + (AMInst GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering)>; + ++// These atomic cmpxchg PatFrags only care about the failure ordering. ++// The PatFrags defined by multiclass `ternary_atomic_op_ord` in ++// TargetSelectionDAG.td care about the merged memory ordering that is the ++// stronger one between success and failure. But for LoongArch LL-SC we only ++// need to care about the failure ordering as explained in PR #67391. So we ++// define these PatFrags that will be used to define cmpxchg pats below. 
++multiclass ternary_atomic_op_failure_ord { ++ def NAME#_failure_monotonic : PatFrag<(ops node:$ptr, node:$cmp, node:$val), ++ (!cast(NAME) node:$ptr, node:$cmp, node:$val), [{ ++ AtomicOrdering Ordering = cast(N)->getFailureOrdering(); ++ return Ordering == AtomicOrdering::Monotonic; ++ }]>; ++ def NAME#_failure_acquire : PatFrag<(ops node:$ptr, node:$cmp, node:$val), ++ (!cast(NAME) node:$ptr, node:$cmp, node:$val), [{ ++ AtomicOrdering Ordering = cast(N)->getFailureOrdering(); ++ return Ordering == AtomicOrdering::Acquire; ++ }]>; ++ def NAME#_failure_release : PatFrag<(ops node:$ptr, node:$cmp, node:$val), ++ (!cast(NAME) node:$ptr, node:$cmp, node:$val), [{ ++ AtomicOrdering Ordering = cast(N)->getFailureOrdering(); ++ return Ordering == AtomicOrdering::Release; ++ }]>; ++ def NAME#_failure_acq_rel : PatFrag<(ops node:$ptr, node:$cmp, node:$val), ++ (!cast(NAME) node:$ptr, node:$cmp, node:$val), [{ ++ AtomicOrdering Ordering = cast(N)->getFailureOrdering(); ++ return Ordering == AtomicOrdering::AcquireRelease; ++ }]>; ++ def NAME#_failure_seq_cst : PatFrag<(ops node:$ptr, node:$cmp, node:$val), ++ (!cast(NAME) node:$ptr, node:$cmp, node:$val), [{ ++ AtomicOrdering Ordering = cast(N)->getFailureOrdering(); ++ return Ordering == AtomicOrdering::SequentiallyConsistent; ++ }]>; ++} ++ ++defm atomic_cmp_swap_32 : ternary_atomic_op_failure_ord; ++defm atomic_cmp_swap_64 : ternary_atomic_op_failure_ord; ++ + let Predicates = [IsLA64] in { + def : AtomicPat; +@@ -1847,24 +1884,24 @@ def : AtomicPat { +- def : Pat<(vt (!cast(Op#"_monotonic") GPR:$addr, GPR:$cmp, GPR:$new)), ++ def : Pat<(vt (!cast(Op#"_failure_monotonic") GPR:$addr, GPR:$cmp, GPR:$new)), + (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 2)>; +- def : Pat<(vt (!cast(Op#"_acquire") GPR:$addr, GPR:$cmp, GPR:$new)), ++ def : Pat<(vt (!cast(Op#"_failure_acquire") GPR:$addr, GPR:$cmp, GPR:$new)), + (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 4)>; +- def : Pat<(vt (!cast(Op#"_release") GPR:$addr, GPR:$cmp, GPR:$new)), ++ def : Pat<(vt (!cast(Op#"_failure_release") GPR:$addr, GPR:$cmp, GPR:$new)), + (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 5)>; +- def : Pat<(vt (!cast(Op#"_acq_rel") GPR:$addr, GPR:$cmp, GPR:$new)), ++ def : Pat<(vt (!cast(Op#"_failure_acq_rel") GPR:$addr, GPR:$cmp, GPR:$new)), + (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 6)>; +- def : Pat<(vt (!cast(Op#"_seq_cst") GPR:$addr, GPR:$cmp, GPR:$new)), ++ def : Pat<(vt (!cast(Op#"_failure_seq_cst") GPR:$addr, GPR:$cmp, GPR:$new)), + (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 7)>; + } + + defm : PseudoCmpXchgPat<"atomic_cmp_swap_32", PseudoCmpXchg32>; + defm : PseudoCmpXchgPat<"atomic_cmp_swap_64", PseudoCmpXchg64, i64>; + def : Pat<(int_loongarch_masked_cmpxchg_i64 +- GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering), ++ GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$fail_order), + (PseudoMaskedCmpXchg32 +- GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering)>; ++ GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$fail_order)>; + + def : PseudoMaskedAMMinMaxPat; +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll +index 174bb9d0ff7d..1dd3f39852d8 100644 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll ++++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll +@@ -132,7 +132,7 @@ define void @cmpxchg_i8_acquire_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { + ; LA64-NEXT: beqz $a5, .LBB4_1 + ; LA64-NEXT: b .LBB4_4 + ; 
LA64-NEXT: .LBB4_3: +-; LA64-NEXT: dbar 20 ++; LA64-NEXT: dbar 1792 + ; LA64-NEXT: .LBB4_4: + ; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire monotonic +@@ -166,7 +166,7 @@ define void @cmpxchg_i16_acquire_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin + ; LA64-NEXT: beqz $a5, .LBB5_1 + ; LA64-NEXT: b .LBB5_4 + ; LA64-NEXT: .LBB5_3: +-; LA64-NEXT: dbar 20 ++; LA64-NEXT: dbar 1792 + ; LA64-NEXT: .LBB5_4: + ; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire monotonic +@@ -185,7 +185,7 @@ define void @cmpxchg_i32_acquire_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin + ; LA64-NEXT: beqz $a4, .LBB6_1 + ; LA64-NEXT: b .LBB6_4 + ; LA64-NEXT: .LBB6_3: +-; LA64-NEXT: dbar 20 ++; LA64-NEXT: dbar 1792 + ; LA64-NEXT: .LBB6_4: + ; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire monotonic +@@ -204,7 +204,7 @@ define void @cmpxchg_i64_acquire_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin + ; LA64-NEXT: beqz $a4, .LBB7_1 + ; LA64-NEXT: b .LBB7_4 + ; LA64-NEXT: .LBB7_3: +-; LA64-NEXT: dbar 20 ++; LA64-NEXT: dbar 1792 + ; LA64-NEXT: .LBB7_4: + ; LA64-NEXT: ret + %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire monotonic +-- +2.20.1 + diff --git a/0065-LoongArch-Override-LoongArchTargetLowering-getExtend.patch b/0065-LoongArch-Override-LoongArchTargetLowering-getExtend.patch new file mode 100644 index 0000000000000000000000000000000000000000..9e6a10bd35a4163fd0827630d4005018165ea805 --- /dev/null +++ b/0065-LoongArch-Override-LoongArchTargetLowering-getExtend.patch @@ -0,0 +1,1143 @@ +From 120922e20f54392ccb9e60050e8c2531e284b8aa Mon Sep 17 00:00:00 2001 +From: Lu Weining +Date: Mon, 4 Mar 2024 08:38:52 +0800 +Subject: [PATCH 65/66] [LoongArch] Override + LoongArchTargetLowering::getExtendForAtomicCmpSwapArg (#83656) + +This patch aims to solve Firefox issue: +https://bugzilla.mozilla.org/show_bug.cgi?id=1882301 + +Similar to 616289ed2922. Currently LoongArch uses an ll.[wd]/sc.[wd] +loop for ATOMIC_CMP_XCHG. Because the comparison in the loop is +full-width (i.e. the `bne` instruction), we must sign extend the input +comparsion argument. + +Note that LoongArch ISA manual V1.1 has introduced compare-and-swap +instructions. We would change the implementation (return `ANY_EXTEND`) +when we support them. + +(cherry picked from commit 5f058aa211995d2f0df2a0e063532832569cb7a8) +(cherry picked from commit ea6c457b8dd2d0e6a7f05b4a5bdd2686085e1ec0) + +Change-Id: I463e8acf2eebf981bdb9d38da5040b7c2873249f +--- + .../LoongArch/LoongArchISelLowering.cpp | 5 + + .../Target/LoongArch/LoongArchISelLowering.h | 2 + + .../LoongArch/atomicrmw-uinc-udec-wrap.ll | 120 +++++++------ + .../ir-instruction/atomic-cmpxchg.ll | 25 +-- + .../LoongArch/ir-instruction/atomicrmw-fp.ll | 160 +++++++++--------- + 5 files changed, 159 insertions(+), 153 deletions(-) + +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +index 99328f09921f..4fc2b4709840 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +@@ -4893,3 +4893,8 @@ bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const { + + return !isa(Y); + } ++ ++ISD::NodeType LoongArchTargetLowering::getExtendForAtomicCmpSwapArg() const { ++ // TODO: LAMCAS will use amcas{_DB,}.[bhwd] which does not require extension. 
++ return ISD::SIGN_EXTEND; ++} +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +index 23b90640a690..2c9826a13237 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +@@ -203,6 +203,8 @@ public: + return ISD::SIGN_EXTEND; + } + ++ ISD::NodeType getExtendForAtomicCmpSwapArg() const override; ++ + Register getRegisterByName(const char *RegName, LLT VT, + const MachineFunction &MF) const override; + bool mayBeEmittedAsTailCall(const CallInst *CI) const override; +diff --git a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll +index d8908acbc945..f0baf19bcf0e 100644 +--- a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll ++++ b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll +@@ -26,15 +26,16 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) { + ; LA64-NEXT: andi $a5, $a5, 255 + ; LA64-NEXT: sll.w $a5, $a5, $a0 + ; LA64-NEXT: and $a6, $a3, $a4 +-; LA64-NEXT: or $a6, $a6, $a5 ++; LA64-NEXT: or $a5, $a6, $a5 ++; LA64-NEXT: addi.w $a6, $a3, 0 + ; LA64-NEXT: .LBB0_3: # %atomicrmw.start + ; LA64-NEXT: # Parent Loop BB0_1 Depth=1 + ; LA64-NEXT: # => This Inner Loop Header: Depth=2 +-; LA64-NEXT: ll.w $a5, $a2, 0 +-; LA64-NEXT: bne $a5, $a3, .LBB0_5 ++; LA64-NEXT: ll.w $a3, $a2, 0 ++; LA64-NEXT: bne $a3, $a6, .LBB0_5 + ; LA64-NEXT: # %bb.4: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB0_3 Depth=2 +-; LA64-NEXT: move $a7, $a6 ++; LA64-NEXT: move $a7, $a5 + ; LA64-NEXT: sc.w $a7, $a2, 0 + ; LA64-NEXT: beqz $a7, .LBB0_3 + ; LA64-NEXT: b .LBB0_6 +@@ -43,11 +44,9 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) { + ; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB0_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB0_1 Depth=1 +-; LA64-NEXT: addi.w $a6, $a3, 0 +-; LA64-NEXT: move $a3, $a5 +-; LA64-NEXT: bne $a5, $a6, .LBB0_1 ++; LA64-NEXT: bne $a3, $a6, .LBB0_1 + ; LA64-NEXT: # %bb.2: # %atomicrmw.end +-; LA64-NEXT: srl.w $a0, $a5, $a0 ++; LA64-NEXT: srl.w $a0, $a3, $a0 + ; LA64-NEXT: ret + %result = atomicrmw uinc_wrap ptr %ptr, i8 %val seq_cst + ret i8 %result +@@ -79,15 +78,16 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { + ; LA64-NEXT: bstrpick.d $a5, $a5, 15, 0 + ; LA64-NEXT: sll.w $a5, $a5, $a0 + ; LA64-NEXT: and $a6, $a3, $a4 +-; LA64-NEXT: or $a6, $a6, $a5 ++; LA64-NEXT: or $a5, $a6, $a5 ++; LA64-NEXT: addi.w $a6, $a3, 0 + ; LA64-NEXT: .LBB1_3: # %atomicrmw.start + ; LA64-NEXT: # Parent Loop BB1_1 Depth=1 + ; LA64-NEXT: # => This Inner Loop Header: Depth=2 +-; LA64-NEXT: ll.w $a5, $a2, 0 +-; LA64-NEXT: bne $a5, $a3, .LBB1_5 ++; LA64-NEXT: ll.w $a3, $a2, 0 ++; LA64-NEXT: bne $a3, $a6, .LBB1_5 + ; LA64-NEXT: # %bb.4: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB1_3 Depth=2 +-; LA64-NEXT: move $a7, $a6 ++; LA64-NEXT: move $a7, $a5 + ; LA64-NEXT: sc.w $a7, $a2, 0 + ; LA64-NEXT: beqz $a7, .LBB1_3 + ; LA64-NEXT: b .LBB1_6 +@@ -96,11 +96,9 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { + ; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB1_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB1_1 Depth=1 +-; LA64-NEXT: addi.w $a6, $a3, 0 +-; LA64-NEXT: move $a3, $a5 +-; LA64-NEXT: bne $a5, $a6, .LBB1_1 ++; LA64-NEXT: bne $a3, $a6, .LBB1_1 + ; LA64-NEXT: # %bb.2: # %atomicrmw.end +-; LA64-NEXT: srl.w $a0, $a5, $a0 ++; LA64-NEXT: srl.w $a0, $a3, $a0 + ; LA64-NEXT: ret + %result = atomicrmw uinc_wrap ptr %ptr, i16 %val seq_cst + ret i16 %result 
+@@ -109,37 +107,36 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { + define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) { + ; LA64-LABEL: atomicrmw_uinc_wrap_i32: + ; LA64: # %bb.0: +-; LA64-NEXT: ld.w $a3, $a0, 0 +-; LA64-NEXT: addi.w $a2, $a1, 0 ++; LA64-NEXT: ld.w $a2, $a0, 0 ++; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .p2align 4, , 16 + ; LA64-NEXT: .LBB2_1: # %atomicrmw.start + ; LA64-NEXT: # =>This Loop Header: Depth=1 + ; LA64-NEXT: # Child Loop BB2_3 Depth 2 +-; LA64-NEXT: addi.w $a4, $a3, 0 +-; LA64-NEXT: sltu $a1, $a4, $a2 +-; LA64-NEXT: xori $a1, $a1, 1 +-; LA64-NEXT: addi.d $a5, $a3, 1 +-; LA64-NEXT: masknez $a5, $a5, $a1 ++; LA64-NEXT: addi.w $a3, $a2, 0 ++; LA64-NEXT: sltu $a4, $a3, $a1 ++; LA64-NEXT: xori $a4, $a4, 1 ++; LA64-NEXT: addi.d $a2, $a2, 1 ++; LA64-NEXT: masknez $a4, $a2, $a4 + ; LA64-NEXT: .LBB2_3: # %atomicrmw.start + ; LA64-NEXT: # Parent Loop BB2_1 Depth=1 + ; LA64-NEXT: # => This Inner Loop Header: Depth=2 +-; LA64-NEXT: ll.w $a1, $a0, 0 +-; LA64-NEXT: bne $a1, $a3, .LBB2_5 ++; LA64-NEXT: ll.w $a2, $a0, 0 ++; LA64-NEXT: bne $a2, $a3, .LBB2_5 + ; LA64-NEXT: # %bb.4: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB2_3 Depth=2 +-; LA64-NEXT: move $a6, $a5 +-; LA64-NEXT: sc.w $a6, $a0, 0 +-; LA64-NEXT: beqz $a6, .LBB2_3 ++; LA64-NEXT: move $a5, $a4 ++; LA64-NEXT: sc.w $a5, $a0, 0 ++; LA64-NEXT: beqz $a5, .LBB2_3 + ; LA64-NEXT: b .LBB2_6 + ; LA64-NEXT: .LBB2_5: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB2_1 Depth=1 + ; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB2_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB2_1 Depth=1 +-; LA64-NEXT: move $a3, $a1 +-; LA64-NEXT: bne $a1, $a4, .LBB2_1 ++; LA64-NEXT: bne $a2, $a3, .LBB2_1 + ; LA64-NEXT: # %bb.2: # %atomicrmw.end +-; LA64-NEXT: move $a0, $a1 ++; LA64-NEXT: move $a0, $a2 + ; LA64-NEXT: ret + %result = atomicrmw uinc_wrap ptr %ptr, i32 %val seq_cst + ret i32 %result +@@ -212,15 +209,16 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) { + ; LA64-NEXT: andi $a6, $a6, 255 + ; LA64-NEXT: sll.w $a6, $a6, $a0 + ; LA64-NEXT: and $a7, $a3, $a4 +-; LA64-NEXT: or $a7, $a7, $a6 ++; LA64-NEXT: or $a6, $a7, $a6 ++; LA64-NEXT: addi.w $a7, $a3, 0 + ; LA64-NEXT: .LBB4_3: # %atomicrmw.start + ; LA64-NEXT: # Parent Loop BB4_1 Depth=1 + ; LA64-NEXT: # => This Inner Loop Header: Depth=2 +-; LA64-NEXT: ll.w $a6, $a2, 0 +-; LA64-NEXT: bne $a6, $a3, .LBB4_5 ++; LA64-NEXT: ll.w $a3, $a2, 0 ++; LA64-NEXT: bne $a3, $a7, .LBB4_5 + ; LA64-NEXT: # %bb.4: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB4_3 Depth=2 +-; LA64-NEXT: move $t0, $a7 ++; LA64-NEXT: move $t0, $a6 + ; LA64-NEXT: sc.w $t0, $a2, 0 + ; LA64-NEXT: beqz $t0, .LBB4_3 + ; LA64-NEXT: b .LBB4_6 +@@ -229,11 +227,9 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) { + ; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB4_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB4_1 Depth=1 +-; LA64-NEXT: addi.w $a7, $a3, 0 +-; LA64-NEXT: move $a3, $a6 +-; LA64-NEXT: bne $a6, $a7, .LBB4_1 ++; LA64-NEXT: bne $a3, $a7, .LBB4_1 + ; LA64-NEXT: # %bb.2: # %atomicrmw.end +-; LA64-NEXT: srl.w $a0, $a6, $a0 ++; LA64-NEXT: srl.w $a0, $a3, $a0 + ; LA64-NEXT: ret + %result = atomicrmw udec_wrap ptr %ptr, i8 %val seq_cst + ret i8 %result +@@ -270,15 +266,16 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) { + ; LA64-NEXT: bstrpick.d $a6, $a6, 15, 0 + ; LA64-NEXT: sll.w $a6, $a6, $a0 + ; LA64-NEXT: and $a7, $a3, $a4 +-; LA64-NEXT: or $a7, $a7, $a6 ++; LA64-NEXT: or $a6, $a7, $a6 ++; LA64-NEXT: addi.w $a7, $a3, 0 + ; LA64-NEXT: .LBB5_3: 
# %atomicrmw.start + ; LA64-NEXT: # Parent Loop BB5_1 Depth=1 + ; LA64-NEXT: # => This Inner Loop Header: Depth=2 +-; LA64-NEXT: ll.w $a6, $a2, 0 +-; LA64-NEXT: bne $a6, $a3, .LBB5_5 ++; LA64-NEXT: ll.w $a3, $a2, 0 ++; LA64-NEXT: bne $a3, $a7, .LBB5_5 + ; LA64-NEXT: # %bb.4: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB5_3 Depth=2 +-; LA64-NEXT: move $t0, $a7 ++; LA64-NEXT: move $t0, $a6 + ; LA64-NEXT: sc.w $t0, $a2, 0 + ; LA64-NEXT: beqz $t0, .LBB5_3 + ; LA64-NEXT: b .LBB5_6 +@@ -287,11 +284,9 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) { + ; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB5_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB5_1 Depth=1 +-; LA64-NEXT: addi.w $a7, $a3, 0 +-; LA64-NEXT: move $a3, $a6 +-; LA64-NEXT: bne $a6, $a7, .LBB5_1 ++; LA64-NEXT: bne $a3, $a7, .LBB5_1 + ; LA64-NEXT: # %bb.2: # %atomicrmw.end +-; LA64-NEXT: srl.w $a0, $a6, $a0 ++; LA64-NEXT: srl.w $a0, $a3, $a0 + ; LA64-NEXT: ret + %result = atomicrmw udec_wrap ptr %ptr, i16 %val seq_cst + ret i16 %result +@@ -300,22 +295,22 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) { + define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) { + ; LA64-LABEL: atomicrmw_udec_wrap_i32: + ; LA64: # %bb.0: +-; LA64-NEXT: ld.w $a4, $a0, 0 ++; LA64-NEXT: ld.w $a2, $a0, 0 + ; LA64-NEXT: addi.w $a3, $a1, 0 + ; LA64-NEXT: .p2align 4, , 16 + ; LA64-NEXT: .LBB6_1: # %atomicrmw.start + ; LA64-NEXT: # =>This Loop Header: Depth=1 + ; LA64-NEXT: # Child Loop BB6_3 Depth 2 +-; LA64-NEXT: addi.w $a5, $a4, 0 +-; LA64-NEXT: sltu $a2, $a3, $a5 +-; LA64-NEXT: addi.d $a6, $a4, -1 +-; LA64-NEXT: masknez $a6, $a6, $a2 +-; LA64-NEXT: maskeqz $a2, $a1, $a2 +-; LA64-NEXT: or $a2, $a2, $a6 +-; LA64-NEXT: sltui $a6, $a5, 1 +-; LA64-NEXT: masknez $a2, $a2, $a6 +-; LA64-NEXT: maskeqz $a6, $a1, $a6 +-; LA64-NEXT: or $a6, $a6, $a2 ++; LA64-NEXT: addi.w $a4, $a2, 0 ++; LA64-NEXT: sltu $a5, $a3, $a4 ++; LA64-NEXT: addi.d $a2, $a2, -1 ++; LA64-NEXT: masknez $a2, $a2, $a5 ++; LA64-NEXT: maskeqz $a5, $a1, $a5 ++; LA64-NEXT: or $a2, $a5, $a2 ++; LA64-NEXT: sltui $a5, $a4, 1 ++; LA64-NEXT: masknez $a2, $a2, $a5 ++; LA64-NEXT: maskeqz $a5, $a1, $a5 ++; LA64-NEXT: or $a5, $a5, $a2 + ; LA64-NEXT: .LBB6_3: # %atomicrmw.start + ; LA64-NEXT: # Parent Loop BB6_1 Depth=1 + ; LA64-NEXT: # => This Inner Loop Header: Depth=2 +@@ -323,17 +318,16 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) { + ; LA64-NEXT: bne $a2, $a4, .LBB6_5 + ; LA64-NEXT: # %bb.4: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB6_3 Depth=2 +-; LA64-NEXT: move $a7, $a6 +-; LA64-NEXT: sc.w $a7, $a0, 0 +-; LA64-NEXT: beqz $a7, .LBB6_3 ++; LA64-NEXT: move $a6, $a5 ++; LA64-NEXT: sc.w $a6, $a0, 0 ++; LA64-NEXT: beqz $a6, .LBB6_3 + ; LA64-NEXT: b .LBB6_6 + ; LA64-NEXT: .LBB6_5: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB6_1 Depth=1 + ; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB6_6: # %atomicrmw.start + ; LA64-NEXT: # in Loop: Header=BB6_1 Depth=1 +-; LA64-NEXT: move $a4, $a2 +-; LA64-NEXT: bne $a2, $a5, .LBB6_1 ++; LA64-NEXT: bne $a2, $a4, .LBB6_1 + ; LA64-NEXT: # %bb.2: # %atomicrmw.end + ; LA64-NEXT: move $a0, $a2 + ; LA64-NEXT: ret +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll +index 1dd3f39852d8..ebb09640e6c9 100644 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll ++++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll +@@ -71,6 +71,7 @@ define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind + 
define void @cmpxchg_i32_acquire_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind { + ; LA64-LABEL: cmpxchg_i32_acquire_acquire: + ; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a3, $a0, 0 + ; LA64-NEXT: bne $a3, $a1, .LBB2_3 +@@ -176,6 +177,7 @@ define void @cmpxchg_i16_acquire_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin + define void @cmpxchg_i32_acquire_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwind { + ; LA64-LABEL: cmpxchg_i32_acquire_monotonic: + ; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a3, $a0, 0 + ; LA64-NEXT: bne $a3, $a1, .LBB6_3 +@@ -285,9 +287,10 @@ define i16 @cmpxchg_i16_acquire_acquire_reti16(ptr %ptr, i16 %cmp, i16 %val) nou + define i32 @cmpxchg_i32_acquire_acquire_reti32(ptr %ptr, i32 %cmp, i32 %val) nounwind { + ; LA64-LABEL: cmpxchg_i32_acquire_acquire_reti32: + ; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a3, $a1, 0 + ; LA64-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 +-; LA64-NEXT: ll.w $a3, $a0, 0 +-; LA64-NEXT: bne $a3, $a1, .LBB10_3 ++; LA64-NEXT: ll.w $a1, $a0, 0 ++; LA64-NEXT: bne $a1, $a3, .LBB10_3 + ; LA64-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1 + ; LA64-NEXT: move $a4, $a2 + ; LA64-NEXT: sc.w $a4, $a0, 0 +@@ -296,7 +299,7 @@ define i32 @cmpxchg_i32_acquire_acquire_reti32(ptr %ptr, i32 %cmp, i32 %val) nou + ; LA64-NEXT: .LBB10_3: + ; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB10_4: +-; LA64-NEXT: move $a0, $a3 ++; LA64-NEXT: move $a0, $a1 + ; LA64-NEXT: ret + %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire + %res = extractvalue { i32, i1 } %tmp, 0 +@@ -404,6 +407,7 @@ define i1 @cmpxchg_i16_acquire_acquire_reti1(ptr %ptr, i16 %cmp, i16 %val) nounw + define i1 @cmpxchg_i32_acquire_acquire_reti1(ptr %ptr, i32 %cmp, i32 %val) nounwind { + ; LA64-LABEL: cmpxchg_i32_acquire_acquire_reti1: + ; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a3, $a0, 0 + ; LA64-NEXT: bne $a3, $a1, .LBB14_3 +@@ -415,8 +419,7 @@ define i1 @cmpxchg_i32_acquire_acquire_reti1(ptr %ptr, i32 %cmp, i32 %val) nounw + ; LA64-NEXT: .LBB14_3: + ; LA64-NEXT: dbar 20 + ; LA64-NEXT: .LBB14_4: +-; LA64-NEXT: addi.w $a0, $a1, 0 +-; LA64-NEXT: xor $a0, $a3, $a0 ++; LA64-NEXT: xor $a0, $a3, $a1 + ; LA64-NEXT: sltui $a0, $a0, 1 + ; LA64-NEXT: ret + %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire +@@ -516,6 +519,7 @@ define void @cmpxchg_i16_monotonic_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounw + define void @cmpxchg_i32_monotonic_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwind { + ; LA64-LABEL: cmpxchg_i32_monotonic_monotonic: + ; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a3, $a0, 0 + ; LA64-NEXT: bne $a3, $a1, .LBB18_3 +@@ -625,9 +629,10 @@ define i16 @cmpxchg_i16_monotonic_monotonic_reti16(ptr %ptr, i16 %cmp, i16 %val) + define i32 @cmpxchg_i32_monotonic_monotonic_reti32(ptr %ptr, i32 %cmp, i32 %val) nounwind { + ; LA64-LABEL: cmpxchg_i32_monotonic_monotonic_reti32: + ; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a3, $a1, 0 + ; LA64-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 +-; LA64-NEXT: ll.w $a3, $a0, 0 +-; LA64-NEXT: bne $a3, $a1, .LBB22_3 ++; LA64-NEXT: ll.w $a1, $a0, 0 ++; LA64-NEXT: bne $a1, $a3, .LBB22_3 + ; LA64-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1 + ; LA64-NEXT: move $a4, $a2 + ; LA64-NEXT: sc.w $a4, 
$a0, 0 +@@ -636,7 +641,7 @@ define i32 @cmpxchg_i32_monotonic_monotonic_reti32(ptr %ptr, i32 %cmp, i32 %val) + ; LA64-NEXT: .LBB22_3: + ; LA64-NEXT: dbar 1792 + ; LA64-NEXT: .LBB22_4: +-; LA64-NEXT: move $a0, $a3 ++; LA64-NEXT: move $a0, $a1 + ; LA64-NEXT: ret + %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic + %res = extractvalue { i32, i1 } %tmp, 0 +@@ -744,6 +749,7 @@ define i1 @cmpxchg_i16_monotonic_monotonic_reti1(ptr %ptr, i16 %cmp, i16 %val) n + define i1 @cmpxchg_i32_monotonic_monotonic_reti1(ptr %ptr, i32 %cmp, i32 %val) nounwind { + ; LA64-LABEL: cmpxchg_i32_monotonic_monotonic_reti1: + ; LA64: # %bb.0: ++; LA64-NEXT: addi.w $a1, $a1, 0 + ; LA64-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1 + ; LA64-NEXT: ll.w $a3, $a0, 0 + ; LA64-NEXT: bne $a3, $a1, .LBB26_3 +@@ -755,8 +761,7 @@ define i1 @cmpxchg_i32_monotonic_monotonic_reti1(ptr %ptr, i32 %cmp, i32 %val) n + ; LA64-NEXT: .LBB26_3: + ; LA64-NEXT: dbar 1792 + ; LA64-NEXT: .LBB26_4: +-; LA64-NEXT: addi.w $a0, $a1, 0 +-; LA64-NEXT: xor $a0, $a3, $a0 ++; LA64-NEXT: xor $a0, $a3, $a1 + ; LA64-NEXT: sltui $a0, $a0, 1 + ; LA64-NEXT: ret + %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll +index 589360823b14..4d8160d70803 100644 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll ++++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll +@@ -16,6 +16,7 @@ define float @float_fadd_acquire(ptr %p) nounwind { + ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64F-NEXT: movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB0_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB0_1 Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ -33,8 +34,7 @@ define float @float_fadd_acquire(ptr %p) nounwind { + ; LA64F-NEXT: .LBB0_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB0_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: bne $a3, $a1, .LBB0_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB0_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -51,6 +51,7 @@ define float @float_fadd_acquire(ptr %p) nounwind { + ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, $a2, 0 + ; LA64D-NEXT: .LBB0_3: # %atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB0_1 Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -68,8 +69,7 @@ define float @float_fadd_acquire(ptr %p) nounwind { + ; LA64D-NEXT: .LBB0_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB0_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB0_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB0_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; LA64D-NEXT: ret + %v = atomicrmw fadd ptr %p, float 1.0 acquire, align 4 +@@ -90,6 +90,7 @@ define float @float_fsub_acquire(ptr %p) nounwind { + ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64F-NEXT: movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB1_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB1_1 Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ -107,8 +108,7 @@ define float @float_fsub_acquire(ptr %p) nounwind { + ; LA64F-NEXT: .LBB1_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB1_1 
Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: bne $a3, $a1, .LBB1_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB1_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -125,6 +125,7 @@ define float @float_fsub_acquire(ptr %p) nounwind { + ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, $a2, 0 + ; LA64D-NEXT: .LBB1_3: # %atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB1_1 Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -142,8 +143,7 @@ define float @float_fsub_acquire(ptr %p) nounwind { + ; LA64D-NEXT: .LBB1_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB1_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB1_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB1_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; LA64D-NEXT: ret + %v = atomicrmw fsub ptr %p, float 1.0 acquire, align 4 +@@ -165,6 +165,7 @@ define float @float_fmin_acquire(ptr %p) nounwind { + ; LA64F-NEXT: fmin.s $fa2, $fa2, $fa1 + ; LA64F-NEXT: movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB2_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB2_1 Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ -182,8 +183,7 @@ define float @float_fmin_acquire(ptr %p) nounwind { + ; LA64F-NEXT: .LBB2_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB2_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: bne $a3, $a1, .LBB2_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB2_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -201,6 +201,7 @@ define float @float_fmin_acquire(ptr %p) nounwind { + ; LA64D-NEXT: fmin.s $fa2, $fa2, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, $a2, 0 + ; LA64D-NEXT: .LBB2_3: # %atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB2_1 Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -218,8 +219,7 @@ define float @float_fmin_acquire(ptr %p) nounwind { + ; LA64D-NEXT: .LBB2_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB2_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB2_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB2_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; LA64D-NEXT: ret + %v = atomicrmw fmin ptr %p, float 1.0 acquire, align 4 +@@ -241,6 +241,7 @@ define float @float_fmax_acquire(ptr %p) nounwind { + ; LA64F-NEXT: fmax.s $fa2, $fa2, $fa1 + ; LA64F-NEXT: movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB3_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB3_1 Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ -258,8 +259,7 @@ define float @float_fmax_acquire(ptr %p) nounwind { + ; LA64F-NEXT: .LBB3_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB3_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: bne $a3, $a1, .LBB3_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB3_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -277,6 +277,7 @@ define float @float_fmax_acquire(ptr %p) nounwind { + ; LA64D-NEXT: fmax.s $fa2, $fa2, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, $a2, 0 + ; LA64D-NEXT: .LBB3_3: # 
%atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB3_1 Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -294,8 +295,7 @@ define float @float_fmax_acquire(ptr %p) nounwind { + ; LA64D-NEXT: .LBB3_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB3_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB3_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB3_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; LA64D-NEXT: ret + %v = atomicrmw fmax ptr %p, float 1.0 acquire, align 4 +@@ -694,6 +694,7 @@ define float @float_fadd_release(ptr %p) nounwind { + ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64F-NEXT: movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB8_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB8_1 Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ -711,8 +712,7 @@ define float @float_fadd_release(ptr %p) nounwind { + ; LA64F-NEXT: .LBB8_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB8_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: bne $a3, $a1, .LBB8_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB8_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -729,6 +729,7 @@ define float @float_fadd_release(ptr %p) nounwind { + ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, $a2, 0 + ; LA64D-NEXT: .LBB8_3: # %atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB8_1 Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -746,8 +747,7 @@ define float @float_fadd_release(ptr %p) nounwind { + ; LA64D-NEXT: .LBB8_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB8_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB8_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB8_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; LA64D-NEXT: ret + %v = atomicrmw fadd ptr %p, float 1.0 release, align 4 +@@ -768,6 +768,7 @@ define float @float_fsub_release(ptr %p) nounwind { + ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64F-NEXT: movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB9_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB9_1 Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ -785,8 +786,7 @@ define float @float_fsub_release(ptr %p) nounwind { + ; LA64F-NEXT: .LBB9_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB9_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: bne $a3, $a1, .LBB9_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB9_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -803,6 +803,7 @@ define float @float_fsub_release(ptr %p) nounwind { + ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, $a2, 0 + ; LA64D-NEXT: .LBB9_3: # %atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB9_1 Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -820,8 +821,7 @@ define float @float_fsub_release(ptr %p) nounwind { + ; LA64D-NEXT: .LBB9_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB9_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB9_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB9_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; 
LA64D-NEXT: ret + %v = atomicrmw fsub ptr %p, float 1.0 release, align 4 +@@ -843,6 +843,7 @@ define float @float_fmin_release(ptr %p) nounwind { + ; LA64F-NEXT: fmin.s $fa2, $fa2, $fa1 + ; LA64F-NEXT: movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB10_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB10_1 Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ -860,8 +861,7 @@ define float @float_fmin_release(ptr %p) nounwind { + ; LA64F-NEXT: .LBB10_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB10_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: bne $a3, $a1, .LBB10_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB10_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -879,6 +879,7 @@ define float @float_fmin_release(ptr %p) nounwind { + ; LA64D-NEXT: fmin.s $fa2, $fa2, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, $a2, 0 + ; LA64D-NEXT: .LBB10_3: # %atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB10_1 Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -896,8 +897,7 @@ define float @float_fmin_release(ptr %p) nounwind { + ; LA64D-NEXT: .LBB10_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB10_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB10_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB10_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; LA64D-NEXT: ret + %v = atomicrmw fmin ptr %p, float 1.0 release, align 4 +@@ -919,6 +919,7 @@ define float @float_fmax_release(ptr %p) nounwind { + ; LA64F-NEXT: fmax.s $fa2, $fa2, $fa1 + ; LA64F-NEXT: movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB11_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB11_1 Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ -936,8 +937,7 @@ define float @float_fmax_release(ptr %p) nounwind { + ; LA64F-NEXT: .LBB11_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB11_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: bne $a3, $a1, .LBB11_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB11_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -955,6 +955,7 @@ define float @float_fmax_release(ptr %p) nounwind { + ; LA64D-NEXT: fmax.s $fa2, $fa2, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, $a2, 0 + ; LA64D-NEXT: .LBB11_3: # %atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB11_1 Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -972,8 +973,7 @@ define float @float_fmax_release(ptr %p) nounwind { + ; LA64D-NEXT: .LBB11_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB11_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB11_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB11_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; LA64D-NEXT: ret + %v = atomicrmw fmax ptr %p, float 1.0 release, align 4 +@@ -1372,6 +1372,7 @@ define float @float_fadd_acq_rel(ptr %p) nounwind { + ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64F-NEXT: movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB16_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB16_1 Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ -1389,8 
+1390,7 @@ define float @float_fadd_acq_rel(ptr %p) nounwind { + ; LA64F-NEXT: .LBB16_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB16_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: bne $a3, $a1, .LBB16_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB16_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -1407,6 +1407,7 @@ define float @float_fadd_acq_rel(ptr %p) nounwind { + ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, $a2, 0 + ; LA64D-NEXT: .LBB16_3: # %atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB16_1 Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -1424,8 +1425,7 @@ define float @float_fadd_acq_rel(ptr %p) nounwind { + ; LA64D-NEXT: .LBB16_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB16_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB16_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB16_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; LA64D-NEXT: ret + %v = atomicrmw fadd ptr %p, float 1.0 acq_rel, align 4 +@@ -1446,6 +1446,7 @@ define float @float_fsub_acq_rel(ptr %p) nounwind { + ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64F-NEXT: movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB17_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB17_1 Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ -1463,8 +1464,7 @@ define float @float_fsub_acq_rel(ptr %p) nounwind { + ; LA64F-NEXT: .LBB17_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB17_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: bne $a3, $a1, .LBB17_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB17_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -1481,6 +1481,7 @@ define float @float_fsub_acq_rel(ptr %p) nounwind { + ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, $a2, 0 + ; LA64D-NEXT: .LBB17_3: # %atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB17_1 Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -1498,8 +1499,7 @@ define float @float_fsub_acq_rel(ptr %p) nounwind { + ; LA64D-NEXT: .LBB17_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB17_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB17_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB17_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; LA64D-NEXT: ret + %v = atomicrmw fsub ptr %p, float 1.0 acq_rel, align 4 +@@ -1521,6 +1521,7 @@ define float @float_fmin_acq_rel(ptr %p) nounwind { + ; LA64F-NEXT: fmin.s $fa2, $fa2, $fa1 + ; LA64F-NEXT: movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB18_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB18_1 Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ -1538,8 +1539,7 @@ define float @float_fmin_acq_rel(ptr %p) nounwind { + ; LA64F-NEXT: .LBB18_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB18_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: bne $a3, $a1, .LBB18_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB18_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -1557,6 +1557,7 @@ define float @float_fmin_acq_rel(ptr %p) 
nounwind { + ; LA64D-NEXT: fmin.s $fa2, $fa2, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, $a2, 0 + ; LA64D-NEXT: .LBB18_3: # %atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB18_1 Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -1574,8 +1575,7 @@ define float @float_fmin_acq_rel(ptr %p) nounwind { + ; LA64D-NEXT: .LBB18_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB18_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB18_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB18_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; LA64D-NEXT: ret + %v = atomicrmw fmin ptr %p, float 1.0 acq_rel, align 4 +@@ -1597,6 +1597,7 @@ define float @float_fmax_acq_rel(ptr %p) nounwind { + ; LA64F-NEXT: fmax.s $fa2, $fa2, $fa1 + ; LA64F-NEXT: movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB19_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB19_1 Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ -1614,8 +1615,7 @@ define float @float_fmax_acq_rel(ptr %p) nounwind { + ; LA64F-NEXT: .LBB19_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB19_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: bne $a3, $a1, .LBB19_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB19_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -1633,6 +1633,7 @@ define float @float_fmax_acq_rel(ptr %p) nounwind { + ; LA64D-NEXT: fmax.s $fa2, $fa2, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, $a2, 0 + ; LA64D-NEXT: .LBB19_3: # %atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB19_1 Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -1650,8 +1651,7 @@ define float @float_fmax_acq_rel(ptr %p) nounwind { + ; LA64D-NEXT: .LBB19_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB19_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB19_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB19_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; LA64D-NEXT: ret + %v = atomicrmw fmax ptr %p, float 1.0 acq_rel, align 4 +@@ -2074,6 +2074,7 @@ define float @float_fadd_seq_cst(ptr %p) nounwind { + ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64F-NEXT: movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB24_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB24_1 Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ -2091,8 +2092,7 @@ define float @float_fadd_seq_cst(ptr %p) nounwind { + ; LA64F-NEXT: .LBB24_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB24_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: bne $a3, $a1, .LBB24_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB24_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -2109,6 +2109,7 @@ define float @float_fadd_seq_cst(ptr %p) nounwind { + ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, $a2, 0 + ; LA64D-NEXT: .LBB24_3: # %atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB24_1 Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -2126,8 +2127,7 @@ define float @float_fadd_seq_cst(ptr %p) nounwind { + ; LA64D-NEXT: .LBB24_6: # %atomicrmw.start + ; LA64D-NEXT: # 
in Loop: Header=BB24_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB24_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB24_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; LA64D-NEXT: ret + %v = atomicrmw fadd ptr %p, float 1.0 seq_cst, align 4 +@@ -2148,6 +2148,7 @@ define float @float_fsub_seq_cst(ptr %p) nounwind { + ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64F-NEXT: movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB25_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB25_1 Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ -2165,8 +2166,7 @@ define float @float_fsub_seq_cst(ptr %p) nounwind { + ; LA64F-NEXT: .LBB25_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB25_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: bne $a3, $a1, .LBB25_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB25_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -2183,6 +2183,7 @@ define float @float_fsub_seq_cst(ptr %p) nounwind { + ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, $a2, 0 + ; LA64D-NEXT: .LBB25_3: # %atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB25_1 Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -2200,8 +2201,7 @@ define float @float_fsub_seq_cst(ptr %p) nounwind { + ; LA64D-NEXT: .LBB25_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB25_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB25_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB25_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; LA64D-NEXT: ret + %v = atomicrmw fsub ptr %p, float 1.0 seq_cst, align 4 +@@ -2223,6 +2223,7 @@ define float @float_fmin_seq_cst(ptr %p) nounwind { + ; LA64F-NEXT: fmin.s $fa2, $fa2, $fa1 + ; LA64F-NEXT: movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB26_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB26_1 Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ -2240,8 +2241,7 @@ define float @float_fmin_seq_cst(ptr %p) nounwind { + ; LA64F-NEXT: .LBB26_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB26_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: bne $a3, $a1, .LBB26_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB26_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -2259,6 +2259,7 @@ define float @float_fmin_seq_cst(ptr %p) nounwind { + ; LA64D-NEXT: fmin.s $fa2, $fa2, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, $a2, 0 + ; LA64D-NEXT: .LBB26_3: # %atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB26_1 Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -2276,8 +2277,7 @@ define float @float_fmin_seq_cst(ptr %p) nounwind { + ; LA64D-NEXT: .LBB26_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB26_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB26_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB26_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; LA64D-NEXT: ret + %v = atomicrmw fmin ptr %p, float 1.0 seq_cst, align 4 +@@ -2299,6 +2299,7 @@ define float @float_fmax_seq_cst(ptr %p) nounwind { + ; LA64F-NEXT: fmax.s $fa2, $fa2, $fa1 + ; LA64F-NEXT: 
movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB27_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB27_1 Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ -2316,8 +2317,7 @@ define float @float_fmax_seq_cst(ptr %p) nounwind { + ; LA64F-NEXT: .LBB27_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB27_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: bne $a3, $a1, .LBB27_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB27_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -2335,6 +2335,7 @@ define float @float_fmax_seq_cst(ptr %p) nounwind { + ; LA64D-NEXT: fmax.s $fa2, $fa2, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, $a2, 0 + ; LA64D-NEXT: .LBB27_3: # %atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB27_1 Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -2352,8 +2353,7 @@ define float @float_fmax_seq_cst(ptr %p) nounwind { + ; LA64D-NEXT: .LBB27_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB27_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB27_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB27_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; LA64D-NEXT: ret + %v = atomicrmw fmax ptr %p, float 1.0 seq_cst, align 4 +@@ -2752,6 +2752,7 @@ define float @float_fadd_monotonic(ptr %p) nounwind { + ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64F-NEXT: movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB32_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB32_1 Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ -2769,8 +2770,7 @@ define float @float_fadd_monotonic(ptr %p) nounwind { + ; LA64F-NEXT: .LBB32_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB32_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: bne $a3, $a1, .LBB32_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB32_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -2787,6 +2787,7 @@ define float @float_fadd_monotonic(ptr %p) nounwind { + ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, $a2, 0 + ; LA64D-NEXT: .LBB32_3: # %atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB32_1 Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -2804,8 +2805,7 @@ define float @float_fadd_monotonic(ptr %p) nounwind { + ; LA64D-NEXT: .LBB32_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB32_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB32_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB32_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; LA64D-NEXT: ret + %v = atomicrmw fadd ptr %p, float 1.0 monotonic, align 4 +@@ -2826,6 +2826,7 @@ define float @float_fsub_monotonic(ptr %p) nounwind { + ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64F-NEXT: movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB33_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB33_1 Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ -2843,8 +2844,7 @@ define float @float_fsub_monotonic(ptr %p) nounwind { + ; LA64F-NEXT: .LBB33_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB33_1 Depth=1 + ; LA64F-NEXT: 
movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: bne $a3, $a1, .LBB33_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB33_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -2861,6 +2861,7 @@ define float @float_fsub_monotonic(ptr %p) nounwind { + ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, $a2, 0 + ; LA64D-NEXT: .LBB33_3: # %atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB33_1 Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -2878,8 +2879,7 @@ define float @float_fsub_monotonic(ptr %p) nounwind { + ; LA64D-NEXT: .LBB33_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB33_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB33_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB33_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; LA64D-NEXT: ret + %v = atomicrmw fsub ptr %p, float 1.0 monotonic, align 4 +@@ -2901,6 +2901,7 @@ define float @float_fmin_monotonic(ptr %p) nounwind { + ; LA64F-NEXT: fmin.s $fa2, $fa2, $fa1 + ; LA64F-NEXT: movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB34_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB34_1 Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ -2918,8 +2919,7 @@ define float @float_fmin_monotonic(ptr %p) nounwind { + ; LA64F-NEXT: .LBB34_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB34_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: bne $a3, $a1, .LBB34_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB34_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -2937,6 +2937,7 @@ define float @float_fmin_monotonic(ptr %p) nounwind { + ; LA64D-NEXT: fmin.s $fa2, $fa2, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, $a2, 0 + ; LA64D-NEXT: .LBB34_3: # %atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB34_1 Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -2954,8 +2955,7 @@ define float @float_fmin_monotonic(ptr %p) nounwind { + ; LA64D-NEXT: .LBB34_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB34_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB34_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB34_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; LA64D-NEXT: ret + %v = atomicrmw fmin ptr %p, float 1.0 monotonic, align 4 +@@ -2977,6 +2977,7 @@ define float @float_fmax_monotonic(ptr %p) nounwind { + ; LA64F-NEXT: fmax.s $fa2, $fa2, $fa1 + ; LA64F-NEXT: movfr2gr.s $a1, $fa2 + ; LA64F-NEXT: movfr2gr.s $a2, $fa0 ++; LA64F-NEXT: addi.w $a2, $a2, 0 + ; LA64F-NEXT: .LBB35_3: # %atomicrmw.start + ; LA64F-NEXT: # Parent Loop BB35_1 Depth=1 + ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +@@ -2994,8 +2995,7 @@ define float @float_fmax_monotonic(ptr %p) nounwind { + ; LA64F-NEXT: .LBB35_6: # %atomicrmw.start + ; LA64F-NEXT: # in Loop: Header=BB35_1 Depth=1 + ; LA64F-NEXT: movgr2fr.w $fa0, $a3 +-; LA64F-NEXT: addi.w $a1, $a2, 0 +-; LA64F-NEXT: bne $a3, $a1, .LBB35_1 ++; LA64F-NEXT: bne $a3, $a2, .LBB35_1 + ; LA64F-NEXT: # %bb.2: # %atomicrmw.end + ; LA64F-NEXT: ret + ; +@@ -3013,6 +3013,7 @@ define float @float_fmax_monotonic(ptr %p) nounwind { + ; LA64D-NEXT: fmax.s $fa2, $fa2, $fa1 + ; LA64D-NEXT: movfr2gr.s $a1, $fa2 + ; LA64D-NEXT: movfr2gr.s $a2, $fa0 ++; LA64D-NEXT: addi.w $a2, 
$a2, 0 + ; LA64D-NEXT: .LBB35_3: # %atomicrmw.start + ; LA64D-NEXT: # Parent Loop BB35_1 Depth=1 + ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +@@ -3030,8 +3031,7 @@ define float @float_fmax_monotonic(ptr %p) nounwind { + ; LA64D-NEXT: .LBB35_6: # %atomicrmw.start + ; LA64D-NEXT: # in Loop: Header=BB35_1 Depth=1 + ; LA64D-NEXT: movgr2fr.w $fa0, $a3 +-; LA64D-NEXT: addi.w $a1, $a2, 0 +-; LA64D-NEXT: bne $a3, $a1, .LBB35_1 ++; LA64D-NEXT: bne $a3, $a2, .LBB35_1 + ; LA64D-NEXT: # %bb.2: # %atomicrmw.end + ; LA64D-NEXT: ret + %v = atomicrmw fmax ptr %p, float 1.0 monotonic, align 4 +-- +2.20.1 + diff --git a/0066-Memory-Call-__clear_cache-in-InvalidateInstructionCa.patch b/0066-Memory-Call-__clear_cache-in-InvalidateInstructionCa.patch new file mode 100644 index 0000000000000000000000000000000000000000..bc8d9daecea86c6d58010a6c6dd2c1705c5b16e4 --- /dev/null +++ b/0066-Memory-Call-__clear_cache-in-InvalidateInstructionCa.patch @@ -0,0 +1,36 @@ +From 9ad8624e947a692ec9409fadebd946aaba5dc8f7 Mon Sep 17 00:00:00 2001 +From: Lu Weining +Date: Thu, 19 Oct 2023 10:16:13 +0800 +Subject: [PATCH 66/66] [Memory] Call __clear_cache in + InvalidateInstructionCache on LoongArch (#67285) + +As the comments of `InvalidateInstructionCache`: Before the JIT can run +a block of code that has been emitted it must invalidate the instruction +cache on some platforms. I think it applies to LoongArch as LoongArch +has a weak memory-model. But I'm not able to write a test to demonstrate +this issue. Perhaps self-modifing code should be wrote? + +(cherry picked from commit fb366581e7d67df7d9a98605fd65a7e7908451e7) + +Change-Id: I05f81ed770bbc11e74f518d8930f9c538b5ffc50 +--- + llvm/lib/Support/Unix/Memory.inc | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/llvm/lib/Support/Unix/Memory.inc b/llvm/lib/Support/Unix/Memory.inc +index 4c8f6b2ea7d3..69bd1164343d 100644 +--- a/llvm/lib/Support/Unix/Memory.inc ++++ b/llvm/lib/Support/Unix/Memory.inc +@@ -237,7 +237,8 @@ void Memory::InvalidateInstructionCache(const void *Addr, size_t Len) { + for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize) + asm volatile("icbi 0, %0" : : "r"(Line)); + asm volatile("isync"); +-#elif (defined(__arm__) || defined(__aarch64__) || defined(__mips__)) && \ ++#elif (defined(__arm__) || defined(__aarch64__) || defined(__loongarch__) || \ ++ defined(__mips__)) && \ + defined(__GNUC__) + // FIXME: Can we safely always call this for __GNUC__ everywhere? + const char *Start = static_cast(Addr); +-- +2.20.1 + diff --git a/0067-Backport-ARM-Check-all-terms-in-emitPopInst-when-clearing-Res.patch b/0067-Backport-ARM-Check-all-terms-in-emitPopInst-when-clearing-Res.patch new file mode 100644 index 0000000000000000000000000000000000000000..4528f50f5eed02080e361a10047266e91558a91a --- /dev/null +++ b/0067-Backport-ARM-Check-all-terms-in-emitPopInst-when-clearing-Res.patch @@ -0,0 +1,87 @@ +From 4aec2da60ce3f639e31d81406c09d5c88b3b8f53 Mon Sep 17 00:00:00 2001 +From: Florian Hahn +Date: Wed, 20 Dec 2023 16:56:15 +0100 +Subject: [PATCH 2/3] [ARM] Check all terms in emitPopInst when clearing + Restored for LR. (#75527) + +emitPopInst checks a single function exit MBB. If other paths also exit +the function and any of there terminators uses LR implicitly, it is not +save to clear the Restored bit. + +Check all terminators for the function before clearing Restored. 
+ +This fixes a mis-compile in outlined-fn-may-clobber-lr-in-caller.ll +where the machine-outliner previously introduced BLs that clobbered LR +which in turn is used by the tail call return. + +Alternative to #73553 +--- + llvm/lib/Target/ARM/ARMFrameLowering.cpp | 30 +++++++++++++++++++++--- + llvm/lib/Target/ARM/ARMFrameLowering.h | 3 +++ + 2 files changed, 30 insertions(+), 3 deletions(-) + +diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp +index 4496d4928ebe..650f4650eef0 100644 +--- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp ++++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp +@@ -1645,9 +1645,6 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, + // Fold the return instruction into the LDM. + DeleteRet = true; + LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET; +- // We 'restore' LR into PC so it is not live out of the return block: +- // Clear Restored bit. +- Info.setRestored(false); + } + + // If NoGap is true, pop consecutive registers and then leave the rest +@@ -2769,6 +2766,33 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, + AFI->setLRIsSpilled(SavedRegs.test(ARM::LR)); + } + ++void ARMFrameLowering::processFunctionBeforeFrameFinalized( ++ MachineFunction &MF, RegScavenger *RS) const { ++ TargetFrameLowering::processFunctionBeforeFrameFinalized(MF, RS); ++ ++ MachineFrameInfo &MFI = MF.getFrameInfo(); ++ if (!MFI.isCalleeSavedInfoValid()) ++ return; ++ ++ // Check if all terminators do not implicitly use LR. Then we can 'restore' LR ++ // into PC so it is not live out of the return block: Clear the Restored bit ++ // in that case. ++ for (CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) { ++ if (Info.getReg() != ARM::LR) ++ continue; ++ if (all_of(MF, [](const MachineBasicBlock &MBB) { ++ return all_of(MBB.terminators(), [](const MachineInstr &Term) { ++ return !Term.isReturn() || Term.getOpcode() == ARM::LDMIA_RET || ++ Term.getOpcode() == ARM::t2LDMIA_RET || ++ Term.getOpcode() == ARM::tPOP_RET; ++ }); ++ })) { ++ Info.setRestored(false); ++ break; ++ } ++ } ++} ++ + void ARMFrameLowering::getCalleeSaves(const MachineFunction &MF, + BitVector &SavedRegs) const { + TargetFrameLowering::getCalleeSaves(MF, SavedRegs); +diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.h b/llvm/lib/Target/ARM/ARMFrameLowering.h +index 16f2ce6bea6f..8d2b8beb9a58 100644 +--- a/llvm/lib/Target/ARM/ARMFrameLowering.h ++++ b/llvm/lib/Target/ARM/ARMFrameLowering.h +@@ -59,6 +59,9 @@ public: + void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, + RegScavenger *RS) const override; + ++ void processFunctionBeforeFrameFinalized( ++ MachineFunction &MF, RegScavenger *RS = nullptr) const override; ++ + void adjustForSegmentedStacks(MachineFunction &MF, + MachineBasicBlock &MBB) const override; + +-- +2.33.0 + diff --git a/0068-Backport-ARM-Update-IsRestored-for-LR-based-on-all-returns-82.patch b/0068-Backport-ARM-Update-IsRestored-for-LR-based-on-all-returns-82.patch new file mode 100644 index 0000000000000000000000000000000000000000..eb34372c85aefb4a8e2d514f926adad455483eb9 --- /dev/null +++ b/0068-Backport-ARM-Update-IsRestored-for-LR-based-on-all-returns-82.patch @@ -0,0 +1,116 @@ +From 369bfc8ea8c0a9da51b4bd964f0045cb389c3c2f Mon Sep 17 00:00:00 2001 +From: ostannard +Date: Mon, 26 Feb 2024 12:23:25 +0000 +Subject: [PATCH 3/3] [ARM] Update IsRestored for LR based on all returns + (#82745) + +PR #75527 fixed ARMFrameLowering to set the IsRestored flag for LR based +on all of the return 
instructions in the function, not just one. +However, there is also code in ARMLoadStoreOptimizer which changes +return instructions, but it set IsRestored based on the one instruction +it changed, not the whole function. + +The fix is to factor out the code added in #75527, and also call it from +ARMLoadStoreOptimizer if it made a change to return instructions. + +Fixes #80287. +--- + llvm/lib/Target/ARM/ARMFrameLowering.cpp | 11 +++++---- + llvm/lib/Target/ARM/ARMFrameLowering.h | 4 ++++ + llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 23 ++++++++----------- + 3 files changed, 21 insertions(+), 17 deletions(-) + +diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp +index 650f4650eef0..008ba4e5924b 100644 +--- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp ++++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp +@@ -2766,10 +2766,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, + AFI->setLRIsSpilled(SavedRegs.test(ARM::LR)); + } + +-void ARMFrameLowering::processFunctionBeforeFrameFinalized( +- MachineFunction &MF, RegScavenger *RS) const { +- TargetFrameLowering::processFunctionBeforeFrameFinalized(MF, RS); +- ++void ARMFrameLowering::updateLRRestored(MachineFunction &MF) { + MachineFrameInfo &MFI = MF.getFrameInfo(); + if (!MFI.isCalleeSavedInfoValid()) + return; +@@ -2793,6 +2790,12 @@ void ARMFrameLowering::processFunctionBeforeFrameFinalized( + } + } + ++void ARMFrameLowering::processFunctionBeforeFrameFinalized( ++ MachineFunction &MF, RegScavenger *RS) const { ++ TargetFrameLowering::processFunctionBeforeFrameFinalized(MF, RS); ++ updateLRRestored(MF); ++} ++ + void ARMFrameLowering::getCalleeSaves(const MachineFunction &MF, + BitVector &SavedRegs) const { + TargetFrameLowering::getCalleeSaves(MF, SavedRegs); +diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.h b/llvm/lib/Target/ARM/ARMFrameLowering.h +index 8d2b8beb9a58..3c7358d8cd53 100644 +--- a/llvm/lib/Target/ARM/ARMFrameLowering.h ++++ b/llvm/lib/Target/ARM/ARMFrameLowering.h +@@ -59,6 +59,10 @@ public: + void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, + RegScavenger *RS) const override; + ++ /// Update the IsRestored flag on LR if it is spilled, based on the return ++ /// instructions. ++ static void updateLRRestored(MachineFunction &MF); ++ + void processFunctionBeforeFrameFinalized( + MachineFunction &MF, RegScavenger *RS = nullptr) const override; + +diff --git a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +index 93db983b92c0..37d9e1addd1e 100644 +--- a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp ++++ b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +@@ -2062,17 +2062,6 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { + MO.setReg(ARM::PC); + PrevMI.copyImplicitOps(*MBB.getParent(), *MBBI); + MBB.erase(MBBI); +- // We now restore LR into PC so it is not live-out of the return block +- // anymore: Clear the CSI Restored bit. 
+- MachineFrameInfo &MFI = MBB.getParent()->getFrameInfo(); +- // CSI should be fixed after PrologEpilog Insertion +- assert(MFI.isCalleeSavedInfoValid() && "CSI should be valid"); +- for (CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) { +- if (Info.getReg() == ARM::LR) { +- Info.setRestored(false); +- break; +- } +- } + return true; + } + } +@@ -2120,14 +2109,22 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { + isThumb2 = AFI->isThumb2Function(); + isThumb1 = AFI->isThumbFunction() && !isThumb2; + +- bool Modified = false; ++ bool Modified = false, ModifiedLDMReturn = false; + for (MachineBasicBlock &MBB : Fn) { + Modified |= LoadStoreMultipleOpti(MBB); + if (STI->hasV5TOps() && !AFI->shouldSignReturnAddress()) +- Modified |= MergeReturnIntoLDM(MBB); ++ ModifiedLDMReturn |= MergeReturnIntoLDM(MBB); + if (isThumb1) + Modified |= CombineMovBx(MBB); + } ++ Modified |= ModifiedLDMReturn; ++ ++ // If we merged a BX instruction into an LDM, we need to re-calculate whether ++ // LR is restored. This check needs to consider the whole function, not just ++ // the instruction(s) we changed, because there may be other BX returns which ++ // still need LR to be restored. ++ if (ModifiedLDMReturn) ++ ARMFrameLowering::updateLRRestored(Fn); + + Allocator.DestroyAll(); + return Modified; +-- +2.33.0 + diff --git a/0201-third-party-Add-install-targets-for-gtest.patch b/0201-third-party-Add-install-targets-for-gtest.patch deleted file mode 100644 index 5c8613057f4ab45a7562fc1055bcf6ac2d1f3ac0..0000000000000000000000000000000000000000 --- a/0201-third-party-Add-install-targets-for-gtest.patch +++ /dev/null @@ -1,47 +0,0 @@ -From 8cc3870f09d728d9017c72eba9520117a4283fee Mon Sep 17 00:00:00 2001 -From: Tom Stellard -Date: Thu, 17 Nov 2022 09:01:10 +0000 -Subject: Add install targets for gtest - -Stand-alone builds need an installed version of gtest in order to run -the unittests. - -Differential Revision: https://reviews.llvm.org/D137890 ---- - third-party/unittest/CMakeLists.txt | 15 ++++++++++++++- - 1 file changed, 14 insertions(+), 1 deletion(-) - -diff --git a/third-party/unittest/CMakeLists.txt b/third-party/unittest/CMakeLists.txt -index 0e54e0e57c35..1d2a52730d7d 100644 ---- a/third-party/unittest/CMakeLists.txt -+++ b/third-party/unittest/CMakeLists.txt -@@ -65,12 +65,25 @@ if (NOT LLVM_ENABLE_THREADS) - endif () - - target_include_directories(llvm_gtest -- PUBLIC googletest/include googlemock/include -+ PUBLIC $ -+ $ -+ $ -+ $ - PRIVATE googletest googlemock - ) - - add_subdirectory(UnitTestMain) - -+if (LLVM_INSTALL_GTEST) -+export(TARGETS llvm_gtest llvm_gtest_main LLVMTestingSupport FILE LLVMGTestConfig.cmake) -+install(TARGETS llvm_gtest llvm_gtest_main LLVMTestingSupport EXPORT LLVMGTestConfig -+ ARCHIVE DESTINATION "lib${LLVM_LIBDIR_SUFFIX}" COMPONENT llvm_gtest) -+ install(EXPORT LLVMGTestConfig DESTINATION ${LLVM_INSTALL_PACKAGE_DIR} COMPONENT llvm_gtest) -+ add_llvm_install_targets(install-llvm_gtest COMPONENT llvm_gtest DEPENDS llvm_gtest LLVMGTestConfig.cmake) -+ install(DIRECTORY googletest/include/gtest/ DESTINATION include/llvm-gtest/gtest/ COMPONENT llvm_gtest) -+ install(DIRECTORY googlemock/include/gmock/ DESTINATION include/llvm-gmock/gmock/ COMPONENT llvm_gtest) -+endif() -+ - # When LLVM_LINK_LLVM_DYLIB is enabled, libLLVM.so is added to the interface - # link libraries for gtest and gtest_main. 
This means that any target, like - # unittests for example, that links against gtest will be forced to link --- -2.34.3 - diff --git a/cmake-16.0.6.src.tar.xz b/cmake-16.0.6.src.tar.xz deleted file mode 100644 index a879639e5bd3fe11d0e23a0ae2bc0eee7e76eb8f..0000000000000000000000000000000000000000 Binary files a/cmake-16.0.6.src.tar.xz and /dev/null differ diff --git a/cmake-17.0.6.src.tar.xz b/cmake-17.0.6.src.tar.xz new file mode 100644 index 0000000000000000000000000000000000000000..aeec995a7c0ddaa99a8549223bb1cb8ddab8ab32 Binary files /dev/null and b/cmake-17.0.6.src.tar.xz differ diff --git a/llvm-16.0.6.src.tar.xz b/llvm-17.0.6.src.tar.xz similarity index 78% rename from llvm-16.0.6.src.tar.xz rename to llvm-17.0.6.src.tar.xz index 8969af44782d5795baf921f365b8f9d1a20a4b08..8489442f14da0ebe1f0aa90b6dfe69ca442110a6 100644 Binary files a/llvm-16.0.6.src.tar.xz and b/llvm-17.0.6.src.tar.xz differ diff --git a/llvm.spec b/llvm.spec index 6ba5cc9fabaea204aef6aaf2618e14690ff15bd8..e51825f1bf32298e2d5ae06079042da1feb7a6d6 100644 --- a/llvm.spec +++ b/llvm.spec @@ -28,7 +28,7 @@ %global llvm_srcdir llvm-%{maj_ver}.%{min_ver}.%{patch_ver}.src %global patch_ver 6 %global min_ver 0 -%global maj_ver 16 +%global maj_ver 17 %if %{with compat_build} %global pkg_libdir %{install_libdir} @@ -63,10 +63,75 @@ Source0: https://github.com/llvm/llvm-project/releases/download/llvmorg-%{maj_ve Source2: https://github.com/llvm/llvm-project/releases/download/llvmorg-%{maj_ver}.%{min_ver}.%{patch_ver}/%{cmake_srcdir}.tar.xz Source4: https://github.com/llvm/llvm-project/releases/download/llvmorg-%{maj_ver}.%{min_ver}.%{patch_ver}/%{third_party_srcdir}.tar.xz -Patch2: 0001-llvm-Add-install-targets-for-gtest.patch -Patch3: 0001-backport-LoongArch-patches.patch -Patch4: 0002-LoongArch-fix-symbol-lookup-error.patch -Patch201: 0201-third-party-Add-install-targets-for-gtest.patch +# Patches for LoongArch +Patch1: 0001-LoongArch-Add-relax-feature-and-keep-relocations-721.patch +Patch2: 0002-LoongArch-Allow-delayed-decision-for-ADD-SUB-relocat.patch +Patch3: 0003-LoongArch-Emit-R_LARCH_RELAX-when-expanding-some-Loa.patch +Patch4: 0004-MC-LoongArch-Add-AlignFragment-size-if-layout-is-ava.patch +Patch5: 0005-LoongArch-RISCV-Support-R_LARCH_-ADD-SUB-_ULEB128-R_.patch +Patch6: 0006-LoongArch-Add-relaxDwarfLineAddr-and-relaxDwarfCFA-t.patch +Patch7: 0007-LoongArch-Insert-nops-and-emit-align-reloc-when-hand.patch +Patch8: 0008-test-Update-dwarf-loongarch-relocs.ll.patch +Patch9: 0009-MC-test-Change-ELF-uleb-ehtable.s-Mach-O-to-use-priv.patch +Patch10: 0010-Mips-MC-AttemptToFoldSymbolOffsetDifference-revert-i.patch +Patch11: 0011-Clang-LoongArch-Use-the-ClangBuiltin-class-to-automa.patch +Patch12: 0012-LoongArch-Add-LSX-intrinsic-support.patch +Patch13: 0013-LoongArch-Add-LASX-intrinsic-support.patch +Patch14: 0014-LoongArch-Add-LSX-intrinsic-testcases.patch +Patch15: 0015-LoongArch-Add-LASX-intrinsic-testcases.patch +Patch16: 0016-LoongArch-Add-testcases-of-LASX-intrinsics-with-imme.patch +Patch17: 0017-LoongArch-MC-Add-invalid-immediate-testcases-for-LSX.patch +Patch18: 0018-LoongArch-MC-Add-invalid-immediate-testcases-for-LAS.patch +Patch19: 0019-LoongArch-Add-testcases-of-LSX-intrinsics-with-immed.patch +Patch20: 0020-LoongArch-Reorder-LoongArchTargetLowering-.-NFC.patch +Patch21: 0021-LoongArch-Fix-typos.-NFC.patch +Patch22: 0022-LoongArch-Set-some-operations-action-for-LSX-and-LAS.patch +Patch23: 0023-LoongArch-Add-codegen-support-for-extractelement-737.patch +Patch24: 
0024-LoongArch-Add-some-binary-IR-instructions-testcases-.patch +Patch25: 0025-LoongArch-Add-codegen-support-for-insertelement.patch +Patch26: 0026-LoongArch-Custom-lowering-ISD-BUILD_VECTOR.patch +Patch27: 0027-LoongArch-Add-more-and-or-xor-patterns-for-vector-ty.patch +Patch28: 0028-LoongArch-Add-some-binary-IR-instructions-testcases-.patch +Patch29: 0029-LoongArch-Override-TargetLowering-isShuffleMaskLegal.patch +Patch30: 0030-Reland-LoongArch-Support-CTLZ-with-lsx-lasx.patch +Patch31: 0031-LoongArch-Support-MULHS-MULHU-with-lsx-lasx.patch +Patch32: 0032-LoongArch-Make-ISD-VSELECT-a-legal-operation-with-ls.patch +Patch33: 0033-LoongArch-Add-codegen-support-for-icmp-fcmp-with-lsx.patch +Patch34: 0034-LoongArch-Make-ISD-FSQRT-a-legal-operation-with-lsx-.patch +Patch35: 0035-LoongArch-Mark-ISD-FNEG-as-legal.patch +Patch36: 0036-LoongArch-Add-codegen-support-for-X-VF-MSUB-NMADD-NM.patch +Patch37: 0037-LoongArch-Fix-LASX-vector_extract-codegen.patch +Patch38: 0038-LoongArch-Fix-incorrect-pattern-XVREPL128VEI_-W-D-in.patch +Patch39: 0039-LoongArch-Fix-incorrect-pattern-X-VBITSELI_B-instruc.patch +Patch40: 0040-LoongArch-Permit-auto-vectorization-using-LSX-LASX-w.patch +Patch41: 0041-CodeGen-LoongArch-Set-SINT_TO_FP-UINT_TO_FP-to-legal.patch +Patch42: 0042-CodeGen-LoongArch-Set-FP_TO_SINT-FP_TO_UINT-to-legal.patch +Patch43: 0043-LoongArch-Fixing-the-incorrect-return-value-of-Loong.patch +Patch44: 0044-LoongArch-Pre-commit-test-for-76913.-NFC.patch +Patch45: 0045-LoongArch-Implement-LoongArchRegisterInfo-canRealign.patch +Patch46: 0046-sanitizer-msan-VarArgHelper-for-loongarch64.patch +Patch47: 0047-LowerTypeTests-Add-loongarch64-to-CFI-jumptables-673.patch +Patch48: 0048-LoongArch-Fix-td-pattern-for-CACOP-LDPTE-and-LDDIR.patch +Patch49: 0049-LoongArch-MC-Refine-MCInstrAnalysis-based-on-registe.patch +Patch50: 0050-LoongArch-NFC-Pre-commit-MCInstrAnalysis-tests-for-i.patch +Patch51: 0051-LoongArch-Set-isBarrier-to-true-for-instruction-b-72.patch +Patch52: 0052-LoongArch-MC-Pre-commit-tests-for-instr-bl-fixupkind.patch +Patch53: 0053-LoongArch-MC-Support-to-get-the-FixupKind-for-BL-729.patch +Patch54: 0054-LoongArch-MC-Modify-branch-evaluation-for-MCInstrAna.patch +Patch55: 0055-LoongArch-Precommit-a-test-for-smul-with-overflow-NF.patch +Patch56: 0056-LoongArch-Disable-mulodi4-and-muloti4-libcalls-73199.patch +Patch57: 0057-LoongArch-Fix-pattern-for-FNMSUB_-S-D-instructions-7.patch +Patch58: 0058-LoongArch-Fix-the-procossor-series-mask.patch +Patch59: 0059-LoongArch-Make-sure-that-the-LoongArchISD-BSTRINS-no.patch +Patch60: 0060-LoongArch-Improve-codegen-for-atomic-ops-67391.patch +Patch61: 0061-LoongArch-Add-some-atomic-tests-68766.patch +Patch62: 0062-LoongArch-Support-finer-grained-DBAR-hints-for-LA664.patch +Patch63: 0063-LoongArch-Precommit-a-test-for-atomic-cmpxchg-optmiz.patch +Patch64: 0064-LoongArch-Improve-codegen-for-atomic-cmpxchg-ops-693.patch +Patch65: 0065-LoongArch-Override-LoongArchTargetLowering-getExtend.patch +Patch66: 0066-Memory-Call-__clear_cache-in-InvalidateInstructionCa.patch +Patch67: 0067-Backport-ARM-Check-all-terms-in-emitPopInst-when-clearing-Res.patch +Patch68: 0068-Backport-ARM-Update-IsRestored-for-LR-based-on-all-returns-82.patch BuildRequires: gcc gcc-c++ clang cmake ninja-build zlib-devel libffi-devel BuildRequires: libxml2-devel ncurses-devel python3-psutil python3-sphinx @@ -174,7 +239,6 @@ This is the main package for llvm-toolset. cd .. mv %{cmake_srcdir} cmake %setup -T -q -b 4 -n %{third_party_srcdir} -%autopatch -m200 -p2 cd .. 
mv %{third_party_srcdir} third-party @@ -486,6 +550,19 @@ fi %endif %changelog +* Tue Jun 04 2024 hezhongkun - 17.0.6-3 +- Backport patch to fix CVE-2024-31852 + +* Tue Apr 9 2024 Chen Li - 17.0.6-2 +- LoongArch Backport: Improve codegen for atomic ops and dbar +- LoongArch Backport: Bugfix +- LoongArch Backport: Improve support and fix some bugs for compiler-rt +- LoongArch Backport: Add the support for vector(LSX/LASX) +- LoongArch Backport: Support `relax` feature + +* Fri Mar 8 2024 Xiaoping Liu - 17.0.6-1 +- update to 17.0.6 + +* Tue Oct 31 2023 Chen Li - 16.0.6-3 - LoongArch: Fix symbol lookup error when compiling clang on aarch and x86 diff --git a/third-party-16.0.6.src.tar.xz b/third-party-16.0.6.src.tar.xz deleted file mode 100644 index 0ccce099985d47d44f779d3b179493014b11fcd8..0000000000000000000000000000000000000000 Binary files a/third-party-16.0.6.src.tar.xz and /dev/null differ diff --git a/third-party-17.0.6.src.tar.xz b/third-party-17.0.6.src.tar.xz new file mode 100644 index 0000000000000000000000000000000000000000..b38f5acb8fb098ae7e71e50d3f352f04beaf63aa Binary files /dev/null and b/third-party-17.0.6.src.tar.xz differ